@creativeorange/azure-text-to-speech 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/main.ts CHANGED
@@ -1,364 +1,2 @@
1
- import {
2
- SpeakerAudioDestination,
3
- AudioConfig,
4
- SpeechConfig,
5
- SpeechSynthesizer,
6
- SpeechSynthesisOutputFormat,
7
- } from 'microsoft-cognitiveservices-speech-sdk';
8
-
9
/**
 * Click-driven text-to-speech for DOM content, backed by the Azure Speech SDK.
 *
 * Elements opt in through attributes discovered by {@link TextToSpeech.start}:
 *
 * - `co-tts` – speak the attribute value, or the element's `innerText` when empty.
 * - `co-tts.id` – speak the element whose id is the attribute value (that
 *   element may carry `co-tts.text` and `co-tts.highlight`).
 * - `co-tts.ajax` – `GET` the attribute value and speak the response body.
 * - `co-tts.stop` / `co-tts.pause` / `co-tts.resume` – playback controls.
 * - `co-tts.highlight` – highlight the spoken word inside this element (empty
 *   value) or inside the element with the given id.
 * - `co-tts.next` – id of the element to continue with once playback ends.
 *
 * Progress is announced on `document` through the custom events
 * `COAzureTTSStartedPlaying`, `COAzureTTSFinishedPlaying`,
 * `COAzureTTSStoppedPlaying`, `COAzureTTSPausedPlaying` and
 * `COAzureTTSResumedPlaying`.
 */
export default class TextToSpeech {
  /** Single-character tokens that are never highlighted on their own. */
  private static readonly punctuationMarks: ReadonlySet<string> = new Set([
    '.', ',', '!', '?', '*', '(', ')', '&', '\\', '/', '^', '[', ']', '<', '>', ':',
  ]);

  key: string;
  region: string;
  voice: string;
  rate: number;
  pitch: number;

  textToRead: string = '';

  // Spelling kept as-is: this is a public field name.
  wordBoundryList: any[] = [];

  clickedNode: any;
  highlightDiv: any;

  speechConfig: any;
  audioConfig: any;
  player: any;
  synthesizer: any;

  previousWordBoundary: any;

  interval: any;

  // Keyed by the spoken word. A prototype-less object is used so that words
  // such as "find", "some", "at" or "constructor" cannot collide with
  // inherited members (the previous Array-based store resolved those to
  // Array.prototype functions and produced NaN offsets).
  wordEncounters: Record<string, number> = Object.create(null);
  originalHighlightDivInnerHTML: string = '';
  currentWord: string = '';
  currentOffset: number = 0;
  wordBoundaryOffset: number = 0;

  /**
   * @param key Speech resource subscription key.
   * @param region Speech resource region.
   * @param voice Voice name used both for the speech config and the SSML `<voice>`.
   * @param rate Prosody rate, emitted as `${rate}%`.
   * @param pitch Prosody pitch, emitted as `${pitch}%`.
   */
  constructor(key: string, region: string, voice: string, rate: number = 0, pitch: number = 0) {
    this.key = key;
    this.region = region;
    this.voice = voice;
    this.rate = rate;
    this.pitch = pitch;
  }

  /** Scans the whole document and wires up every `co-tts*` element. */
  async start() {
    await this.registerBindings(document);
  }

  /** Sets the voice used for subsequent playback. Chainable. */
  setVoice(voice: string) {
    this.voice = voice;

    return this;
  }

  /** Sets the prosody rate used for subsequent playback. Chainable. */
  setRate(rate: number) {
    this.rate = rate;

    return this;
  }

  /** Sets the prosody pitch used for subsequent playback. Chainable. */
  setPitch(pitch: number) {
    this.pitch = pitch;

    return this;
  }

  /**
   * Recursively walks the descendants of `node` and attaches the matching
   * click handler to every element carrying a `co-tts*` attribute.
   *
   * Only the first matching attribute per element is honoured, in the order
   * `co-tts.id`, `co-tts.ajax`, `co-tts`, `co-tts.stop`, `co-tts.resume`,
   * `co-tts.pause`.
   */
  async registerBindings(node: any) {
    const children = node.childNodes;
    for (let i = 0; i < children.length; i++) {
      const child = children[i];
      if (!child) {
        continue;
      }

      // Text and comment nodes have no attribute map.
      const attributes = child.attributes;
      if (attributes) {
        let attr: Attr | null;
        if ((attr = attributes.getNamedItem('co-tts.id'))) {
          await this.handleIdModifier(child, attr);
        } else if ((attr = attributes.getNamedItem('co-tts.ajax'))) {
          await this.handleAjaxModifier(child, attr);
        } else if ((attr = attributes.getNamedItem('co-tts'))) {
          await this.handleDefault(child, attr);
        } else if ((attr = attributes.getNamedItem('co-tts.stop'))) {
          await this.handleStopModifier(child, attr);
        } else if ((attr = attributes.getNamedItem('co-tts.resume'))) {
          await this.handleResumeModifier(child, attr);
        } else if ((attr = attributes.getNamedItem('co-tts.pause'))) {
          await this.handlePauseModifier(child, attr);
        }
      }

      if (child.childNodes.length > 0) {
        await this.registerBindings(child);
      }
    }
  }

  /**
   * `co-tts.id`: on click, speaks the element whose id equals `attr.value`.
   * Nothing is spoken when that element does not exist.
   */
  async handleIdModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();

      const referenceDiv = document.getElementById(attr.value);
      this.clickedNode = referenceDiv;
      if (!referenceDiv) {
        return;
      }

      // Started only once we know there is something to speak, so a dangling
      // id does not leave a timer running.
      await this.createInterval();

      // `||` on purpose: an absent (null) and an empty attribute both fall
      // back to the rendered text.
      this.textToRead = referenceDiv.getAttribute('co-tts.text') || referenceDiv.innerText;
      this.resolveHighlightTarget(referenceDiv);

      await this.startSynthesizer(node, attr);
    });
  }

  /**
   * `co-tts.ajax`: on click, fetches `attr.value` with `GET` and speaks the
   * response body. Non-2xx responses are not spoken.
   */
  async handleAjaxModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      this.clickedNode = node;

      const response = await fetch(attr.value, {
        method: `GET`,
      });
      if (!response.ok) {
        // An error page is not content the user asked to hear.
        return;
      }

      this.textToRead = await response.text();

      await this.createInterval();
      await this.startSynthesizer(node, attr);
    });
  }

  /** `co-tts`: on click, speaks `attr.value`, or the element text when empty. */
  async handleDefault(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.handleWithoutClick(node, attr);
    });
  }

  /**
   * Immediately speaks `attr.value` (or `node.innerText` when the value is
   * empty), replacing whatever is currently playing. Also used to continue a
   * `co-tts.next` chain without a user click.
   */
  async handleWithoutClick(node: any, attr: Attr) {
    await this.stopPlayer();
    await this.createInterval();
    this.clickedNode = node;
    this.resolveHighlightTarget(node);
    this.textToRead = attr.value === '' ? node.innerText : attr.value;

    await this.startSynthesizer(node, attr);
  }

  /** `co-tts.stop`: on click, stops playback and emits `COAzureTTSStoppedPlaying`. */
  async handleStopModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      document.dispatchEvent(new CustomEvent('COAzureTTSStoppedPlaying', {}));
    });
  }

  /**
   * `co-tts.pause`: on click, pauses playback and emits
   * `COAzureTTSPausedPlaying`. Does nothing when no playback is active.
   */
  async handlePauseModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      if (this.player === undefined) {
        return;
      }

      await this.clearInterval();
      await this.player.pause();
      document.dispatchEvent(new CustomEvent('COAzureTTSPausedPlaying', {}));
    });
  }

  /**
   * `co-tts.resume`: on click, resumes playback and emits
   * `COAzureTTSResumedPlaying`. Does nothing when no playback is active.
   */
  async handleResumeModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      if (this.player === undefined) {
        return;
      }

      await this.createInterval();
      await this.player.resume();
      document.dispatchEvent(new CustomEvent('COAzureTTSResumedPlaying', {}));
    });
  }

  /**
   * Stops highlighting, restores the highlighted element's original markup,
   * pauses the current player and resets all per-utterance state.
   */
  async stopPlayer() {
    await this.clearInterval();
    if (this.highlightDiv != null) {
      this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
    }

    this.textToRead = '';
    this.currentWord = '';
    this.originalHighlightDivInnerHTML = '';
    this.wordBoundryList = [];
    this.wordEncounters = Object.create(null);
    // Otherwise a leading punctuation token in the next utterance would fall
    // back to a word of the previous one.
    this.previousWordBoundary = undefined;
    if (this.player !== undefined) {
      this.player.pause();
    }
    this.player = undefined;
    this.highlightDiv = undefined;
  }

  /**
   * Creates a fresh player and synthesizer and speaks `this.textToRead`.
   *
   * `node` and `attr` are accepted for interface compatibility; they are not
   * read here.
   */
  async startSynthesizer(node: any, attr: Attr) {
    this.speechConfig = SpeechConfig.fromSubscription(this.key, this.region);

    this.speechConfig.speechSynthesisVoiceName = `Microsoft Server Speech Text to Speech Voice (${this.voice})`;
    this.speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3;

    const player = new SpeakerAudioDestination();
    this.player = player;

    this.audioConfig = AudioConfig.fromSpeakerOutput(player);

    // Held in a local so the completion callbacks always release *this*
    // synthesizer, even if another utterance has replaced the field meanwhile.
    const synthesizer = new SpeechSynthesizer(this.speechConfig, this.audioConfig);
    this.synthesizer = synthesizer;

    synthesizer.wordBoundary = (s: any, e: any) => {
      this.wordBoundryList.push(e);
    };

    player.onAudioEnd = async () => {
      // Ignore a player that has already been stopped or superseded.
      if (this.player !== player) {
        return;
      }

      const finishedNode = this.clickedNode;
      // Awaited so the highlight is restored before any event is observed.
      await this.stopPlayer();

      const nextNode = finishedNode?.hasAttribute('co-tts.next')
        ? document.getElementById(finishedNode.getAttribute('co-tts.next'))
        : null;

      if (!nextNode) {
        // Either there is no chain, or it points at a missing element; in
        // both cases playback is over.
        document.dispatchEvent(new CustomEvent('COAzureTTSFinishedPlaying', {}));
        return;
      }

      const nextText = nextNode.attributes.getNamedItem('co-tts.text');
      if (nextText) {
        await this.handleWithoutClick(nextNode, nextText);
      } else {
        nextNode.dispatchEvent(new Event('click'));
      }
    };

    player.onAudioStart = async () => {
      document.dispatchEvent(new CustomEvent('COAzureTTSStartedPlaying', {}));
    };

    const release = () => {
      synthesizer.close();
      if (this.synthesizer === synthesizer) {
        this.synthesizer = undefined;
      }
    };

    // Same clean-up on success and on failure.
    synthesizer.speakSsmlAsync(this.buildSSML(this.textToRead), release, release);
  }

  /** Stops the highlight timer, if any. */
  async clearInterval() {
    clearInterval(this.interval);
  }

  /**
   * (Re)starts the 50 ms timer that keeps the highlighted word in sync with
   * the player position. Any previously running timer is cleared first so
   * repeated calls never stack timers.
   */
  async createInterval() {
    clearInterval(this.interval);
    this.interval = setInterval(() => this.refreshHighlight(), 50);
  }

  /**
   * Returns the index in `string` of the first whole-word occurrence of
   * `subString` at or after `lastOffset`, or `-1` when there is none.
   *
   * A match must be preceded by the start of the searched slice or by a
   * character that is neither `-` nor a word character, and must not be
   * followed by a word character. `subString` is matched literally.
   */
  getPosition(string: string, subString: string, lastOffset: number) {
    const literal = subString.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `(?!\w)` equals `\b` after a word character but, unlike `\b`, also
    // accepts words that end in punctuation (e.g. "U.S.").
    const regex = new RegExp(`(?:^|[^-\\w])(${literal})(?!\\w)`);
    const match = regex.exec(string.slice(lastOffset));
    if (match === null) {
      return -1;
    }

    // The match may start with one separator character; skip it.
    const separatorLength = match[0].length - subString.length;
    return lastOffset + match.index + separatorLength;
  }

  /**
   * Wraps `text` in the SSML document sent to the service, applying the
   * configured voice, rate and pitch. `text` is treated as plain text and is
   * XML-escaped.
   */
  buildSSML(text: string) {
    return `<speak xmlns="http://www.w3.org/2001/10/synthesis"
      xmlns:mstts="http://www.w3.org/2001/mstts"
      xmlns:emo="http://www.w3.org/2009/10/emotionml"
      version="1.0"
      xml:lang="en-US">
      <voice name="${this.escapeXml(this.voice)}">
        <prosody rate="${this.rate}%" pitch="${this.pitch}%">
          ${this.escapeXml(text)}
        </prosody>
      </voice>
    </speak>`;
  }

  /**
   * Picks the element to highlight for `source` from its `co-tts.highlight`
   * attribute and snapshots that element's markup. Highlighting is skipped
   * when the attribute is absent or names an element that does not exist.
   */
  private resolveHighlightTarget(source: any): void {
    if (!source.hasAttribute('co-tts.highlight')) {
      return;
    }

    const targetId = source.getAttribute('co-tts.highlight');
    const target = targetId ? document.getElementById(targetId) : source;
    if (!target) {
      return;
    }

    this.highlightDiv = target;
    this.originalHighlightDivInnerHTML = target.innerHTML;
  }

  /** One timer tick: marks the word currently being spoken, if any. */
  private refreshHighlight(): void {
    if (this.player === undefined || !this.highlightDiv) {
      return;
    }

    // `currentTime` is in seconds; `audioOffset` is divided by 10000 to get
    // milliseconds. Boundaries arrive in playback order.
    const elapsedMs = this.player.currentTime * 1000;
    let wordBoundary: any;
    for (const boundary of this.wordBoundryList) {
      if (elapsedMs > boundary.audioOffset / 10000) {
        wordBoundary = boundary;
      } else {
        break;
      }
    }

    // Punctuation keeps the previous word highlighted.
    if (wordBoundary !== undefined && TextToSpeech.punctuationMarks.has(wordBoundary.text)) {
      wordBoundary = this.previousWordBoundary ?? undefined;
    }

    if (wordBoundary === undefined) {
      this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
      return;
    }

    const word = wordBoundary.text;
    if (this.currentWord !== word || this.wordBoundaryOffset !== wordBoundary.textOffset) {
      this.currentOffset = this.getPosition(
        this.originalHighlightDivInnerHTML,
        word,
        this.wordEncounters[word] ?? 0
      );
      if (this.currentOffset >= 0) {
        // The next occurrence of the same word is searched after this one.
        this.wordEncounters[word] = this.currentOffset + wordBoundary.wordLength;
      }
      this.currentWord = word;
      this.wordBoundaryOffset = wordBoundary.textOffset;
    }

    if (this.currentOffset <= -1) {
      this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
      return;
    }

    this.previousWordBoundary = wordBoundary;
    const before = this.originalHighlightDivInnerHTML.substring(0, this.currentOffset);
    const after = this.originalHighlightDivInnerHTML.substring(this.currentOffset + wordBoundary.wordLength);
    this.highlightDiv.innerHTML = `${before}<mark class='co-tts-highlight'>${word}</mark>${after}`;
  }

  /** Escapes the five XML special characters for use in SSML text or attributes. */
  private escapeXml(value: string): string {
    return value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }
}
1
+ export * from './SpeechToText';
2
+ export * from './TextToSpeech';