@creativeorange/azure-text-to-speech 1.1.7 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ import {SpeechToText} from "@creativeorange/azure-text-to-speech/dist/co-azure-tts.es";
2
/**
 * Example Nuxt plugin that activates the speech-to-text DOM bindings.
 *
 * A global Vue mixin is registered, so the `mounted` / `beforeUnmount`
 * hooks below run for every component. The `started` flag makes sure the
 * bindings are only started once until the next `beforeUnmount`.
 *
 * Replace the bracketed placeholders with real Azure credentials and
 * language codes.
 */
export default defineNuxtPlugin(async (nuxtApp) => {
  const speechToText = new SpeechToText(
    '[key]',
    '[region]',
    '[source language]',
    '[target language]'
  );
  let started = false;
  // Handle of the delayed start, kept so an unmount that happens before the
  // delay elapses can cancel it instead of starting after we already stopped.
  let startTimer: ReturnType<typeof setTimeout> | undefined;

  nuxtApp.vueApp.mixin({
    mounted() {
      if (started) {
        return;
      }

      started = true;
      // NOTE(review): the 500 ms delay presumably gives the page time to
      // render the elements that carry the co-stt.* attributes — confirm.
      startTimer = setTimeout(() => {
        startTimer = undefined;
        speechToText.start().catch((error: unknown) => {
          // Surface binding failures instead of leaving an unhandled rejection.
          console.error(error);
        });
      }, 500);
    },
    beforeUnmount() {
      if (!started) {
        return;
      }

      if (startTimer !== undefined) {
        clearTimeout(startTimer);
        startTimer = undefined;
      }
      void speechToText.stop();
      started = false;
    },
  });
});
@@ -0,0 +1,28 @@
1
+ import {TextToSpeech} from "@creativeorange/azure-text-to-speech";
2
/**
 * Example Nuxt plugin that activates the text-to-speech DOM bindings.
 *
 * A global Vue mixin is registered, so the `mounted` / `beforeUnmount`
 * hooks below run for every component. The `started` flag makes sure the
 * bindings are only started once until the next `beforeUnmount`.
 *
 * Replace the bracketed placeholders with real Azure credentials and a
 * voice name.
 */
export default defineNuxtPlugin(async (nuxtApp) => {
  const textToSpeech = new TextToSpeech(
    '[key]',
    '[region]',
    '[voice]',
    1, // rate
    1 // pitch
  );
  let started = false;
  // Handle of the delayed start, kept so an unmount that happens before the
  // delay elapses can cancel it instead of starting after we already stopped.
  let startTimer: ReturnType<typeof setTimeout> | undefined;

  nuxtApp.vueApp.mixin({
    mounted() {
      if (started) {
        return;
      }

      started = true;
      // NOTE(review): the 500 ms delay presumably gives the page time to
      // render the elements that carry the co-tts* attributes — confirm.
      startTimer = setTimeout(() => {
        startTimer = undefined;
        textToSpeech.start().catch((error: unknown) => {
          // Surface binding failures instead of leaving an unhandled rejection.
          console.error(error);
        });
      }, 500);
    },
    beforeUnmount() {
      if (!started) {
        return;
      }

      if (startTimer !== undefined) {
        clearTimeout(startTimer);
        startTimer = undefined;
      }
      void textToSpeech.stopPlayer();
      started = false;
    }
  });
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@creativeorange/azure-text-to-speech",
3
- "version": "1.1.7",
3
+ "version": "1.2.0",
4
4
  "main": "dist/co-azure-tts.umd.js",
5
5
  "browser": "dist/co-azure-tts.es.js",
6
6
  "scripts": {
@@ -0,0 +1,99 @@
1
+ import {
2
+ SpeechTranslationConfig,
3
+ AudioConfig,
4
+ TranslationRecognizer,
5
+ ResultReason,
6
+ } from 'microsoft-cognitiveservices-speech-sdk';
7
+
8
/**
 * Binds Azure speech translation to DOM elements through attributes.
 *
 * - `co-stt.start="<element id>"`: a click records one utterance from the
 *   default microphone and appends the translated text to the element with
 *   the given id.
 * - `co-stt.stop`: a click stops the current recording.
 *
 * `COAzureSTTStartedRecording` and `COAzureSTTStoppedRecording` custom events
 * are dispatched on `document` when recording starts and stops.
 */
export class SpeechToText {
  key: string;
  region: string;
  sourceLanguage: string;
  targetLanguage: string;
  recognizer: TranslationRecognizer | undefined;

  /**
   * Nodes that already received a click listener from `registerBindings`.
   * Prevents duplicate listeners when `start()` is called more than once.
   */
  private readonly boundNodes = new WeakSet<object>();

  /**
   * @param key Azure Speech subscription key.
   * @param region Azure region of the Speech resource.
   * @param sourceLanguage Language that is spoken into the microphone.
   * @param targetLanguage Language to translate into; defaults to `sourceLanguage`.
   */
  constructor(key: string, region: string, sourceLanguage: string, targetLanguage: string | null = null) {
    this.key = key;
    this.region = region;
    this.sourceLanguage = sourceLanguage;
    this.targetLanguage = targetLanguage ?? sourceLanguage;
  }

  /** Scans the whole document and attaches the click handlers. */
  async start() {
    await this.registerBindings(document);
  }

  /**
   * Recursively walks the descendants of `node` and attaches a handler to
   * every element carrying `co-stt.start` or `co-stt.stop`. When both
   * attributes are present only `co-stt.start` is honoured. Nodes that were
   * bound by an earlier call are skipped.
   *
   * @param node Root whose `childNodes` are scanned.
   */
  async registerBindings(node: any) {
    for (const currentNode of Array.from<any>(node.childNodes)) {
      if (!currentNode) {
        continue;
      }

      const attributes = currentNode.attributes;
      if (attributes && !this.boundNodes.has(currentNode)) {
        const startAttr = attributes.getNamedItem('co-stt.start');
        const stopAttr = startAttr ? null : attributes.getNamedItem('co-stt.stop');

        if (startAttr) {
          await this.handleStartModifier(currentNode, startAttr);
          this.boundNodes.add(currentNode);
        } else if (stopAttr) {
          await this.handleStopModifier(currentNode, stopAttr);
          this.boundNodes.add(currentNode);
        }
      }

      if (currentNode.childNodes.length > 0) {
        await this.registerBindings(currentNode);
      }
    }
  }

  /**
   * Attaches the click listener that records and translates one utterance.
   *
   * @param node Element that triggers the recording.
   * @param attr The `co-stt.start` attribute; its value is the id of the
   *   element that receives the translated text.
   */
  async handleStartModifier(node: any, attr: Attr) {
    node.addEventListener('click', () => {
      // A second click while a recognition is still running would otherwise
      // leak the previous recognizer.
      this.closeRecognizer();

      const speechConfig = SpeechTranslationConfig.fromSubscription(this.key, this.region);
      speechConfig.speechRecognitionLanguage = this.sourceLanguage;
      speechConfig.addTargetLanguage(this.targetLanguage);

      const audioConfig = AudioConfig.fromDefaultMicrophoneInput();

      // Keep a local reference: `this.recognizer` may point at a newer
      // recognizer by the time the callbacks below run.
      const recognizer = new TranslationRecognizer(speechConfig, audioConfig);
      this.recognizer = recognizer;

      document.dispatchEvent(new CustomEvent('COAzureSTTStartedRecording', {}));
      recognizer.recognizeOnceAsync(
        (result) => {
          if (result.reason === ResultReason.TranslatedSpeech) {
            this.appendTranslation(attr.value, result.translations.get(this.targetLanguage));
          }

          this.finishRecognition(recognizer);
        },
        (err) => {
          console.error(err);

          this.finishRecognition(recognizer);
        }
      );
    });
  }

  /**
   * Attaches the click listener that stops the current recording.
   *
   * @param node Element that triggers the stop.
   * @param attr The `co-stt.stop` attribute (its value is not used).
   */
  async handleStopModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stop();
    });
  }

  /**
   * Closes the active recognizer, if any, and dispatches
   * `COAzureSTTStoppedRecording` on `document`.
   */
  async stop() {
    this.closeRecognizer();
    document.dispatchEvent(new CustomEvent('COAzureSTTStoppedRecording', {}));
  }

  /** Closes and forgets the active recognizer without dispatching an event. */
  private closeRecognizer(): void {
    const active = this.recognizer;
    this.recognizer = undefined;
    active?.close();
  }

  /**
   * Ends a recognition run. Only stops when `recognizer` is still the active
   * one, so a stale callback cannot close a newer recognizer.
   */
  private finishRecognition(recognizer: TranslationRecognizer): void {
    if (this.recognizer === recognizer) {
      void this.stop();
    }
  }

  /**
   * Appends `translation` followed by a space to the element with id `targetId`.
   * Form fields get the text appended to their `value`; any other element gets
   * it appended as a text node, so existing children are not re-parsed and the
   * recognised text is never interpreted as markup.
   */
  private appendTranslation(targetId: string, translation: string | null | undefined): void {
    if (!translation) {
      return;
    }

    const target = document.getElementById(targetId);
    if (target === null) {
      return;
    }

    const text = `${translation} `;
    if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
      target.value += text;
    } else {
      target.insertAdjacentText('beforeend', text);
    }
  }
}
@@ -0,0 +1,366 @@
1
+ import {
2
+ SpeakerAudioDestination,
3
+ AudioConfig,
4
+ SpeechConfig,
5
+ SpeechSynthesizer,
6
+ SpeechSynthesisOutputFormat,
7
+ } from 'microsoft-cognitiveservices-speech-sdk';
8
+
9
/**
 * Reads text aloud with Azure text-to-speech and optionally highlights the
 * word that is currently spoken. Behaviour is bound to DOM elements through
 * attributes (the first match in this order wins per element):
 *
 * - `co-tts.id="<id>"`: click reads the element with that id (its
 *   `co-tts.text` attribute when non-empty, otherwise its `innerText`).
 * - `co-tts.ajax="<url>"`: click fetches the URL and reads the response text.
 * - `co-tts[="<text>"]`: click reads the attribute value, or the element's
 *   `innerText` when the value is empty.
 * - `co-tts.stop` / `co-tts.resume` / `co-tts.pause`: playback controls.
 *
 * Additional attributes on the element that is read:
 * - `co-tts.highlight[="<id>"]`: highlight spoken words inside the element
 *   itself (empty value) or inside the element with the given id.
 * - `co-tts.next="<id>"`: element to continue with when playback ends.
 *
 * Custom events dispatched on `document`: `COAzureTTSStartedPlaying`,
 * `COAzureTTSFinishedPlaying`, `COAzureTTSStoppedPlaying`,
 * `COAzureTTSPausedPlaying`, `COAzureTTSResumedPlaying`.
 */
export class TextToSpeech {
  /** Word-boundary texts that are never highlighted on their own. */
  private static readonly PUNCTUATION: readonly string[] =
    ['.', ',', '!', '?', '*', '(', ')', '&', '\\', '/', '^', '[', ']', '<', '>', ':'];

  key: string;
  region: string;
  voice: string;
  rate: number;
  pitch: number;

  textToRead: string = '';

  wordBoundryList: any[] = [];

  clickedNode: any;
  highlightDiv: any;

  speechConfig: any;
  audioConfig: any;
  player: any;
  synthesizer: any;

  previousWordBoundary: any;

  interval: any;

  /**
   * Per word: offset in the highlight markup after its last highlighted
   * occurrence. Prototype-less on purpose, so spoken words such as
   * "constructor" or "length" cannot collide with inherited properties.
   */
  wordEncounters: Record<string, number> = Object.create(null);
  originalHighlightDivInnerHTML: string = '';
  currentWord: string = '';
  currentOffset: number = 0;
  wordBoundaryOffset: number = 0;

  /** Nodes that already received a click listener from `registerBindings`. */
  private readonly boundNodes = new WeakSet<object>();

  /** Markup last written to `highlightDiv`; avoids rewriting identical HTML on every tick. */
  private renderedHighlightMarkup: string | undefined;

  /**
   * @param key Azure Speech subscription key.
   * @param region Azure region of the Speech resource.
   * @param voice Voice name used for synthesis.
   * @param rate Prosody rate, emitted as a percentage in the SSML.
   * @param pitch Prosody pitch, emitted as a percentage in the SSML.
   */
  constructor(key: string, region: string, voice: string, rate: number = 0, pitch: number = 0) {
    this.key = key;
    this.region = region;
    this.voice = voice;
    this.rate = rate;
    this.pitch = pitch;
  }

  /** Scans the whole document and attaches the click handlers. */
  async start() {
    await this.registerBindings(document);
  }

  /** Sets the voice used for subsequent playback. Returns `this` for chaining. */
  setVoice(voice: string) {
    this.voice = voice;

    return this;
  }

  /** Sets the prosody rate used for subsequent playback. Returns `this` for chaining. */
  setRate(rate: number) {
    this.rate = rate;

    return this;
  }

  /** Sets the prosody pitch used for subsequent playback. Returns `this` for chaining. */
  setPitch(pitch: number) {
    this.pitch = pitch;

    return this;
  }

  /**
   * Recursively walks the descendants of `node` and attaches the handler of
   * the first matching `co-tts*` attribute to each element. Nodes that were
   * bound by an earlier call are skipped, so calling `start()` repeatedly does
   * not stack duplicate listeners.
   *
   * @param node Root whose `childNodes` are scanned.
   */
  async registerBindings(node: any) {
    // Order matters: the first attribute found on an element wins.
    const modifiers: Array<[string, (target: any, attr: Attr) => Promise<void>]> = [
      ['co-tts.id', (target, attr) => this.handleIdModifier(target, attr)],
      ['co-tts.ajax', (target, attr) => this.handleAjaxModifier(target, attr)],
      ['co-tts', (target, attr) => this.handleDefault(target, attr)],
      ['co-tts.stop', (target, attr) => this.handleStopModifier(target, attr)],
      ['co-tts.resume', (target, attr) => this.handleResumeModifier(target, attr)],
      ['co-tts.pause', (target, attr) => this.handlePauseModifier(target, attr)],
    ];

    for (const currentNode of Array.from<any>(node.childNodes)) {
      if (!currentNode) {
        continue;
      }

      const attributes = currentNode.attributes;
      if (attributes && !this.boundNodes.has(currentNode)) {
        for (const [attributeName, bind] of modifiers) {
          const attr = attributes.getNamedItem(attributeName);
          if (attr) {
            await bind(currentNode, attr);
            this.boundNodes.add(currentNode);
            break;
          }
        }
      }

      if (currentNode.childNodes.length > 0) {
        await this.registerBindings(currentNode);
      }
    }
  }

  /**
   * Click handler for `co-tts.id`: reads the element whose id is the
   * attribute value. Does nothing when that element does not exist.
   */
  async handleIdModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      const referenceDiv = document.getElementById(attr.value);
      this.clickedNode = referenceDiv;

      if (!referenceDiv) {
        return;
      }

      // Started only after the lookup succeeded so no timer is left running
      // when there is nothing to play.
      await this.createInterval();

      const explicitText = referenceDiv.getAttribute('co-tts.text');
      this.textToRead = explicitText ? explicitText : referenceDiv.innerText;

      this.applyHighlightTarget(referenceDiv);

      await this.startSynthesizer(node, attr);
    });
  }

  /**
   * Click handler for `co-tts.ajax`: fetches the attribute value as a URL
   * with GET and reads the response body. Failed requests are logged and
   * nothing is played.
   */
  async handleAjaxModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      await this.createInterval();
      this.clickedNode = node;

      try {
        const response = await fetch(attr.value, {
          method: `GET`,
        });
        if (!response.ok) {
          throw new Error(`Request for "${attr.value}" failed with status ${response.status}`);
        }

        this.textToRead = await response.text();
      } catch (error: unknown) {
        // Do not read an error page aloud and do not leave the timer running.
        console.error(error);
        await this.clearInterval();
        return;
      }

      await this.startSynthesizer(node, attr);
    });
  }

  /**
   * Click handler for plain `co-tts`: reads the attribute value, or the
   * element's `innerText` when the value is empty.
   */
  async handleDefault(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.handleWithoutClick(node, attr);
    });
  }

  /**
   * Starts reading `node` immediately, without waiting for a click. Reads
   * `attr.value`, or the node's `innerText` when the value is empty.
   */
  async handleWithoutClick(node: any, attr: Attr) {
    await this.stopPlayer();
    await this.createInterval();
    this.clickedNode = node;
    this.applyHighlightTarget(node);
    this.textToRead = attr.value === '' ? node.innerText : attr.value;

    await this.startSynthesizer(node, attr);
  }

  /** Click handler for `co-tts.stop`: stops playback and dispatches `COAzureTTSStoppedPlaying`. */
  async handleStopModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      document.dispatchEvent(new CustomEvent('COAzureTTSStoppedPlaying', {}));
    });
  }

  /**
   * Click handler for `co-tts.pause`: pauses playback and dispatches
   * `COAzureTTSPausedPlaying`. Ignored when nothing is playing.
   */
  async handlePauseModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      if (this.player === undefined) {
        return;
      }

      await this.clearInterval();
      await this.player.pause();
      document.dispatchEvent(new CustomEvent('COAzureTTSPausedPlaying', {}));
    });
  }

  /**
   * Click handler for `co-tts.resume`: resumes playback and dispatches
   * `COAzureTTSResumedPlaying`. Ignored when there is no player to resume.
   */
  async handleResumeModifier(node: any, attr: Attr) {
    node.addEventListener('click', async () => {
      if (this.player === undefined) {
        return;
      }

      await this.createInterval();
      await this.player.resume();
      document.dispatchEvent(new CustomEvent('COAzureTTSResumedPlaying', {}));
    });
  }

  /**
   * Stops playback, restores the original markup of the highlight element and
   * resets all per-playback state. All work happens synchronously, so callers
   * that do not await the returned promise still observe the reset state.
   */
  async stopPlayer() {
    this.stopHighlightTimer();
    if (this.highlightDiv) {
      this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
    }

    this.textToRead = '';
    this.currentWord = '';
    this.currentOffset = 0;
    this.wordBoundaryOffset = 0;
    this.originalHighlightDivInnerHTML = '';
    this.wordBoundryList = [];
    this.wordEncounters = Object.create(null);
    this.previousWordBoundary = undefined;
    this.renderedHighlightMarkup = undefined;
    if (this.player !== undefined) {
      this.player.pause();
    }
    this.player = undefined;
    this.highlightDiv = undefined;
  }

  /**
   * Creates the player and synthesizer and starts speaking `textToRead`.
   * When the audio ends, playback continues with the element referenced by
   * `co-tts.next` on the clicked node, or `COAzureTTSFinishedPlaying` is
   * dispatched when there is nothing to continue with.
   *
   * @param node Element that triggered playback (not used here).
   * @param attr Attribute that triggered playback (not used here).
   */
  async startSynthesizer(node: any, attr: Attr) {
    this.speechConfig = SpeechConfig.fromSubscription(this.key, this.region);

    this.speechConfig.speechSynthesisVoiceName = `Microsoft Server Speech Text to Speech Voice (${this.voice})`;
    this.speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3;

    // Local references: `this.player` / `this.synthesizer` may be replaced by
    // a newer playback before the callbacks below run.
    const player = new SpeakerAudioDestination();
    this.player = player;

    this.audioConfig = AudioConfig.fromSpeakerOutput(player);
    const synthesizer = new SpeechSynthesizer(this.speechConfig, this.audioConfig);
    this.synthesizer = synthesizer;

    synthesizer.wordBoundary = (s: any, e: any) => {
      // Ignore events that belong to a playback that was already replaced.
      if (this.player === player) {
        this.wordBoundryList.push(e);
      }
    };

    player.onAudioEnd = async () => {
      if (this.player !== player) {
        return;
      }

      const finishedNode = this.clickedNode;
      await this.stopPlayer();

      const nextId = finishedNode?.getAttribute('co-tts.next');
      const nextNode = nextId ? document.getElementById(nextId) : null;
      if (nextNode === null) {
        document.dispatchEvent(new CustomEvent('COAzureTTSFinishedPlaying', {}));
        return;
      }

      const nextText = nextNode.attributes.getNamedItem('co-tts.text');
      if (nextText) {
        await this.handleWithoutClick(nextNode, nextText);
      } else {
        nextNode.dispatchEvent(new Event('click'));
      }
    };

    player.onAudioStart = async () => {
      document.dispatchEvent(new CustomEvent('COAzureTTSStartedPlaying', {}));
    };

    const release = () => {
      synthesizer.close();
      if (this.synthesizer === synthesizer) {
        this.synthesizer = undefined;
      }
    };

    synthesizer.speakSsmlAsync(
      this.buildSSML(this.textToRead),
      () => release(),
      (error: unknown) => {
        console.error(error);
        release();
      }
    );
  }

  /** Stops the highlight timer. */
  async clearInterval() {
    this.stopHighlightTimer();
  }

  /**
   * (Re)starts the 50 ms timer that keeps the highlight in sync with the
   * audio. A timer that is already running is stopped first.
   */
  async createInterval() {
    this.stopHighlightTimer();
    this.interval = setInterval(() => this.updateHighlight(), 50);
  }

  /**
   * Finds the next whole-word occurrence of `subString` in `string` that
   * starts at or after `lastOffset`. The word must be preceded by the start
   * of the string or a character that is neither `-` nor a word character,
   * and must not be followed by a word character.
   *
   * @param string Text (markup) to search in.
   * @param subString Word to look for; treated literally, not as a pattern.
   * @param lastOffset Offset from which to search.
   * @returns Offset of the first character of the word, or -1 when not found.
   */
  getPosition(string: string, subString: string, lastOffset: number) {
    if (subString === '') {
      return -1;
    }

    const from = Math.max(lastOffset, 0);
    const escaped = subString.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const pattern = new RegExp(`(?:^|[^-\\w])(${escaped})(?!\\w)`, 'g');
    // Start one character early so the separator in front of a word that
    // begins exactly at `from` can be matched.
    pattern.lastIndex = Math.max(from - 1, 0);

    let match = pattern.exec(string);
    while (match !== null) {
      // The captured word sits at the end of the match; skip the separator.
      const wordStart = match.index + match[0].length - match[1].length;
      if (wordStart >= from) {
        return wordStart;
      }
      match = pattern.exec(string);
    }

    return -1;
  }

  /**
   * Wraps `text` in the SSML document that is sent to the service. The text
   * and the voice name are XML-escaped so characters such as `&` or `<`
   * cannot break the document.
   */
  buildSSML(text: string) {
    return `<speak xmlns="http://www.w3.org/2001/10/synthesis"
      xmlns:mstts="http://www.w3.org/2001/mstts"
      xmlns:emo="http://www.w3.org/2009/10/emotionml"
      version="1.0"
      xml:lang="en-US">
      <voice name="${TextToSpeech.escapeXml(this.voice)}">
        <prosody rate="${this.rate}%" pitch="${this.pitch}%">
          ${TextToSpeech.escapeXml(text)}
        </prosody>
      </voice>
    </speak>`;
  }

  /** Escapes the characters that are reserved in XML text and attribute values. */
  private static escapeXml(value: string): string {
    return value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /** Stops the highlight timer if one is running. */
  private stopHighlightTimer(): void {
    if (this.interval !== undefined) {
      clearInterval(this.interval);
      this.interval = undefined;
    }
  }

  /**
   * Resolves the element to highlight from `co-tts.highlight` on `source`:
   * the element with the given id, or `source` itself when the value is
   * empty. Highlighting stays disabled when the attribute is absent or the
   * referenced element does not exist.
   */
  private applyHighlightTarget(source: any): void {
    if (!source.hasAttribute('co-tts.highlight')) {
      return;
    }

    const targetId = source.getAttribute('co-tts.highlight');
    const target = targetId ? document.getElementById(targetId) : source;
    if (!target) {
      return;
    }

    this.highlightDiv = target;
    this.originalHighlightDivInnerHTML = target.innerHTML;
  }

  /** Writes `markup` to the highlight element unless it is already shown. */
  private renderHighlight(markup: string): void {
    if (this.renderedHighlightMarkup === markup) {
      return;
    }

    this.highlightDiv.innerHTML = markup;
    this.renderedHighlightMarkup = markup;
  }

  /**
   * One timer tick: determines the word that is currently spoken from the
   * player position and wraps it in `<mark class='co-tts-highlight'>` inside
   * the highlight element. Restores the original markup when no word applies.
   */
  private updateHighlight(): void {
    if (this.player === undefined || !this.highlightDiv) {
      return;
    }

    const original = this.originalHighlightDivInnerHTML;
    // `currentTime * 1000` is compared against `audioOffset / 10000`;
    // presumably seconds and 100 ns ticks, both converted to milliseconds.
    const elapsed = this.player.currentTime * 1000;

    // The last boundary whose offset has already been passed.
    let wordBoundary: any;
    for (const boundary of this.wordBoundryList) {
      if (elapsed > boundary.audioOffset / 10000) {
        wordBoundary = boundary;
      } else {
        break;
      }
    }

    // Punctuation keeps the previously highlighted word highlighted.
    if (wordBoundary !== undefined && TextToSpeech.PUNCTUATION.includes(wordBoundary.text)) {
      wordBoundary = this.previousWordBoundary;
    }

    if (wordBoundary === undefined) {
      this.renderHighlight(original);
      return;
    }

    const word: string = wordBoundary.text;
    if (this.currentWord !== word || this.wordBoundaryOffset !== wordBoundary.textOffset) {
      this.currentOffset = this.getPosition(original, word, this.wordEncounters[word] ?? 0);
      if (this.currentOffset > -1) {
        // Continue after this occurrence the next time the same word is spoken.
        this.wordEncounters[word] = this.currentOffset + wordBoundary.wordLength;
      }
      this.currentWord = word;
      this.wordBoundaryOffset = wordBoundary.textOffset;
    }

    if (this.currentOffset <= -1) {
      this.renderHighlight(original);
      return;
    }

    this.previousWordBoundary = wordBoundary;
    const endOffset = this.currentOffset + wordBoundary.wordLength;
    const startOfString = original.substring(0, this.currentOffset);
    // Re-use the original characters so the result is the original markup
    // plus the <mark> tags only.
    const highlighted = original.substring(this.currentOffset, endOffset);
    const endOfString = original.substring(endOffset);
    this.renderHighlight(`${startOfString}<mark class='co-tts-highlight'>${highlighted}</mark>${endOfString}`);
  }
}