@creativeorange/azure-text-to-speech 1.1.8 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ import {SpeechToText} from "@creativeorange/azure-text-to-speech/dist/co-azure-tts.es";
2
/**
 * Example Nuxt plugin that wires the speech-to-text bindings into the app.
 *
 * A global Vue mixin is installed: the first component `mounted` hook that
 * runs while the bindings are not started schedules `speechToText.start()`
 * 500 ms later, and any component `beforeUnmount` hook that runs while they
 * are started calls `speechToText.stop()` and re-arms the start logic.
 *
 * The bracketed constructor arguments are placeholders to be replaced with
 * real values.
 */
export default defineNuxtPlugin(async (nuxtApp) => {
  const recognizerBridge = new SpeechToText('[key]', '[region]', '[source language]', '[target language]');

  // Shared across every component instance, because the mixin is global.
  let started = false;

  // Schedules the DOM scan once; presumably the delay lets the page render first.
  const scheduleStart = () => {
    if (started) {
      return;
    }

    setTimeout(async () => {
      await recognizerBridge.start();
    }, 500);
    started = true;
  };

  // Stops recognition and allows the next mounted hook to schedule a new scan.
  const shutDown = () => {
    if (!started) {
      return;
    }

    recognizerBridge.stop();
    started = false;
  };

  nuxtApp.vueApp.mixin({
    mounted() {
      scheduleStart();
    },
    beforeUnmount() {
      shutDown();
    },
  });
});
@@ -0,0 +1,28 @@
1
+ import {TextToSpeech} from "@creativeorange/azure-text-to-speech";
2
/**
 * Example Nuxt plugin that wires the text-to-speech bindings into the app.
 *
 * A global Vue mixin is installed: the first component `mounted` hook that
 * runs while the bindings are not started schedules `textToSpeech.start()`
 * 500 ms later, and any component `beforeUnmount` hook that runs while they
 * are started calls `textToSpeech.stopPlayer()` and re-arms the start logic.
 *
 * The bracketed constructor arguments are placeholders to be replaced with
 * real values.
 */
export default defineNuxtPlugin(async (nuxtApp) => {
  const reader = new TextToSpeech(
    '[key]',
    '[region]',
    '[voice]',
    1, // rate
    1 // pitch
  );

  // Shared across every component instance, because the mixin is global.
  let started = false;

  // Schedules the DOM scan once; presumably the delay lets the page render first.
  const scheduleStart = () => {
    if (started) {
      return;
    }

    setTimeout(() => {
      reader.start();
    }, 500);
    started = true;
  };

  // Halts playback and allows the next mounted hook to schedule a new scan.
  const shutDown = () => {
    if (!started) {
      return;
    }

    reader.stopPlayer();
    started = false;
  };

  nuxtApp.vueApp.mixin({
    mounted() {
      scheduleStart();
    },
    beforeUnmount() {
      shutDown();
    }
  });
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@creativeorange/azure-text-to-speech",
3
- "version": "1.1.8",
3
+ "version": "1.2.1",
4
4
  "main": "dist/co-azure-tts.umd.js",
5
5
  "browser": "dist/co-azure-tts.es.js",
6
6
  "scripts": {
@@ -0,0 +1,99 @@
1
+ import {
2
+ SpeechTranslationConfig,
3
+ AudioConfig,
4
+ TranslationRecognizer,
5
+ ResultReason,
6
+ } from 'microsoft-cognitiveservices-speech-sdk';
7
+
8
/**
 * Adds click-driven speech recognition/translation to DOM elements.
 *
 * Elements opt in through attributes:
 * - `co-stt.start="<element id>"`: a click records a single utterance and
 *   appends the translated text to the element with that id.
 * - `co-stt.stop`: a click closes the active recognizer.
 *
 * `COAzureSTTStartedRecording` and `COAzureSTTStoppedRecording` events are
 * dispatched on `document` when recording starts and stops.
 */
export class SpeechToText {
  key: string;
  region: string;
  sourceLanguage: string;
  targetLanguage: string;
  recognizer: TranslationRecognizer | undefined;

  /**
   * Elements that already received a click listener from this instance.
   * `start()` may run more than once (for example after a page change), and
   * without this guard every scan would stack another listener on elements
   * that are still in the document.
   */
  private readonly boundNodes = new WeakSet<object>();

  /**
   * @param key Azure Speech subscription key.
   * @param region Azure Speech region.
   * @param sourceLanguage Language that is spoken into the microphone.
   * @param targetLanguage Language to translate into; falls back to
   *   `sourceLanguage` when omitted or `null`.
   */
  constructor(key: string, region: string, sourceLanguage: string, targetLanguage: string|null = null) {
    this.key = key;
    this.region = region;
    this.sourceLanguage = sourceLanguage;
    this.targetLanguage = targetLanguage ?? sourceLanguage;
  }

  /** Scans the whole document and binds every `co-stt.*` element found. */
  async start(): Promise<void> {
    await this.registerBindings(document);
  }

  /**
   * Recursively walks the descendants of `node` and attaches the start/stop
   * click handlers. When an element carries both attributes, `co-stt.start`
   * wins. Elements bound by an earlier scan are skipped.
   *
   * @param node Root whose `childNodes` are scanned.
   */
  async registerBindings(node: any): Promise<void> {
    for (const currentNode of Array.from<any>(node.childNodes ?? [])) {
      if (!currentNode) {
        continue;
      }

      if (currentNode.attributes && !this.boundNodes.has(currentNode)) {
        const startAttr = currentNode.attributes.getNamedItem('co-stt.start');
        const stopAttr = startAttr ? null : currentNode.attributes.getNamedItem('co-stt.stop');

        if (startAttr) {
          this.boundNodes.add(currentNode);
          await this.handleStartModifier(currentNode, startAttr);
        } else if (stopAttr) {
          this.boundNodes.add(currentNode);
          await this.handleStopModifier(currentNode, stopAttr);
        }
      }

      if (currentNode.childNodes && currentNode.childNodes.length > 0) {
        await this.registerBindings(currentNode);
      }
    }
  }

  /**
   * Makes `node` start a one-shot recognition on click.
   *
   * @param node Element that triggers the recording.
   * @param attr The `co-stt.start` attribute; its value is the id of the
   *   element that receives the translated text.
   */
  async handleStartModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      // A recognizer left over from an earlier click would otherwise be
      // overwritten without ever being closed.
      if (this.recognizer !== undefined) {
        await this.stop();
      }

      const speechConfig = SpeechTranslationConfig.fromSubscription(this.key, this.region);
      speechConfig.speechRecognitionLanguage = this.sourceLanguage;
      speechConfig.addTargetLanguage(this.targetLanguage);

      const audioConfig = AudioConfig.fromDefaultMicrophoneInput();

      const recognizer = new TranslationRecognizer(speechConfig, audioConfig);
      this.recognizer = recognizer;

      // Only tear down the recognizer this click created; a stale callback
      // must not close a recognizer started by a later click.
      const finish = (): void => {
        if (this.recognizer === recognizer) {
          void this.stop();
        }
      };

      document.dispatchEvent(new CustomEvent('COAzureSTTStartedRecording', {}));
      recognizer.recognizeOnceAsync(
        (result) => {
          if (result.reason === ResultReason.TranslatedSpeech) {
            const translation = result.translations.get(this.targetLanguage);
            const target = document.getElementById(attr.value);

            // Skip missing translations instead of appending "undefined ".
            if (translation && target !== null) {
              if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
                target.value += `${translation} `;
              } else {
                // Append as a text node: existing children (and their
                // listeners) stay intact and the recognized text is never
                // parsed as HTML.
                target.append(`${translation} `);
              }
            }
          }

          finish();
        },
        (err) => {
          console.error(err);

          finish();
        }
      );
    });
  }

  /**
   * Makes `node` stop the active recognition on click.
   *
   * @param node Element that triggers the stop.
   * @param attr The `co-stt.stop` attribute (its value is not used).
   */
  async handleStopModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.stop();
    });
  }

  /**
   * Closes the active recognizer, if any, and always dispatches
   * `COAzureSTTStoppedRecording` on `document`.
   */
  async stop(): Promise<void> {
    const active = this.recognizer;
    this.recognizer = undefined;

    try {
      if (active !== undefined) {
        active.close();
      }
    } finally {
      document.dispatchEvent(new CustomEvent('COAzureSTTStoppedRecording', {}));
    }
  }
}
@@ -0,0 +1,373 @@
1
+ import {
2
+ SpeakerAudioDestination,
3
+ AudioConfig,
4
+ SpeechConfig,
5
+ SpeechSynthesizer,
6
+ SpeechSynthesisOutputFormat,
7
+ } from 'microsoft-cognitiveservices-speech-sdk';
8
+
9
/**
 * Adds click-driven speech synthesis (with optional word highlighting) to
 * DOM elements.
 *
 * Elements opt in through attributes, checked in this order (first match wins):
 * - `co-tts.id="<id>"`: reads the element with that id.
 * - `co-tts.ajax="<url>"`: fetches the URL and reads the response body.
 * - `co-tts[="<text>"]`: reads the attribute value, or the element text when empty.
 * - `co-tts.stop`, `co-tts.resume`, `co-tts.pause`: playback controls.
 *
 * Supporting attributes on the element being read: `co-tts.text` (custom
 * text), `co-tts.highlight[="<id>"]` (element whose words are highlighted)
 * and `co-tts.next="<id>"` (element to read afterwards).
 *
 * `COAzureTTSStartedPlaying`, `COAzureTTSFinishedPlaying`,
 * `COAzureTTSStoppedPlaying`, `COAzureTTSPausedPlaying` and
 * `COAzureTTSResumedPlaying` events are dispatched on `document`.
 */
export class TextToSpeech {
  /** Tokens reported as word boundaries that must not move the highlight. */
  private static readonly IGNORED_TOKENS: readonly string[] =
    ['.', ',', '!', '?', '*', '(', ')', '&', '\\', '/', '^', '[', ']', '<', '>', ':'];

  /** Binding attributes in priority order. */
  private static readonly MODIFIERS: readonly string[] =
    ['co-tts.id', 'co-tts.ajax', 'co-tts', 'co-tts.stop', 'co-tts.resume', 'co-tts.pause'];

  key: string;
  region: string;
  voice: string;
  rate: number;
  pitch: number;

  textToRead: string = '';

  wordBoundryList: any[] = [];

  clickedNode: any;
  highlightDiv: any;

  speechConfig: any;
  audioConfig: any;
  player: any;
  synthesizer: any;

  previousWordBoundary: any;

  interval: any;

  /**
   * Per word, the offset in the highlight HTML where the next search for that
   * word starts. A prototype-less object is used so that spoken words such as
   * "length" or "constructor" cannot collide with inherited members.
   */
  wordEncounters: Record<string, number> = Object.create(null);
  originalHighlightDivInnerHTML: string = '';
  currentWord: string = '';
  currentOffset: number = 0;
  wordBoundaryOffset: number = 0;
  privTextOffset: number = 0;

  /** Elements that already received a click listener from this instance. */
  private readonly boundNodes = new WeakSet<object>();

  /** Incremented by every `stopPlayer()`; lets in-flight work detect it was superseded. */
  private playbackGeneration = 0;

  /** HTML last written to `highlightDiv`, used to skip redundant DOM writes. */
  private renderedHighlight: string | undefined;

  /**
   * @param key Azure Speech subscription key.
   * @param region Azure Speech region.
   * @param voice Voice name used for synthesis.
   * @param rate Value emitted as the SSML prosody `rate` percentage.
   * @param pitch Value emitted as the SSML prosody `pitch` percentage.
   */
  constructor(key: string, region: string, voice: string, rate: number = 0, pitch: number = 0) {
    this.key = key;
    this.region = region;
    this.voice = voice;
    this.rate = rate;
    this.pitch = pitch;
  }

  /** Scans the whole document and binds every `co-tts*` element found. */
  async start(): Promise<void> {
    await this.registerBindings(document);
  }

  /** Sets the voice for subsequent playbacks. @returns this instance, for chaining. */
  setVoice(voice: string) {
    this.voice = voice;

    return this;
  }

  /** Sets the prosody rate for subsequent playbacks. @returns this instance, for chaining. */
  setRate(rate: number) {
    this.rate = rate;

    return this;
  }

  /** Sets the prosody pitch for subsequent playbacks. @returns this instance, for chaining. */
  setPitch(pitch: number) {
    this.pitch = pitch;

    return this;
  }

  /**
   * Recursively walks the descendants of `node` and attaches the handler of
   * the first matching `co-tts*` attribute. Elements bound by an earlier scan
   * are skipped so repeated `start()` calls do not stack listeners.
   *
   * @param node Root whose `childNodes` are scanned.
   */
  async registerBindings(node: any): Promise<void> {
    for (const currentNode of Array.from<any>(node.childNodes ?? [])) {
      if (!currentNode) {
        continue;
      }

      await this.bindNode(currentNode);

      if (currentNode.childNodes && currentNode.childNodes.length > 0) {
        await this.registerBindings(currentNode);
      }
    }
  }

  /** Attaches the handler for the highest-priority `co-tts*` attribute on `node`. */
  private async bindNode(node: any): Promise<void> {
    if (!node.attributes || this.boundNodes.has(node)) {
      return;
    }

    for (const name of TextToSpeech.MODIFIERS) {
      const attr = node.attributes.getNamedItem(name);
      if (!attr) {
        continue;
      }

      this.boundNodes.add(node);
      switch (name) {
        case 'co-tts.id':
          await this.handleIdModifier(node, attr);
          break;
        case 'co-tts.ajax':
          await this.handleAjaxModifier(node, attr);
          break;
        case 'co-tts':
          await this.handleDefault(node, attr);
          break;
        case 'co-tts.stop':
          await this.handleStopModifier(node, attr);
          break;
        case 'co-tts.resume':
          await this.handleResumeModifier(node, attr);
          break;
        default:
          await this.handlePauseModifier(node, attr);
          break;
      }
      return;
    }
  }

  /**
   * On click, reads the element whose id is the value of `attr`. The text is
   * that element's `co-tts.text` attribute when non-empty, otherwise its
   * `innerText`. Does nothing beyond stopping playback when the element does
   * not exist.
   */
  async handleIdModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      const referenceDiv = document.getElementById(attr.value);
      this.clickedNode = referenceDiv;

      if (!referenceDiv) {
        return;
      }

      await this.createInterval();

      const customText = referenceDiv.getAttribute('co-tts.text');
      this.textToRead = customText ? customText : referenceDiv.innerText;

      this.prepareHighlight(referenceDiv);

      await this.startSynthesizer(node, attr);
    });
  }

  /**
   * On click, fetches the URL in `attr` and reads the response body. Failed
   * requests are logged and nothing is played; a response that arrives after
   * playback was stopped or restarted is discarded.
   */
  async handleAjaxModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      this.clickedNode = node;
      const generation = this.playbackGeneration;

      let text: string;
      try {
        const response = await fetch(attr.value, {
          method: `GET`,
        });
        if (!response.ok) {
          throw new Error(`Request for "${attr.value}" failed with status ${response.status}`);
        }
        text = await response.text();
      } catch (error: unknown) {
        console.error(error);
        return;
      }

      // Something else stopped or started playback while the request was in flight.
      if (generation !== this.playbackGeneration) {
        return;
      }

      await this.createInterval();
      this.textToRead = text;

      await this.startSynthesizer(node, attr);
    });
  }

  /**
   * On click, reads `node` itself: the value of `attr` when non-empty,
   * otherwise the element's `innerText`.
   */
  async handleDefault(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.handleWithoutClick(node, attr);
    });
  }

  /**
   * Immediately reads `node` (no click involved): the value of `attr` when
   * non-empty, otherwise the element's `innerText`. Used to continue a
   * `co-tts.next` chain.
   */
  async handleWithoutClick(node: any, attr: Attr): Promise<void> {
    await this.stopPlayer();
    await this.createInterval();
    this.clickedNode = node;

    this.prepareHighlight(node);
    this.textToRead = attr.value === '' ? node.innerText : attr.value;

    await this.startSynthesizer(node, attr);
  }

  /** On click, stops playback and dispatches `COAzureTTSStoppedPlaying`. */
  async handleStopModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.stopPlayer();
      document.dispatchEvent(new CustomEvent('COAzureTTSStoppedPlaying', {}));
    });
  }

  /**
   * On click, pauses playback and dispatches `COAzureTTSPausedPlaying`.
   * Without an active player the click only stops the highlight timer.
   */
  async handlePauseModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      await this.clearInterval();
      if (this.player === undefined) {
        return;
      }

      await this.player.pause();
      document.dispatchEvent(new CustomEvent('COAzureTTSPausedPlaying', {}));
    });
  }

  /**
   * On click, resumes playback and dispatches `COAzureTTSResumedPlaying`.
   * Ignored when there is no active player.
   */
  async handleResumeModifier(node: any, attr: Attr): Promise<void> {
    node.addEventListener('click', async () => {
      if (this.player === undefined) {
        return;
      }

      await this.createInterval();
      await this.player.resume();
      document.dispatchEvent(new CustomEvent('COAzureTTSResumedPlaying', {}));
    });
  }

  /**
   * Stops the highlight timer, restores the highlighted element's original
   * HTML, pauses the player and resets all per-playback state.
   */
  async stopPlayer(): Promise<void> {
    this.playbackGeneration += 1;
    await this.clearInterval();
    if (this.highlightDiv) {
      this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
    }

    this.textToRead = '';
    this.currentWord = '';
    this.originalHighlightDivInnerHTML = '';
    this.wordBoundryList = [];
    this.wordEncounters = Object.create(null);
    if (this.player !== undefined) {
      this.player.pause();
    }
    this.player = undefined;
    this.highlightDiv = undefined;
    this.renderedHighlight = undefined;
    // Reset so a punctuation token at the start of the next playback cannot
    // fall back to a word boundary from the previous one.
    this.previousWordBoundary = undefined;
    this.currentOffset = 0;
    this.wordBoundaryOffset = 0;
    this.privTextOffset = 0;
  }

  /**
   * Creates a player and synthesizer and speaks `textToRead`.
   *
   * When the audio ends, playback state is reset and either the element
   * referenced by the finished element's `co-tts.next` attribute is read, or
   * `COAzureTTSFinishedPlaying` is dispatched (also when that element does
   * not exist).
   *
   * @param node Element that triggered playback (not used here).
   * @param attr Attribute that triggered playback (not used here).
   */
  async startSynthesizer(node: any, attr: Attr): Promise<void> {
    const speechConfig = SpeechConfig.fromSubscription(this.key, this.region);

    speechConfig.speechSynthesisVoiceName = `Microsoft Server Speech Text to Speech Voice (${this.voice})`;
    speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3;

    const player = new SpeakerAudioDestination();
    const audioConfig = AudioConfig.fromSpeakerOutput(player);
    const synthesizer = new SpeechSynthesizer(speechConfig, audioConfig);

    this.speechConfig = speechConfig;
    this.player = player;
    this.audioConfig = audioConfig;
    this.synthesizer = synthesizer;

    synthesizer.wordBoundary = (_sender: unknown, event: unknown) => {
      // Ignore boundaries that belong to a playback that was already replaced.
      if (this.player === player) {
        this.wordBoundryList.push(event);
      }
    };

    player.onAudioEnd = async () => {
      if (this.player !== player) {
        return;
      }

      const finishedNode = this.clickedNode;
      await this.stopPlayer();

      const nextId = finishedNode?.getAttribute?.('co-tts.next');
      const nextNode = nextId ? document.getElementById(nextId) : null;

      if (!nextNode) {
        document.dispatchEvent(new CustomEvent('COAzureTTSFinishedPlaying', {}));
        return;
      }

      const textAttr = nextNode.attributes.getNamedItem('co-tts.text');
      if (textAttr) {
        await this.handleWithoutClick(nextNode, textAttr);
      } else {
        nextNode.dispatchEvent(new Event('click'));
      }
    };

    player.onAudioStart = async () => {
      document.dispatchEvent(new CustomEvent('COAzureTTSStartedPlaying', {}));
    };

    // Close the synthesizer created by THIS call; `this.synthesizer` may
    // already point at a newer instance (or be undefined) when this runs.
    const release = (): void => {
      synthesizer.close();
      if (this.synthesizer === synthesizer) {
        this.synthesizer = undefined;
      }
    };

    synthesizer.speakSsmlAsync(
      this.buildSSML(this.textToRead),
      release,
      (error: unknown) => {
        console.error(error);
        release();
      }
    );
  }

  /** Stops the highlight timer. */
  async clearInterval(): Promise<void> {
    clearInterval(this.interval);
    this.interval = undefined;
  }

  /**
   * (Re)starts the timer that refreshes the highlighted word every 50 ms.
   * Any previous timer is cleared first so timers never stack.
   */
  async createInterval(): Promise<void> {
    clearInterval(this.interval);
    this.interval = setInterval(() => this.updateHighlight(), 50);
  }

  /** Highlights the word matching the player's current position, if any. */
  private updateHighlight(): void {
    if (this.player === undefined || !this.highlightDiv) {
      return;
    }

    const original = this.originalHighlightDivInnerHTML;

    // `audioOffset` is divided by 10000 to compare it with the player time
    // in milliseconds; keep the last boundary that has already been reached.
    const elapsedMs = this.player.currentTime * 1000;
    let wordBoundary: any;
    for (const candidate of this.wordBoundryList) {
      if (elapsedMs > candidate.audioOffset / 10000) {
        wordBoundary = candidate;
      } else {
        break;
      }
    }

    if (wordBoundary !== undefined && TextToSpeech.IGNORED_TOKENS.includes(wordBoundary.text)) {
      // Keep the previously highlighted word while punctuation is "spoken".
      wordBoundary = this.previousWordBoundary ?? undefined;
    }

    // NOTE(review): the original compared the SDK's internal `privTextOffset`;
    // the public `textOffset` is assumed to expose the same value — confirm.
    if (wordBoundary === undefined || this.privTextOffset > wordBoundary.textOffset) {
      this.renderHighlight(original);
      return;
    }

    const word: string = wordBoundary.text;
    this.privTextOffset = wordBoundary.textOffset;

    if (this.currentWord !== word || this.wordBoundaryOffset !== wordBoundary.textOffset) {
      this.currentOffset = this.getPosition(original, word, this.wordEncounters[word] ?? 0);
      if (this.currentOffset >= 0) {
        // The next occurrence of this word is searched after this one.
        this.wordEncounters[word] = this.currentOffset + word.length;
      }
      this.currentWord = word;
      this.wordBoundaryOffset = wordBoundary.textOffset;
    }

    if (this.currentOffset < 0) {
      this.renderHighlight(original);
      return;
    }

    this.previousWordBoundary = wordBoundary;
    const endOffset = this.currentOffset + word.length;
    // Re-use the slice of the original HTML rather than the reported text so
    // nothing new is ever parsed as markup.
    this.renderHighlight(
      `${original.substring(0, this.currentOffset)}` +
      `<mark class='co-tts-highlight'>${original.substring(this.currentOffset, endOffset)}</mark>` +
      `${original.substring(endOffset)}`
    );
  }

  /** Writes `html` to the highlight element unless it is already displayed. */
  private renderHighlight(html: string): void {
    if (!this.highlightDiv || this.renderedHighlight === html) {
      return;
    }

    this.highlightDiv.innerHTML = html;
    this.renderedHighlight = html;
  }

  /**
   * Resolves which element gets highlighted while `source` is read: the
   * element whose id is the value of `co-tts.highlight`, or `source` itself
   * when the attribute is empty. Highlighting is skipped when the attribute
   * is absent or the referenced element does not exist.
   */
  private prepareHighlight(source: any): void {
    if (!source.hasAttribute('co-tts.highlight')) {
      return;
    }

    const targetId = source.getAttribute('co-tts.highlight');
    const target = targetId ? document.getElementById(targetId) : source;
    if (!target) {
      return;
    }

    this.highlightDiv = target;
    this.originalHighlightDivInnerHTML = target.innerHTML;
    this.renderedHighlight = undefined;
  }

  /**
   * Finds the next whole-word occurrence of `subString` in `string`.
   *
   * A match must be preceded by the start of the searched range or by a
   * character other than `-` and word characters. The search runs on raw
   * HTML, so it can also match inside tags or attribute values.
   *
   * @param string Text (HTML) to search in.
   * @param subString Word to find; treated literally, not as a pattern.
   * @param lastOffset Offset in `string` at which the search starts.
   * @returns Offset of the word within `string`, or `-1` when not found.
   */
  getPosition(string: string, subString: string, lastOffset: number): number {
    if (subString === '') {
      return -1;
    }

    const from = Number.isFinite(lastOffset) ? Math.max(0, lastOffset) : 0;
    const escaped = subString.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `\b` is only meaningful after a word character.
    const trailingBoundary = /\w$/.test(subString) ? '\\b' : '';
    const match = new RegExp(`(?:^|[^-\\w])(${escaped})${trailingBoundary}`).exec(string.slice(from));

    if (match === null) {
      return -1;
    }

    // The match may include one leading separator character; skip past it.
    return from + match.index + (match[0].length - match[1].length);
  }

  /**
   * Wraps `text` in the SSML document sent to the synthesizer. `text` and
   * the voice name are XML-escaped, so characters such as `&` and `<` are
   * passed through as literal text.
   *
   * @param text Plain text to speak.
   * @returns The SSML document.
   */
  buildSSML(text: string): string {
    return `<speak xmlns="http://www.w3.org/2001/10/synthesis"
      xmlns:mstts="http://www.w3.org/2001/mstts"
      xmlns:emo="http://www.w3.org/2009/10/emotionml"
      version="1.0"
      xml:lang="en-US">
      <voice name="${this.escapeXml(this.voice)}">
      <prosody rate="${this.rate}%" pitch="${this.pitch}%">
      ${this.escapeXml(text)}
      </prosody>
      </voice>
      </speak>`;
  }

  /** Escapes the five XML special characters. */
  private escapeXml(value: string): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }
}