web-speech-cognitive-services 7.1.4-master.151bc9b → 8.0.0-main.181f814
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/web-speech-cognitive-services.d.mts +283 -0
- package/dist/web-speech-cognitive-services.d.ts +283 -0
- package/dist/web-speech-cognitive-services.development.js +25696 -0
- package/dist/web-speech-cognitive-services.development.js.map +1 -0
- package/dist/web-speech-cognitive-services.js +1356 -0
- package/dist/web-speech-cognitive-services.js.map +1 -0
- package/dist/web-speech-cognitive-services.mjs +1321 -0
- package/dist/web-speech-cognitive-services.mjs.map +1 -0
- package/dist/web-speech-cognitive-services.production.min.js +31 -0
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -0
- package/package.json +67 -47
- package/CHANGELOG.md +0 -372
- package/lib/BingSpeech/SpeechToText/SpeechGrammarList.js +0 -94
- package/lib/BingSpeech/SpeechToText/SpeechGrammarList.js.map +0 -1
- package/lib/BingSpeech/SpeechToText/createSpeechRecognitionPonyfill.js +0 -483
- package/lib/BingSpeech/SpeechToText/createSpeechRecognitionPonyfill.js.map +0 -1
- package/lib/BingSpeech/SpeechToText.js +0 -14
- package/lib/BingSpeech/SpeechToText.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/AudioContextConsumer.js +0 -122
- package/lib/BingSpeech/TextToSpeech/AudioContextConsumer.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/AudioContextQueue.js +0 -104
- package/lib/BingSpeech/TextToSpeech/AudioContextQueue.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/SpeechSynthesisUtterance.js +0 -264
- package/lib/BingSpeech/TextToSpeech/SpeechSynthesisUtterance.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/SpeechSynthesisVoice.js +0 -61
- package/lib/BingSpeech/TextToSpeech/SpeechSynthesisVoice.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/buildSSML.js +0 -32
- package/lib/BingSpeech/TextToSpeech/buildSSML.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/createSpeechSynthesisPonyfill.js +0 -220
- package/lib/BingSpeech/TextToSpeech/createSpeechSynthesisPonyfill.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/fetchSpeechData.js +0 -74
- package/lib/BingSpeech/TextToSpeech/fetchSpeechData.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/fetchVoices.js +0 -335
- package/lib/BingSpeech/TextToSpeech/fetchVoices.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/isSSML.js +0 -13
- package/lib/BingSpeech/TextToSpeech/isSSML.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech/subscribeEvent.js +0 -14
- package/lib/BingSpeech/TextToSpeech/subscribeEvent.js.map +0 -1
- package/lib/BingSpeech/TextToSpeech.js +0 -14
- package/lib/BingSpeech/TextToSpeech.js.map +0 -1
- package/lib/BingSpeech/Util/DOMEventEmitter.js +0 -61
- package/lib/BingSpeech/Util/DOMEventEmitter.js.map +0 -1
- package/lib/BingSpeech/Util/createFetchTokenUsingSubscriptionKey.js +0 -41
- package/lib/BingSpeech/Util/createFetchTokenUsingSubscriptionKey.js.map +0 -1
- package/lib/BingSpeech/fetchAuthorizationToken.js +0 -57
- package/lib/BingSpeech/fetchAuthorizationToken.js.map +0 -1
- package/lib/BingSpeech/index.js +0 -84
- package/lib/BingSpeech/index.js.map +0 -1
- package/lib/SpeechServices/SpeechSDK.js +0 -19
- package/lib/SpeechServices/SpeechSDK.js.map +0 -1
- package/lib/SpeechServices/SpeechToText/SpeechGrammarList.js +0 -45
- package/lib/SpeechServices/SpeechToText/SpeechGrammarList.js.map +0 -1
- package/lib/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResultList.js +0 -56
- package/lib/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResultList.js.map +0 -1
- package/lib/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js +0 -984
- package/lib/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js.map +0 -1
- package/lib/SpeechServices/SpeechToText.js +0 -24
- package/lib/SpeechServices/SpeechToText.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/AudioContextConsumer.js +0 -92
- package/lib/SpeechServices/TextToSpeech/AudioContextConsumer.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/AudioContextQueue.js +0 -111
- package/lib/SpeechServices/TextToSpeech/AudioContextQueue.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisEvent.js +0 -40
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisEvent.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js +0 -283
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js +0 -63
- package/lib/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/buildSSML.js +0 -32
- package/lib/SpeechServices/TextToSpeech/buildSSML.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js +0 -282
- package/lib/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/fetchCustomVoices.js +0 -110
- package/lib/SpeechServices/TextToSpeech/fetchCustomVoices.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/fetchSpeechData.js +0 -127
- package/lib/SpeechServices/TextToSpeech/fetchSpeechData.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/fetchVoices.js +0 -87
- package/lib/SpeechServices/TextToSpeech/fetchVoices.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/isSSML.js +0 -13
- package/lib/SpeechServices/TextToSpeech/isSSML.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech/subscribeEvent.js +0 -14
- package/lib/SpeechServices/TextToSpeech/subscribeEvent.js.map +0 -1
- package/lib/SpeechServices/TextToSpeech.js +0 -14
- package/lib/SpeechServices/TextToSpeech.js.map +0 -1
- package/lib/SpeechServices/fetchAuthorizationToken.js +0 -58
- package/lib/SpeechServices/fetchAuthorizationToken.js.map +0 -1
- package/lib/SpeechServices/patchOptions.js +0 -213
- package/lib/SpeechServices/patchOptions.js.map +0 -1
- package/lib/SpeechServices/resolveFunctionOrReturnValue.js +0 -11
- package/lib/SpeechServices/resolveFunctionOrReturnValue.js.map +0 -1
- package/lib/SpeechServices.js +0 -73
- package/lib/SpeechServices.js.map +0 -1
- package/lib/Util/arrayToMap.js +0 -28
- package/lib/Util/arrayToMap.js.map +0 -1
- package/lib/Util/createPromiseQueue.js +0 -40
- package/lib/Util/createPromiseQueue.js.map +0 -1
- package/lib/index.js +0 -14
- package/lib/index.js.map +0 -1
- package/umd/web-speech-cognitive-services.development.js +0 -4740
- package/umd/web-speech-cognitive-services.production.min.js +0 -2
@@ -0,0 +1,1356 @@
+"use strict";
+var __create = Object.create;
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+// src/index.js
+var src_exports = {};
+__export(src_exports, {
+  createSpeechRecognitionPonyfill: () => SpeechToText_default,
+  createSpeechRecognitionPonyfillFromRecognizer: () => createSpeechRecognitionPonyfillFromRecognizer,
+  createSpeechServicesPonyfill: () => createSpeechServicesPonyfill,
+  createSpeechSynthesisPonyfill: () => TextToSpeech_default,
+  fetchAuthorizationToken: () => fetchAuthorizationToken_default
+});
+module.exports = __toCommonJS(src_exports);
+
+// ../../node_modules/p-defer/index.js
+function pDefer() {
+  const deferred = {};
+  deferred.promise = new Promise((resolve, reject) => {
+    deferred.resolve = resolve;
+    deferred.reject = reject;
+  });
+  return deferred;
+}
+
+// src/Util/createPromiseQueue.js
+function createPromiseQueue_default() {
+  let shiftDeferred;
+  const queue = [];
+  const push = (value) => {
+    if (shiftDeferred) {
+      const { resolve } = shiftDeferred;
+      shiftDeferred = null;
+      resolve(value);
+    } else {
+      queue.push(value);
+    }
+  };
+  const shift = () => {
+    if (queue.length) {
+      return Promise.resolve(queue.shift());
+    }
+    return (shiftDeferred || (shiftDeferred = pDefer())).promise;
+  };
+  return {
+    push,
+    shift
+  };
+}
+
+// src/SpeechServices/resolveFunctionOrReturnValue.ts
+function isFunction(value) {
+  return typeof value === "function";
+}
+function resolveFunctionOrReturnValue(fnOrValue) {
+  return isFunction(fnOrValue) ? fnOrValue() : fnOrValue;
+}
+
+// src/SpeechServices/patchOptions.js
+var shouldWarnOnSubscriptionKey = true;
+function patchOptions({
+  authorizationToken,
+  credentials,
+  looseEvent,
+  looseEvents,
+  region = "westus",
+  subscriptionKey,
+  ...otherOptions
+} = {}) {
+  if (typeof looseEvent !== "undefined") {
+    console.warn('web-speech-cognitive-services: The option "looseEvent" should be named as "looseEvents".');
+    looseEvents = looseEvent;
+  }
+  if (!credentials) {
+    if (!authorizationToken && !subscriptionKey) {
+      throw new Error("web-speech-cognitive-services: Credentials must be specified.");
+    } else {
+      console.warn(
+        "web-speech-cognitive-services: We are deprecating authorizationToken, region, and subscriptionKey. Please use credentials instead. The deprecated option will be removed on or after 2020-11-14."
+      );
+      credentials = async () => authorizationToken ? { authorizationToken: await resolveFunctionOrReturnValue(authorizationToken), region } : { region, subscriptionKey: await resolveFunctionOrReturnValue(subscriptionKey) };
+    }
+  }
+  return {
+    ...otherOptions,
+    fetchCredentials: async () => {
+      const {
+        authorizationToken: authorizationToken2,
+        customVoiceHostname,
+        region: region2,
+        speechRecognitionHostname,
+        speechSynthesisHostname,
+        subscriptionKey: subscriptionKey2
+      } = await resolveFunctionOrReturnValue(credentials);
+      if (!authorizationToken2 && !subscriptionKey2 || authorizationToken2 && subscriptionKey2) {
+        throw new Error(
+          'web-speech-cognitive-services: Either "authorizationToken" or "subscriptionKey" must be provided.'
+        );
+      } else if (!region2 && !(speechRecognitionHostname && speechSynthesisHostname)) {
+        throw new Error(
+          'web-speech-cognitive-services: Either "region" or "speechRecognitionHostname" and "speechSynthesisHostname" must be set.'
+        );
+      } else if (region2 && (customVoiceHostname || speechRecognitionHostname || speechSynthesisHostname)) {
+        throw new Error(
+          'web-speech-cognitive-services: Only either "region" or "customVoiceHostname", "speechRecognitionHostname" and "speechSynthesisHostname" can be set.'
+        );
+      } else if (authorizationToken2) {
+        if (typeof authorizationToken2 !== "string") {
+          throw new Error('web-speech-cognitive-services: "authorizationToken" must be a string.');
+        }
+      } else if (typeof subscriptionKey2 !== "string") {
+        throw new Error('web-speech-cognitive-services: "subscriptionKey" must be a string.');
+      }
+      if (shouldWarnOnSubscriptionKey && subscriptionKey2) {
+        console.warn(
+          "web-speech-cognitive-services: In production environment, subscription key should not be used, authorization token should be used instead."
+        );
+        shouldWarnOnSubscriptionKey = false;
+      }
+      const resolvedCredentials = authorizationToken2 ? { authorizationToken: authorizationToken2 } : { subscriptionKey: subscriptionKey2 };
+      if (region2) {
+        resolvedCredentials.region = region2;
+      } else {
+        resolvedCredentials.customVoiceHostname = customVoiceHostname;
+        resolvedCredentials.speechRecognitionHostname = speechRecognitionHostname;
+        resolvedCredentials.speechSynthesisHostname = speechSynthesisHostname;
+      }
+      return resolvedCredentials;
+    },
+    looseEvents
+  };
+}
+
+// src/SpeechServices/SpeechSDK.js
+var import_microsoft_cognitiveservices_speech = require("microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk");
+var SpeechSDK_default = {
+  AudioConfig: import_microsoft_cognitiveservices_speech.AudioConfig,
+  OutputFormat: import_microsoft_cognitiveservices_speech.OutputFormat,
+  ResultReason: import_microsoft_cognitiveservices_speech.ResultReason,
+  SpeechConfig: import_microsoft_cognitiveservices_speech.SpeechConfig,
+  SpeechRecognizer: import_microsoft_cognitiveservices_speech.SpeechRecognizer
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionAlternative.ts
+var SpeechRecognitionAlternative = class {
+  constructor({ confidence, transcript }) {
+    this.#confidence = confidence;
+    this.#transcript = transcript;
+  }
+  #confidence;
+  #transcript;
+  get confidence() {
+    return this.#confidence;
+  }
+  get transcript() {
+    return this.#transcript;
+  }
+};
+
+// src/SpeechServices/SpeechToText/FakeArray.ts
+var FakeArray = class {
+  constructor(array) {
+    if (!array) {
+      throw new Error("array must be set.");
+    }
+    this.#array = array;
+    for (const key in array) {
+      Object.defineProperty(this, key, {
+        enumerable: true,
+        get() {
+          return array[key];
+        }
+      });
+    }
+  }
+  #array;
+  [Symbol.iterator]() {
+    return this.#array[Symbol.iterator]();
+  }
+  get length() {
+    return this.#array.length;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResult.ts
+var SpeechRecognitionResult = class extends FakeArray {
+  constructor(init) {
+    super(init.results);
+    this.#isFinal = init.isFinal;
+  }
+  #isFinal;
+  get isFinal() {
+    return this.#isFinal;
+  }
+};
+
+// src/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResult.ts
+var {
+  ResultReason: { RecognizingSpeech, RecognizedSpeech }
+} = SpeechSDK_default;
+function cognitiveServiceEventResultToWebSpeechRecognitionResult_default(result, init) {
+  const { maxAlternatives = Infinity, textNormalization = "display" } = init || {};
+  const json = typeof result.json === "string" ? JSON.parse(result.json) : result.json;
+  if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !json.NBest) {
+    return new SpeechRecognitionResult({
+      isFinal: result.reason === RecognizedSpeech,
+      results: [
+        new SpeechRecognitionAlternative({
+          confidence: 0.5,
+          transcript: result.text
+        })
+      ]
+    });
+  } else if (result.reason === RecognizedSpeech) {
+    return new SpeechRecognitionResult({
+      isFinal: true,
+      results: (json.NBest || []).slice(0, maxAlternatives).map(
+        ({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => new SpeechRecognitionAlternative({
+          confidence,
+          transcript: textNormalization === "itn" ? itn : textNormalization === "lexical" ? lexical : textNormalization === "maskeditn" ? maskedITN : display
+        })
+      )
+    });
+  }
+  return new SpeechRecognitionResult({ isFinal: false, results: [] });
+}
+
+// src/SpeechServices/SpeechToText/EventListenerMap.ts
+var EventListenerMap = class {
+  constructor(eventTarget) {
+    this.#eventTarget = eventTarget;
+    this.#propertyMap = {};
+  }
+  #eventTarget;
+  #propertyMap;
+  getProperty(name) {
+    return this.#propertyMap[name];
+  }
+  setProperty(name, value) {
+    const existing = this.#propertyMap[name];
+    existing && this.#eventTarget.removeEventListener(name, existing);
+    if (value) {
+      this.#eventTarget.addEventListener(name, value);
+    }
+    this.#propertyMap[name] = value;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechGrammarList.js
+var SpeechGrammarList_default = class {
+  constructor() {
+    this._phrases = [];
+  }
+  addFromString() {
+    throw new Error("JSGF is not supported");
+  }
+  get phrases() {
+    return this._phrases;
+  }
+  set phrases(value) {
+    if (Array.isArray(value)) {
+      this._phrases = value;
+    } else if (typeof value === "string") {
+      this._phrases = [value];
+    } else {
+      throw new Error(`The provided value is not an array or of type 'string'`);
+    }
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionErrorEvent.ts
+var SpeechRecognitionErrorEvent = class extends Event {
+  constructor(type, { error, message }) {
+    super(type);
+    this.#error = error;
+    this.#message = message;
+  }
+  #error;
+  #message;
+  get error() {
+    return this.#error;
+  }
+  get message() {
+    return this.#message;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResultList.ts
+var SpeechRecognitionResultList = class extends FakeArray {
+  constructor(result) {
+    super(result);
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionEvent.ts
+var SpeechRecognitionEvent = class extends Event {
+  constructor(type, { data, resultIndex, results } = {}) {
+    super(type);
+    this.#data = data;
+    this.#resultIndex = resultIndex;
+    this.#results = results || new SpeechRecognitionResultList([]);
+  }
+  #data;
+  // TODO: "resultIndex" should be set.
+  #resultIndex;
+  #results;
+  get data() {
+    return this.#data;
+  }
+  get resultIndex() {
+    return this.#resultIndex;
+  }
+  get results() {
+    return this.#results;
+  }
+};
+
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js
+var { AudioConfig: AudioConfig2, OutputFormat: OutputFormat2, ResultReason: ResultReason2, SpeechConfig: SpeechConfig2, SpeechRecognizer: SpeechRecognizer2 } = SpeechSDK_default;
+function serializeRecognitionResult({ duration, errorDetails, json, offset, properties, reason, resultId, text }) {
+  return {
+    duration,
+    errorDetails,
+    json: JSON.parse(json),
+    offset,
+    properties,
+    reason,
+    resultId,
+    text
+  };
+}
+function averageAmplitude(arrayBuffer) {
+  const array = new Int16Array(arrayBuffer);
+  return [].reduce.call(array, (averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
+}
+function cognitiveServicesAsyncToPromise(fn) {
+  return (...args) => new Promise((resolve, reject) => fn(...args, resolve, reject));
+}
+function prepareAudioConfig(audioConfig) {
+  const originalAttach = audioConfig.attach;
+  const boundOriginalAttach = audioConfig.attach.bind(audioConfig);
+  let firstChunk;
+  let muted;
+  audioConfig.attach = async () => {
+    const reader = await boundOriginalAttach();
+    return {
+      ...reader,
+      read: async () => {
+        const chunk = await reader.read();
+        if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
+          audioConfig.events.onEvent({ name: "FirstAudibleChunk" });
+          firstChunk = true;
+        }
+        if (muted) {
+          return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
+        }
+        return chunk;
+      }
+    };
+  };
+  return {
+    audioConfig,
+    pause: () => {
+      muted = true;
+    },
+    unprepare: () => {
+      audioConfig.attach = originalAttach;
+    }
+  };
+}
+function createSpeechRecognitionPonyfillFromRecognizer({
+  createRecognizer,
+  enableTelemetry,
+  looseEvents,
+  referenceGrammars,
+  textNormalization
+}) {
+  SpeechRecognizer2.enableTelemetry(enableTelemetry !== false);
+  class SpeechRecognition extends EventTarget {
+    constructor() {
+      super();
+      this._continuous = false;
+      this._interimResults = false;
+      this._lang = typeof window !== "undefined" ? window.document.documentElement.getAttribute("lang") || window.navigator.language : "en-US";
+      this._grammars = new SpeechGrammarList_default();
+      this._maxAlternatives = 1;
+      this.#eventListenerMap = new EventListenerMap(this);
+    }
+    /** @type { import('./SpeechRecognitionEventListenerMap').SpeechRecognitionEventListenerMap } */
+    #eventListenerMap;
+    emitCognitiveServices(type, event) {
+      this.dispatchEvent(
+        new SpeechRecognitionEvent("cognitiveservices", {
+          data: {
+            ...event,
+            type
+          }
+        })
+      );
+    }
+    get continuous() {
+      return this._continuous;
+    }
+    set continuous(value) {
+      this._continuous = value;
+    }
+    get grammars() {
+      return this._grammars;
+    }
+    set grammars(value) {
+      if (value instanceof SpeechGrammarList_default) {
+        this._grammars = value;
+      } else {
+        throw new Error(`The provided value is not of type 'SpeechGrammarList'`);
+      }
+    }
+    get interimResults() {
+      return this._interimResults;
+    }
+    set interimResults(value) {
+      this._interimResults = value;
+    }
+    get maxAlternatives() {
+      return this._maxAlternatives;
+    }
+    set maxAlternatives(value) {
+      this._maxAlternatives = value;
+    }
+    get lang() {
+      return this._lang;
+    }
+    set lang(value) {
+      this._lang = value;
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined } */
+    get onaudioend() {
+      return this.#eventListenerMap.getProperty("audioend");
+    }
+    set onaudioend(value) {
+      this.#eventListenerMap.setProperty("audioend", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'audiostart'>) => void) | undefined } */
+    get onaudiostart() {
+      return this.#eventListenerMap.getProperty("audiostart");
+    }
+    set onaudiostart(value) {
+      this.#eventListenerMap.setProperty("audiostart", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'cognitiveservices'>) => void) | undefined } */
+    get oncognitiveservices() {
+      return this.#eventListenerMap.getProperty("cognitiveservices");
+    }
+    set oncognitiveservices(value) {
+      this.#eventListenerMap.setProperty("cognitiveservices", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'end'>) => void) | undefined } */
+    get onend() {
+      return this.#eventListenerMap.getProperty("end");
+    }
+    set onend(value) {
+      this.#eventListenerMap.setProperty("end", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'error'>) => void) | undefined } */
+    get onerror() {
+      return this.#eventListenerMap.getProperty("error");
+    }
+    set onerror(value) {
+      this.#eventListenerMap.setProperty("error", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'result'>) => void) | undefined } */
+    get onresult() {
+      return this.#eventListenerMap.getProperty("result");
+    }
+    set onresult(value) {
+      this.#eventListenerMap.setProperty("result", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'soundend'>) => void) | undefined } */
+    get onsoundend() {
+      return this.#eventListenerMap.getProperty("soundend");
+    }
+    set onsoundend(value) {
+      this.#eventListenerMap.setProperty("soundend", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'soundstart'>) => void) | undefined } */
+    get onsoundstart() {
+      return this.#eventListenerMap.getProperty("soundstart");
+    }
+    set onsoundstart(value) {
+      this.#eventListenerMap.setProperty("soundstart", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'speechend'>) => void) | undefined } */
+    get onspeechend() {
+      return this.#eventListenerMap.getProperty("speechend");
+    }
+    set onspeechend(value) {
+      this.#eventListenerMap.setProperty("speechend", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'speechstart'>) => void) | undefined } */
+    get onspeechstart() {
+      return this.#eventListenerMap.getProperty("speechstart");
+    }
+    set onspeechstart(value) {
+      this.#eventListenerMap.setProperty("speechstart", value);
+    }
+    /** @type { ((event: SpeechRecognitionEvent<'start'>) => void) | undefined } */
+    get onstart() {
+      return this.#eventListenerMap.getProperty("start");
+    }
+    set onstart(value) {
+      this.#eventListenerMap.setProperty("start", value);
+    }
+    start() {
+      this._startOnce().catch((err) => {
+        this.dispatchEvent(
+          new SpeechRecognitionErrorEvent("error", { error: err, message: err && (err.stack || err.message) })
+        );
+      });
+    }
+    async _startOnce() {
+      const recognizer = await createRecognizer(this.lang);
+      const { pause, unprepare } = prepareAudioConfig(recognizer.audioConfig);
+      try {
+        const queue = createPromiseQueue_default();
+        let soundStarted;
+        let speechStarted;
+        let stopping;
+        const { detach: detachAudioConfigEvent } = recognizer.audioConfig.events.attach((event) => {
+          const { name } = event;
+          if (name === "AudioSourceReadyEvent") {
+            queue.push({ audioSourceReady: {} });
+          } else if (name === "AudioSourceOffEvent") {
+            queue.push({ audioSourceOff: {} });
+          } else if (name === "FirstAudibleChunk") {
+            queue.push({ firstAudibleChunk: {} });
+          }
+        });
+        recognizer.canceled = (_, { errorDetails, offset, reason, sessionId }) => {
+          queue.push({
+            canceled: {
+              errorDetails,
+              offset,
+              reason,
+              sessionId
+            }
+          });
+        };
+        recognizer.recognized = (_, { offset, result, sessionId }) => {
+          queue.push({
+            recognized: {
+              offset,
+              result: serializeRecognitionResult(result),
+              sessionId
+            }
+          });
+        };
+        recognizer.recognizing = (_, { offset, result, sessionId }) => {
+          queue.push({
+            recognizing: {
+              offset,
+              result: serializeRecognitionResult(result),
+              sessionId
+            }
+          });
+        };
+        recognizer.sessionStarted = (_, { sessionId }) => {
+          queue.push({ sessionStarted: { sessionId } });
+        };
+        recognizer.sessionStopped = (_, { sessionId }) => {
+          queue.push({ sessionStopped: { sessionId } });
+        };
+        recognizer.speechStartDetected = (_, { offset, sessionId }) => {
+          queue.push({ speechStartDetected: { offset, sessionId } });
+        };
+        recognizer.speechEndDetected = (_, { sessionId }) => {
+          queue.push({ speechEndDetected: { sessionId } });
+        };
+        const { phrases } = this.grammars;
+        const { dynamicGrammar } = recognizer.privReco;
+        referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar(referenceGrammars);
+        phrases && phrases.length && dynamicGrammar.addPhrase(phrases);
+        await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync.bind(recognizer))();
+        if (recognizer.stopContinuousRecognitionAsync) {
+          this.abort = () => queue.push({ abort: {} });
+          this.stop = () => queue.push({ stop: {} });
+        } else {
+          this.abort = this.stop = void 0;
+        }
+        let audioStarted;
+        let finalEvent;
+        let finalizedResults = [];
+        for (let loop = 0; !stopping || audioStarted; loop++) {
+          const event = await queue.shift();
+          const {
+            abort,
+            audioSourceOff,
+            audioSourceReady,
+            canceled,
+            firstAudibleChunk,
+            recognized,
+            recognizing,
+            stop
+          } = event;
+          Object.keys(event).forEach((name) => this.emitCognitiveServices(name, event[name]));
+          const errorMessage = canceled && canceled.errorDetails;
+          if (/Permission\sdenied/u.test(errorMessage || "")) {
+            finalEvent = new SpeechRecognitionErrorEvent("error", { error: "not-allowed" });
+            break;
+          }
+          if (!loop) {
+            this.dispatchEvent(new SpeechRecognitionEvent("start"));
+          }
+          if (errorMessage) {
+            if (/1006/u.test(errorMessage)) {
+              if (!audioStarted) {
+                this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
+                this.dispatchEvent(new SpeechRecognitionEvent("audioend"));
+              }
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "network" });
+            } else {
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "unknown" });
+            }
+            break;
+          } else if (abort || stop) {
+            if (abort) {
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "aborted" });
+              stopping = "abort";
+            } else {
+              pause();
+              stopping = "stop";
+            }
+            if (abort && recognizer.stopContinuousRecognitionAsync) {
+              await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+            }
+          } else if (audioSourceReady) {
+            this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
+            audioStarted = true;
+          } else if (firstAudibleChunk) {
+            this.dispatchEvent(new SpeechRecognitionEvent("soundstart"));
+            soundStarted = true;
+          } else if (audioSourceOff) {
+            speechStarted && this.dispatchEvent(new SpeechRecognitionEvent("speechend"));
+            soundStarted && this.dispatchEvent(new SpeechRecognitionEvent("soundend"));
+            audioStarted && this.dispatchEvent(new SpeechRecognitionEvent("audioend"));
+            audioStarted = soundStarted = speechStarted = false;
+            break;
+          } else if (stopping !== "abort") {
+            if (recognized && recognized.result && recognized.result.reason === ResultReason2.NoMatch) {
+              if (!this.continuous || stopping === "stop") {
+                finalEvent = new SpeechRecognitionEvent("result");
+                recognizer.stopContinuousRecognitionAsync && await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+                break;
+              }
+            } else if (recognized || recognizing) {
+              if (!audioStarted) {
+                this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
+                audioStarted = true;
+              }
+              if (!soundStarted) {
+                this.dispatchEvent(new SpeechRecognitionEvent("soundstart"));
+                soundStarted = true;
+              }
+              if (!speechStarted) {
+                this.dispatchEvent(new SpeechRecognitionEvent("speechstart"));
+                speechStarted = true;
+              }
+              if (recognized) {
+                const result = cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognized.result, {
+                  maxAlternatives: this.maxAlternatives,
+                  textNormalization
+                });
+                const recognizable = !!result[0].transcript;
+                if (recognizable) {
+                  finalizedResults = [...finalizedResults, result];
+                  this.continuous && this.dispatchEvent(
+                    new SpeechRecognitionEvent("result", {
+                      results: new SpeechRecognitionResultList(finalizedResults)
+                    })
+                  );
+                }
+                if (this.continuous && recognizable) {
+                  finalEvent = void 0;
+                } else {
+                  finalEvent = new SpeechRecognitionEvent("result", {
+                    results: new SpeechRecognitionResultList(finalizedResults)
+                  });
+                }
+                if ((!this.continuous || stopping === "stop") && recognizer.stopContinuousRecognitionAsync) {
+                  await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+                }
+                if (looseEvents && finalEvent && recognizable) {
+                  this.dispatchEvent(finalEvent);
+                  finalEvent = void 0;
+                }
+              } else if (recognizing) {
+                this.interimResults && this.dispatchEvent(
+                  new SpeechRecognitionEvent("result", {
+                    results: new SpeechRecognitionResultList([
+                      ...finalizedResults,
+                      cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognizing.result, {
+                        maxAlternatives: this.maxAlternatives,
+                        textNormalization
+                      })
+                    ])
+                  })
+                );
+              }
+            }
+          }
+        }
+        if (speechStarted) {
+          this.dispatchEvent(new SpeechRecognitionEvent("speechend"));
+        }
+        if (soundStarted) {
+          this.dispatchEvent(new SpeechRecognitionEvent("soundend"));
+        }
+        if (audioStarted) {
+          this.dispatchEvent(new SpeechRecognitionEvent("audioend"));
+        }
+        if (finalEvent) {
+          if (finalEvent.type === "result" && !finalEvent.results.length) {
+            finalEvent = new SpeechRecognitionErrorEvent("error", { error: "no-speech" });
+          }
+          this.dispatchEvent(finalEvent);
+        }
+        this.dispatchEvent(new SpeechRecognitionEvent("end"));
+        detachAudioConfigEvent();
+      } catch (err) {
+        console.error(err);
+        throw err;
+      } finally {
+        unprepare();
+        recognizer.dispose();
+      }
+    }
+  }
+  return {
+    SpeechGrammarList: SpeechGrammarList_default,
+    SpeechRecognition,
+    SpeechRecognitionEvent
+  };
+}
+var createSpeechRecognitionPonyfill_default = (options) => {
+  const {
+    audioConfig = AudioConfig2.fromDefaultMicrophoneInput(),
+    // We set telemetry to true to honor the default telemetry settings of Speech SDK
+    // https://github.com/Microsoft/cognitive-services-speech-sdk-js#data--telemetry
+    enableTelemetry = true,
+    fetchCredentials,
+    looseEvents,
+    referenceGrammars,
+    speechRecognitionEndpointId,
+    textNormalization = "display"
+  } = patchOptions(options);
+  if (!audioConfig && (!window.navigator.mediaDevices || !window.navigator.mediaDevices.getUserMedia)) {
+    console.warn(
+      "web-speech-cognitive-services: This browser does not support WebRTC and it will not work with Cognitive Services Speech Services."
+    );
+    return {};
+  }
+  const createRecognizer = async (lang) => {
+    const { authorizationToken, region, speechRecognitionHostname, subscriptionKey } = await fetchCredentials();
+    let speechConfig;
+    if (speechRecognitionHostname) {
+      const host = { hostname: speechRecognitionHostname, port: 443, protocol: "wss:" };
+      if (authorizationToken) {
+        speechConfig = SpeechConfig2.fromHost(host);
+        speechConfig.authorizationToken = authorizationToken;
+      } else {
+        speechConfig = SpeechConfig2.fromHost(host, subscriptionKey);
+      }
+    } else {
+      speechConfig = authorizationToken ? SpeechConfig2.fromAuthorizationToken(authorizationToken, region) : SpeechConfig2.fromSubscription(subscriptionKey, region);
+    }
+    if (speechRecognitionEndpointId) {
+      speechConfig.endpointId = speechRecognitionEndpointId;
+    }
+    speechConfig.outputFormat = OutputFormat2.Detailed;
+    speechConfig.speechRecognitionLanguage = lang || "en-US";
+    return new SpeechRecognizer2(speechConfig, audioConfig);
+  };
+  return createSpeechRecognitionPonyfillFromRecognizer({
+    audioConfig,
+    createRecognizer,
+    enableTelemetry,
+    looseEvents,
+    referenceGrammars,
+    textNormalization
+  });
+};
+
+// src/SpeechServices/SpeechToText.js
+var SpeechToText_default = createSpeechRecognitionPonyfill_default;
+
+// src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
+var import_event_target_shim3 = require("event-target-shim");
+var import_async = require("on-error-resume-next/async");
+
+// src/SpeechServices/TextToSpeech/AudioContextQueue.js
+var import_memoize_one = __toESM(require("memoize-one"));
+
+// src/SpeechServices/TextToSpeech/AudioContextConsumer.js
+var AudioContextConsumer_default = class {
+  constructor(audioContext) {
+    this.audioContext = audioContext;
+  }
+  pause() {
+    this.audioContext && this.audioContext.suspend();
+    this.playingUtterance && this.playingUtterance.dispatchEvent(new CustomEvent("pause"));
+  }
+  resume() {
+    this.audioContext && this.audioContext.resume();
+    this.playingUtterance && this.playingUtterance.dispatchEvent(new CustomEvent("resume"));
+  }
+  async start(queue) {
+    let utterance;
+    while (utterance = queue.shift()) {
+      this.playingUtterance = utterance;
+      await utterance.play(this.audioContext);
+      this.playingUtterance = null;
+    }
+  }
+  stop() {
+    this.playingUtterance && this.playingUtterance.stop();
+    if (this.audioContext.state === "suspended") {
+      this.audioContext.resume();
+    }
+  }
+};
+
+// src/SpeechServices/TextToSpeech/AudioContextQueue.js
+var AudioContextQueue_default = class {
+  constructor({ audioContext, ponyfill }) {
+    this.consumer = null;
+    this.paused = false;
+    this.queue = [];
+    this.getAudioContext = (0, import_memoize_one.default)(() => audioContext || new ponyfill.AudioContext());
+  }
+  pause() {
+    this.paused = true;
+    this.consumer && this.consumer.pause();
+  }
+  push(utterance) {
+    this.queue.push(utterance);
+    this.startConsumer();
+  }
+  resume() {
+    this.paused = false;
+    if (this.consumer) {
+      this.consumer.resume();
+    } else {
+      this.startConsumer();
+    }
+  }
+  get speaking() {
+    return !!this.consumer;
+  }
+  async startConsumer() {
+    while (!this.paused && this.queue.length && !this.consumer) {
+      this.consumer = new AudioContextConsumer_default(this.getAudioContext());
+      await this.consumer.start(this.queue);
+      this.consumer = null;
+    }
+  }
+  stop() {
+    this.queue.splice(0);
+    this.consumer && this.consumer.stop();
+  }
+};
+
+// src/SpeechServices/TextToSpeech/SpeechSynthesisEvent.js
+var import_event_target_shim = require("event-target-shim");
+var SpeechSynthesisEvent = class extends import_event_target_shim.Event {
+  constructor(type) {
+    super(type);
+  }
+};
+
+// src/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js
+var import_event_as_promise = require("event-as-promise");
+var import_event_target_shim2 = require("event-target-shim");
+
+// src/SpeechServices/TextToSpeech/fetchSpeechData.js
+var import_base64_arraybuffer = require("base64-arraybuffer");
+
+// src/SpeechServices/TextToSpeech/buildSSML.js
+function relativePercentage(value) {
+  let relative = Math.round((value - 1) * 100);
+  if (relative >= 0) {
+    relative = "+" + relative;
+  }
+  return relative + "%";
+}
+function buildSSML({ lang, pitch = 1, rate = 1, text, voice, volume }) {
+  return `<speak version="1.0" xml:lang="${lang}">
+<voice xml:lang="${lang}" name="${voice}">
+<prosody pitch="${relativePercentage(pitch)}" rate="${relativePercentage(rate)}" volume="${relativePercentage(
+volume
+)}">
+${text}
+</prosody>
+</voice>
+</speak>`;
+}
+
+// src/SpeechServices/TextToSpeech/isSSML.js
+var SPEAK_TAG_PATTERN = /^\s*<speak(\s|\/?>)/u;
+var XML_PROLOG_PATTERN = /^\s*<\?xml\s/u;
+function isSSML(text) {
+  return SPEAK_TAG_PATTERN.test(text) || XML_PROLOG_PATTERN.test(text);
+}
+
+// src/SpeechServices/TextToSpeech/fetchSpeechData.js
+var DEFAULT_LANGUAGE = "en-US";
+var DEFAULT_OUTPUT_FORMAT = "riff-16khz-16bit-mono-pcm";
+var DEFAULT_VOICE = "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)";
+var EMPTY_MP3_BASE64 = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU3LjU2LjEwMQAAAAAAAAAAAAAA//tAwAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAACAAABhgC7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7//////////////////////////////////////////////////////////////////8AAAAATGF2YzU3LjY0AAAAAAAAAAAAAAAAJAUHAAAAAAAAAYYoRBqpAAAAAAD/+xDEAAPAAAGkAAAAIAAANIAAAARMQU1FMy45OS41VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVf/7EMQpg8AAAaQAAAAgAAA0gAAABFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV";
+async function fetchSpeechData_default({
+  deploymentId,
+  fetchCredentials,
+  lang = DEFAULT_LANGUAGE,
+  outputFormat = DEFAULT_OUTPUT_FORMAT,
+  pitch,
+  rate,
+  text,
+  voice = DEFAULT_VOICE,
+  volume
+}) {
+  if (!text) {
+    return (0, import_base64_arraybuffer.decode)(EMPTY_MP3_BASE64);
+  }
+  const { authorizationToken, region, speechSynthesisHostname, subscriptionKey } = await fetchCredentials();
+  if (authorizationToken && subscriptionKey || !authorizationToken && !subscriptionKey) {
+    throw new Error('Only "authorizationToken" or "subscriptionKey" should be set.');
+  } else if (region && speechSynthesisHostname || !region && !speechSynthesisHostname) {
+    throw new Error('Only "region" or "speechSynthesisHostnamename" should be set.');
+  }
+  const ssml = isSSML(text) ? text : buildSSML({ lang, pitch, rate, text, voice, volume });
+  const hostname = speechSynthesisHostname || (deploymentId ? `${encodeURI(region)}.voice.speech.microsoft.com` : `${encodeURI(region)}.tts.speech.microsoft.com`);
+  const search = deploymentId ? `?deploymentId=${encodeURI(deploymentId)}` : "";
+  const url = `https://${hostname}/cognitiveservices/v1${search}`;
+  const res = await fetch(url, {
+    headers: {
+      "Content-Type": "application/ssml+xml",
+      "X-Microsoft-OutputFormat": outputFormat,
+      ...authorizationToken ? {
+        Authorization: `Bearer ${authorizationToken}`
+      } : {
+        "Ocp-Apim-Subscription-Key": subscriptionKey
+      }
+    },
+    method: "POST",
+    body: ssml
+  });
+  if (!res.ok) {
+    throw new Error(`web-speech-cognitive-services: Failed to syntheis speech, server returned ${res.status}`);
+  }
+  return res.arrayBuffer();
+}
+
+// src/SpeechServices/TextToSpeech/subscribeEvent.js
+function subscribeEvent(target, name, handler) {
+  target.addEventListener(name, handler);
+  return () => target.removeEventListener(name, handler);
+}
+
+// src/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js
+function asyncDecodeAudioData(audioContext, arrayBuffer) {
+  return new Promise((resolve, reject) => {
+    const promise = audioContext.decodeAudioData(arrayBuffer, resolve, reject);
+    promise && typeof promise.then === "function" && resolve(promise);
+  });
+}
+function playDecoded(audioContext, audioBuffer, source) {
+  return new Promise((resolve, reject) => {
+    const audioContextClosed = new import_event_as_promise.EventAsPromise();
+    const sourceEnded = new import_event_as_promise.EventAsPromise();
+    const unsubscribe = subscribeEvent(
+      audioContext,
+      "statechange",
+      ({ target: { state } }) => state === "closed" && audioContextClosed.eventListener()
+    );
+    try {
+      source.buffer = audioBuffer;
+      source.onended = sourceEnded.eventListener;
+      source.connect(audioContext.destination);
+      source.start(0);
+      Promise.race([audioContextClosed.upcoming(), sourceEnded.upcoming()]).then(resolve);
+    } catch (err) {
+      reject(err);
+    } finally {
+      unsubscribe();
+    }
+  });
+}
+var SpeechSynthesisUtterance = class extends import_event_target_shim2.EventTarget {
+  constructor(text) {
+    super();
+    this._lang = null;
+    this._pitch = 1;
+    this._rate = 1;
+    this._voice = null;
+    this._volume = 1;
+    this.text = text;
+    this.onboundary = null;
+    this.onend = null;
+    this.onerror = null;
+    this.onmark = null;
+    this.onpause = null;
+    this.onresume = null;
+    this.onstart = null;
+  }
+  get lang() {
+    return this._lang;
+  }
+  set lang(value) {
+    this._lang = value;
+  }
+  get onboundary() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "boundary");
+  }
+  set onboundary(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "boundary", value);
+  }
+  get onend() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "end");
+  }
+  set onend(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "end", value);
+  }
+  get onerror() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "error");
+  }
+  set onerror(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "error", value);
+  }
+  get onmark() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "mark");
+  }
+  set onmark(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "mark", value);
+  }
+  get onpause() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "pause");
+  }
+  set onpause(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "pause", value);
+  }
+  get onresume() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "resume");
+  }
+  set onresume(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "resume", value);
+  }
+  get onstart() {
+    return (0, import_event_target_shim2.getEventAttributeValue)(this, "start");
+  }
+  set onstart(value) {
+    (0, import_event_target_shim2.setEventAttributeValue)(this, "start", value);
+  }
+  get pitch() {
+    return this._pitch;
+  }
+  set pitch(value) {
+    this._pitch = value;
+  }
+  get rate() {
+    return this._rate;
+  }
+  set rate(value) {
+    this._rate = value;
+  }
+  get voice() {
+    return this._voice;
+  }
+  set voice(value) {
+    this._voice = value;
+  }
+  get volume() {
+    return this._volume;
+  }
+  set volume(value) {
+    this._volume = value;
+  }
+  preload({ deploymentId, fetchCredentials, outputFormat }) {
+    this.arrayBufferPromise = fetchSpeechData_default({
+      fetchCredentials,
+      deploymentId,
+      lang: this.lang || window.navigator.language,
+      outputFormat,
+      pitch: this.pitch,
+      rate: this.rate,
+      text: this.text,
+      voice: this.voice && this.voice.voiceURI,
+      volume: this.volume
+    });
+    this.arrayBufferPromise.catch();
+  }
+  async play(audioContext) {
+    try {
+      this.dispatchEvent(new SpeechSynthesisEvent("start"));
+      const source = audioContext.createBufferSource();
+      const audioBuffer = await asyncDecodeAudioData(audioContext, await this.arrayBufferPromise);
+      this._playingSource = source;
+      await playDecoded(audioContext, audioBuffer, source);
+      this._playingSource = null;
+      this.dispatchEvent(new SpeechSynthesisEvent("end"));
+    } catch (error) {
+      this.dispatchEvent(new ErrorEvent("error", { error: "synthesis-failed", message: error.stack }));
+    }
+  }
+  stop() {
+    this._playingSource && this._playingSource.stop();
+  }
+};
+var SpeechSynthesisUtterance_default = SpeechSynthesisUtterance;
+
// src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
|
|
1143
|
+
var SpeechSynthesisVoice_default = class {
|
|
1144
|
+
constructor({ gender, lang, voiceURI }) {
|
|
1145
|
+
this._default = false;
|
|
1146
|
+
this._gender = gender;
|
|
1147
|
+
this._lang = lang;
|
|
1148
|
+
this._localService = false;
|
|
1149
|
+
this._name = voiceURI;
|
|
1150
|
+
this._voiceURI = voiceURI;
|
|
1151
|
+
}
|
|
1152
|
+
get default() {
|
|
1153
|
+
return this._default;
|
|
1154
|
+
}
|
|
1155
|
+
get gender() {
|
|
1156
|
+
return this._gender;
|
|
1157
|
+
}
|
|
1158
|
+
get lang() {
|
|
1159
|
+
return this._lang;
|
|
1160
|
+
}
|
|
1161
|
+
get localService() {
|
|
1162
|
+
return this._localService;
|
|
1163
|
+
}
|
|
1164
|
+
get name() {
|
|
1165
|
+
return this._name;
|
|
1166
|
+
}
|
|
1167
|
+
get voiceURI() {
|
|
1168
|
+
return this._voiceURI;
|
|
1169
|
+
}
|
|
1170
|
+
};
|
|
1171
|
+
|
|
1172
|
+
// src/SpeechServices/TextToSpeech/fetchCustomVoices.js
|
|
1173
|
+
async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
|
|
1174
|
+
const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
|
|
1175
|
+
const res = await fetch(
|
|
1176
|
+
`https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
|
|
1177
|
+
{
|
|
1178
|
+
headers: {
|
|
1179
|
+
accept: "application/json",
|
|
1180
|
+
"ocp-apim-subscription-key": subscriptionKey
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
);
|
|
1184
|
+
if (!res.ok) {
|
|
1185
|
+
throw new Error("Failed to fetch custom voices");
|
|
1186
|
+
}
|
|
1187
|
+
return res.json();
|
|
1188
|
+
}
|
|
1189
|
+
async function fetchCustomVoices_default({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
|
|
1190
|
+
const { models } = await fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey });
|
|
1191
|
+
return models.map(
|
|
1192
|
+
({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })
|
|
1193
|
+
).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
|
|
1194
|
+
}
|
|
1195
|
+
|
|
1196
|
+
// src/SpeechServices/TextToSpeech/fetchVoices.js
|
|
1197
|
+
async function fetchVoices({ authorizationToken, region, speechSynthesisHostname, subscriptionKey }) {
|
|
1198
|
+
const hostname = speechSynthesisHostname || `${encodeURI(region)}.tts.speech.microsoft.com`;
|
|
1199
|
+
const res = await fetch(`https://${hostname}/cognitiveservices/voices/list`, {
|
|
1200
|
+
headers: {
|
|
1201
|
+
"content-type": "application/json",
|
|
1202
|
+
...authorizationToken ? {
|
|
1203
|
+
authorization: `Bearer ${authorizationToken}`
|
|
1204
|
+
} : {
|
|
1205
|
+
"Ocp-Apim-Subscription-Key": subscriptionKey
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
});
|
|
1209
|
+
if (!res.ok) {
|
|
1210
|
+
throw new Error("Failed to fetch voices");
|
|
1211
|
+
}
|
|
1212
|
+
const voices = await res.json();
|
|
1213
|
+
return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
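// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch only, not part of the package diff.]
// fetchVoices() above wraps the Speech Services voice-list REST endpoint. The
// equivalent raw call, with a placeholder region and subscription key:
const REGION = "westus2";                     // placeholder
const SUBSCRIPTION_KEY = "<your-speech-key>"; // placeholder
const listVoices = async () => {
  const res = await fetch(`https://${REGION}.tts.speech.microsoft.com/cognitiveservices/voices/list`, {
    headers: { "Ocp-Apim-Subscription-Key": SUBSCRIPTION_KEY }
  });
  if (!res.ok) {
    throw new Error(`Voice list request failed with HTTP ${res.status}`);
  }
  // Each entry carries Name, Locale and Gender, which the code above maps onto
  // SpeechSynthesisVoice ({ voiceURI, lang, gender }) and sorts by name.
  return res.json();
};
// ---------------------------------------------------------------------------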
+// src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
+var DEFAULT_OUTPUT_FORMAT2 = "audio-24khz-160kbitrate-mono-mp3";
+var EMPTY_ARRAY = [];
+var createSpeechSynthesisPonyfill_default = (options) => {
+  const {
+    audioContext,
+    fetchCredentials,
+    ponyfill = {
+      AudioContext: window.AudioContext || window.webkitAudioContext
+    },
+    speechSynthesisDeploymentId,
+    speechSynthesisOutputFormat = DEFAULT_OUTPUT_FORMAT2
+  } = patchOptions(options);
+  if (!audioContext && !ponyfill.AudioContext) {
+    console.warn(
+      "web-speech-cognitive-services: This browser does not support Web Audio and it will not work with Cognitive Services Speech Services."
+    );
+    return {};
+  }
+  class SpeechSynthesis extends import_event_target_shim3.EventTarget {
+    constructor() {
+      super();
+      this.queue = new AudioContextQueue_default({ audioContext, ponyfill });
+      this.updateVoices();
+    }
+    cancel() {
+      this.queue.stop();
+    }
+    getVoices() {
+      return EMPTY_ARRAY;
+    }
+    get onvoiceschanged() {
+      return (0, import_event_target_shim3.getEventAttributeValue)(this, "voiceschanged");
+    }
+    set onvoiceschanged(value) {
+      (0, import_event_target_shim3.setEventAttributeValue)(this, "voiceschanged", value);
+    }
+    pause() {
+      this.queue.pause();
+    }
+    resume() {
+      this.queue.resume();
+    }
+    speak(utterance) {
+      if (!(utterance instanceof SpeechSynthesisUtterance_default)) {
+        throw new Error("invalid utterance");
+      }
+      const { reject, resolve, promise } = pDefer();
+      const handleError = ({ error: errorCode, message }) => {
+        const error = new Error(errorCode);
+        error.stack = message;
+        reject(error);
+      };
+      utterance.addEventListener("end", resolve);
+      utterance.addEventListener("error", handleError);
+      utterance.preload({
+        deploymentId: speechSynthesisDeploymentId,
+        fetchCredentials,
+        outputFormat: speechSynthesisOutputFormat
+      });
+      this.queue.push(utterance);
+      return promise.finally(() => {
+        utterance.removeEventListener("end", resolve);
+        utterance.removeEventListener("error", handleError);
+      });
+    }
+    get speaking() {
+      return this.queue.speaking;
+    }
+    async updateVoices() {
+      const { customVoiceHostname, region, speechSynthesisHostname, subscriptionKey } = await fetchCredentials();
+      if (speechSynthesisDeploymentId) {
+        if (subscriptionKey) {
+          console.warn(
+            "web-speech-cognitive-services: Listing of custom voice models are only available when using subscription key."
+          );
+          await (0, import_async.onErrorResumeNext)(async () => {
+            const voices = await fetchCustomVoices_default({
+              customVoiceHostname,
+              deploymentId: speechSynthesisDeploymentId,
+              region,
+              speechSynthesisHostname,
+              subscriptionKey
+            });
+            this.getVoices = () => voices;
+          });
+        }
+      } else {
+        await (0, import_async.onErrorResumeNext)(async () => {
+          const voices = await fetchVoices(await fetchCredentials());
+          this.getVoices = () => voices;
+        });
+      }
+      this.dispatchEvent(new SpeechSynthesisEvent("voiceschanged"));
+    }
+  }
+  return {
+    speechSynthesis: new SpeechSynthesis(),
+    SpeechSynthesisEvent,
+    SpeechSynthesisUtterance: SpeechSynthesisUtterance_default
+  };
+};
+
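// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch only, not part of the package diff.]
// Wiring the synthesis ponyfill end to end. The `credentials` option name is
// assumed here (patchOptions() above normalizes the options into
// fetchCredentials); region and subscription key are placeholders.
const { speechSynthesis, SpeechSynthesisUtterance } = createSpeechSynthesisPonyfill_default({
  credentials: { region: "westus2", subscriptionKey: "<your-speech-key>" }
});
speechSynthesis.addEventListener("voiceschanged", () => {
  // getVoices() is rebound once the voice list has been fetched (see updateVoices()).
  const [voice] = speechSynthesis.getVoices();
  const utterance = new SpeechSynthesisUtterance("Hello from Cognitive Services.");
  utterance.voice = voice;
  // speak() resolves on the utterance's "end" event and rejects on "error".
  speechSynthesis.speak(utterance).catch(error => console.error(error));
});
// ---------------------------------------------------------------------------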
+// src/SpeechServices/TextToSpeech.js
+var TextToSpeech_default = createSpeechSynthesisPonyfill_default;
+
+// src/SpeechServices/fetchAuthorizationToken.js
+var TOKEN_URL_TEMPLATE = "https://{region}.api.cognitive.microsoft.com/sts/v1.0/issueToken";
+async function fetchAuthorizationToken_default({ region, subscriptionKey }) {
+  const res = await fetch(TOKEN_URL_TEMPLATE.replace(/\{region\}/u, region), {
+    headers: {
+      "Ocp-Apim-Subscription-Key": subscriptionKey
+    },
+    method: "POST"
+  });
+  if (!res.ok) {
+    throw new Error(`Failed to fetch authorization token, server returned ${res.status}`);
+  }
+  return res.text();
+}
+
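// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch only, not part of the package diff.]
// fetchAuthorizationToken() above exchanges a subscription key for a short-lived
// bearer token at the issueToken endpoint. A usage sketch with placeholders:
const getToken = () =>
  fetchAuthorizationToken_default({
    region: "westus2",                   // placeholder
    subscriptionKey: "<your-speech-key>" // placeholder
  });
// The resolved token string can then be supplied as `authorizationToken` in the
// credentials; tokens are typically minted on your own server so the raw
// subscription key never reaches the browser.
// ---------------------------------------------------------------------------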
+// src/SpeechServices.js
+function createSpeechServicesPonyfill(options = {}, ...args) {
+  return {
+    ...SpeechToText_default(options, ...args),
+    ...TextToSpeech_default(options, ...args)
+  };
+}
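// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch only, not part of the package diff.]
// createSpeechServicesPonyfill() merges the speech-to-text and text-to-speech
// ponyfills into one object, so a single set of credentials serves both
// directions. The returned property names are assumed from the exported API;
// values are placeholders.
const {
  SpeechRecognition,        // from the speech-to-text half
  speechSynthesis: tts,     // from the text-to-speech half
  SpeechSynthesisUtterance: Utterance
} = createSpeechServicesPonyfill({
  credentials: { region: "westus2", subscriptionKey: "<your-speech-key>" }
});
// ---------------------------------------------------------------------------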
+var meta = document.createElement("meta");
+meta.setAttribute("name", "web-speech-cognitive-services");
+meta.setAttribute("content", `version=${"8.0.0-main.181f814"}`);
+document.head.appendChild(meta);
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  createSpeechRecognitionPonyfill,
+  createSpeechRecognitionPonyfillFromRecognizer,
+  createSpeechServicesPonyfill,
+  createSpeechSynthesisPonyfill,
+  fetchAuthorizationToken
+});
+//# sourceMappingURL=web-speech-cognitive-services.js.map