web-speech-cognitive-services 8.0.0-main.85313be → 8.0.0-main.ccf35da
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/web-speech-cognitive-services.d.mts +148 -53
- package/dist/web-speech-cognitive-services.d.ts +148 -53
- package/dist/web-speech-cognitive-services.development.js +1544 -1487
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +476 -349
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +472 -345
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +12 -12
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +21 -15
@@ -1,104 +1,3 @@
-// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js
-import { Event, EventTarget, getEventAttributeValue, setEventAttributeValue } from "event-target-shim";
-
-// src/Util/arrayToMap.js
-function arrayToMap_default(array, extras) {
-  const map = {
-    ...[].reduce.call(
-      array,
-      (map2, value, index) => {
-        map2[index] = value;
-        return map2;
-      },
-      {}
-    ),
-    ...extras,
-    length: array.length,
-    [Symbol.iterator]: () => [].slice.call(map)[Symbol.iterator]()
-  };
-  return map;
-}
-
-// src/SpeechServices/SpeechSDK.js
-import {
-  AudioConfig,
-  OutputFormat,
-  ResultReason,
-  SpeechConfig,
-  SpeechRecognizer
-} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk";
-var SpeechSDK_default = {
-  AudioConfig,
-  OutputFormat,
-  ResultReason,
-  SpeechConfig,
-  SpeechRecognizer
-};
-
-// src/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResultList.js
-var {
-  ResultReason: { RecognizingSpeech, RecognizedSpeech }
-} = SpeechSDK_default;
-function cognitiveServiceEventResultToWebSpeechRecognitionResultList_default(result, { maxAlternatives = Infinity, textNormalization = "display" } = {}) {
-  if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !result.json.NBest) {
-    const resultList = [
-      {
-        confidence: 0.5,
-        transcript: result.text
-      }
-    ];
-    if (result.reason === RecognizedSpeech) {
-      resultList.isFinal = true;
-    }
-    return resultList;
-  } else if (result.reason === RecognizedSpeech) {
-    const resultList = arrayToMap_default(
-      (result.json.NBest || []).slice(0, maxAlternatives).map(({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => ({
-        confidence,
-        transcript: textNormalization === "itn" ? itn : textNormalization === "lexical" ? lexical : textNormalization === "maskeditn" ? maskedITN : display
-      })),
-      { isFinal: true }
-    );
-    return resultList;
-  }
-  return [];
-}
-
-// ../../node_modules/p-defer/index.js
-function pDefer() {
-  const deferred = {};
-  deferred.promise = new Promise((resolve, reject) => {
-    deferred.resolve = resolve;
-    deferred.reject = reject;
-  });
-  return deferred;
-}
-
-// src/Util/createPromiseQueue.js
-function createPromiseQueue_default() {
-  let shiftDeferred;
-  const queue = [];
-  const push = (value) => {
-    if (shiftDeferred) {
-      const { resolve } = shiftDeferred;
-      shiftDeferred = null;
-      resolve(value);
-    } else {
-      queue.push(value);
-    }
-  };
-  const shift = () => {
-    if (queue.length) {
-      return Promise.resolve(queue.shift());
-    }
-    return (shiftDeferred || (shiftDeferred = pDefer())).promise;
-  };
-  return {
-    push,
-    shift
-  };
-}
-
 // src/SpeechServices/resolveFunctionOrReturnValue.ts
 function isFunction(value) {
   return typeof value === "function";
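Note: the removed arrayToMap_default helper is how the old bundle faked an array-like result list, with numeric keys, a length, an iterator, plus extras such as isFinal spread on top. A minimal sketch of the shape it produced (illustration only, not part of the diff; later hunks replace this pattern with real classes):

    // Illustration: what the removed helper returned for a recognized result.
    const list = arrayToMap_default(
      [{ confidence: 0.9, transcript: "hello" }],
      { isFinal: true }
    );
    list[0].transcript; // "hello" (numeric keys copied from the array)
    list.length;        // 1
    list.isFinal;       // true (an expando property, not a getter)
    [...list];          // iterable through the [Symbol.iterator] shim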
@@ -182,72 +81,270 @@ function patchOptions({
   };
 }
 
-// src/SpeechServices/
-
+// src/SpeechServices/SpeechSDK.js
+import {
+  AudioConfig,
+  OutputFormat,
+  ResultReason,
+  SpeechConfig,
+  SpeechRecognizer
+} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk";
+var SpeechSDK_default = {
+  AudioConfig,
+  OutputFormat,
+  ResultReason,
+  SpeechConfig,
+  SpeechRecognizer
+};
+
+// ../../node_modules/p-defer/index.js
+function pDefer() {
+  const deferred = {};
+  deferred.promise = new Promise((resolve, reject) => {
+    deferred.resolve = resolve;
+    deferred.reject = reject;
+  });
+  return deferred;
+}
+
+// src/Util/createPromiseQueue.js
+function createPromiseQueue_default() {
+  let shiftDeferred;
+  const queue = [];
+  const push = (value) => {
+    if (shiftDeferred) {
+      const { resolve } = shiftDeferred;
+      shiftDeferred = null;
+      resolve(value);
+    } else {
+      queue.push(value);
+    }
+  };
+  const shift = () => {
+    if (queue.length) {
+      return Promise.resolve(queue.shift());
+    }
+    return (shiftDeferred || (shiftDeferred = pDefer())).promise;
+  };
+  return {
+    push,
+    shift
+  };
+}
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionAlternative.ts
+var SpeechRecognitionAlternative = class {
+  constructor({ confidence, transcript }) {
+    this.#confidence = confidence;
+    this.#transcript = transcript;
+  }
+  #confidence;
+  #transcript;
+  get confidence() {
+    return this.#confidence;
+  }
+  get transcript() {
+    return this.#transcript;
+  }
+};
+
+// src/SpeechServices/SpeechToText/private/FakeArray.ts
+var FakeArray = class {
+  constructor(array) {
+    if (!array) {
+      throw new Error("array must be set.");
+    }
+    this.#array = array;
+    for (const key in array) {
+      Object.defineProperty(this, key, {
+        enumerable: true,
+        get() {
+          return array[key];
+        }
+      });
+    }
+  }
+  #array;
+  [Symbol.iterator]() {
+    return this.#array[Symbol.iterator]();
+  }
+  get length() {
+    return this.#array.length;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResult.ts
+var SpeechRecognitionResult = class extends FakeArray {
+  constructor(init) {
+    super(init.results);
+    this.#isFinal = init.isFinal;
+  }
+  #isFinal;
+  get isFinal() {
+    return this.#isFinal;
+  }
+};
+
+// src/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResult.ts
+var {
+  ResultReason: { RecognizingSpeech, RecognizedSpeech }
+} = SpeechSDK_default;
+function cognitiveServiceEventResultToWebSpeechRecognitionResult_default(result, init) {
+  const { maxAlternatives = Infinity, textNormalization = "display" } = init || {};
+  const json = typeof result.json === "string" ? JSON.parse(result.json) : result.json;
+  if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !json.NBest) {
+    return new SpeechRecognitionResult({
+      isFinal: result.reason === RecognizedSpeech,
+      results: [
+        new SpeechRecognitionAlternative({
+          confidence: 0.5,
+          transcript: result.text
+        })
+      ]
+    });
+  } else if (result.reason === RecognizedSpeech) {
+    return new SpeechRecognitionResult({
+      isFinal: true,
+      results: (json.NBest || []).slice(0, maxAlternatives).map(
+        ({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => new SpeechRecognitionAlternative({
+          confidence,
+          transcript: textNormalization === "itn" ? itn : textNormalization === "lexical" ? lexical : textNormalization === "maskeditn" ? maskedITN : display
+        })
+      )
+    });
+  }
+  return new SpeechRecognitionResult({ isFinal: false, results: [] });
+}
+
+// src/SpeechServices/SpeechToText/cognitiveServicesAsyncToPromise.ts
+function cognitiveServicesAsyncToPromise(fn, context = void 0) {
+  return (...args) => (
+    // eslint-disable-next-line prefer-spread
+    new Promise((resolve, reject) => fn.apply(context, [...args, resolve, reject]))
+  );
+}
+
+// src/SpeechServices/SpeechToText/SpeechGrammarList.ts
+var SpeechGrammarList = class {
   constructor() {
-    this
+    this.#phrases = [];
   }
   addFromString() {
     throw new Error("JSGF is not supported");
   }
+  #phrases;
   get phrases() {
-    return this
+    return this.#phrases;
   }
   set phrases(value) {
     if (Array.isArray(value)) {
-      this
+      this.#phrases = Object.freeze([...value]);
     } else if (typeof value === "string") {
-      this
+      this.#phrases = Object.freeze([value]);
     } else {
       throw new Error(`The provided value is not an array or of type 'string'`);
     }
   }
 };
 
-// src/SpeechServices/SpeechToText/
-var
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-}
-
-
-
+// src/SpeechServices/SpeechToText/SpeechRecognitionErrorEvent.ts
+var SpeechRecognitionErrorEvent = class extends Event {
+  constructor(type, { error, message }) {
+    super(type);
+    this.#error = error;
+    this.#message = message;
+  }
+  #error;
+  #message;
+  get error() {
+    return this.#error;
+  }
+  get message() {
+    return this.#message;
+  }
+  get type() {
+    return "error";
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResultList.ts
+var SpeechRecognitionResultList = class extends FakeArray {
+  constructor(result) {
+    super(result);
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionEvent.ts
 var SpeechRecognitionEvent = class extends Event {
-  constructor(type, { data,
+  constructor(type, { data, resultIndex, results } = {}) {
     super(type);
-    this
-    this
-    this
-
-
+    this.#data = data;
+    this.#resultIndex = resultIndex;
+    this.#results = results || new SpeechRecognitionResultList([]);
+  }
+  #data;
+  // TODO: "resultIndex" should be set.
+  #resultIndex;
+  #results;
+  get data() {
+    return this.#data;
+  }
+  get resultIndex() {
+    return this.#resultIndex;
+  }
+  get results() {
+    return this.#results;
+  }
+  get type() {
+    return super.type;
+  }
+};
+
+// src/SpeechServices/SpeechToText/private/EventListenerMap.ts
+var EventListenerMap = class {
+  constructor(eventTarget) {
+    this.#eventTarget = eventTarget;
+    this.#propertyMap = {};
+  }
+  #eventTarget;
+  #propertyMap;
+  getProperty(name) {
+    return this.#propertyMap[name];
+  }
+  setProperty(name, value) {
+    const existing = this.#propertyMap[name];
+    existing && this.#eventTarget.removeEventListener(name, existing);
+    if (value) {
+      this.#eventTarget.addEventListener(name, value);
+    }
+    this.#propertyMap[name] = value;
   }
 };
+
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
+import { AudioSourceEvent } from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common/AudioSourceEvents";
+
+// src/SpeechServices/SpeechToText/private/averageAmplitude.ts
+function averageAmplitude(arrayBuffer) {
+  const array = Array.from(new Int16Array(arrayBuffer));
+  return array.reduce((averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
+}
+
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
 function prepareAudioConfig(audioConfig) {
-  const
-  const
-
-  let
-
-
+  const audioConfigImpl = audioConfig;
+  const originalAttach = audioConfigImpl.attach;
+  const boundOriginalAttach = audioConfigImpl.attach.bind(audioConfigImpl);
+  let firstChunk = false;
+  let muted = false;
+  audioConfigImpl.attach = async () => {
+    const reader = await boundOriginalAttach("");
     return {
       ...reader,
       read: async () => {
         const chunk = await reader.read();
         if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
-
+          audioConfigImpl.events.onEvent(new AudioSourceEvent("FirstAudibleChunk", ""));
           firstChunk = true;
         }
         if (muted) {
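Note: this hunk swaps the array-with-extras pattern for private-field classes: SpeechRecognitionAlternative holds one hypothesis, FakeArray provides read-only index, length and iterator access, and SpeechRecognitionResult adds isFinal. It also parses result.json defensively, since the SDK may hand it over as a string. A usage sketch (illustration only, using the classes exactly as added above):

    const result = new SpeechRecognitionResult({
      isFinal: true,
      results: [new SpeechRecognitionAlternative({ confidence: 0.9, transcript: "hello" })]
    });
    result[0].transcript; // "hello" (FakeArray defines enumerable index getters)
    result.length;        // 1
    result.isFinal;       // true, now a read-only getter instead of an expando property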
@@ -263,10 +360,36 @@ function prepareAudioConfig(audioConfig) {
       muted = true;
     },
     unprepare: () => {
-
+      audioConfigImpl.attach = originalAttach;
     }
   };
 }
+
+// src/SpeechServices/SpeechToText/private/serializeRecognitionResult.ts
+function serializeRecognitionResult({
+  duration,
+  errorDetails,
+  json,
+  offset,
+  properties,
+  reason,
+  resultId,
+  text
+}) {
+  return Object.freeze({
+    duration,
+    errorDetails,
+    json: json && JSON.parse(json),
+    offset,
+    properties,
+    reason,
+    resultId,
+    text
+  });
+}
+
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfillFromRecognizer.ts
+var { ResultReason: ResultReason2, SpeechRecognizer: SpeechRecognizer2 } = SpeechSDK_default;
 function createSpeechRecognitionPonyfillFromRecognizer({
   createRecognizer,
   enableTelemetry,
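Note: the new serializeRecognitionResult parses the SDK result's json string once and freezes the record before it is re-emitted on the cognitiveservices event, so listeners receive an immutable snapshot. A sketch (illustration only; fields not passed here are simply undefined):

    const snapshot = serializeRecognitionResult({ json: '{"NBest":[]}', reason: 3, text: "hi" });
    snapshot.json.NBest;       // [] (parsed, no longer a string)
    Object.isFrozen(snapshot); // true, so later assignment is ignored (throws in strict mode)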
@@ -276,14 +399,13 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 }) {
   SpeechRecognizer2.enableTelemetry(enableTelemetry !== false);
   class SpeechRecognition extends EventTarget {
-
-
-
-
-
-
-
-    }
+    #continuous = false;
+    #eventListenerMap = new EventListenerMap(this);
+    #grammars = new SpeechGrammarList();
+    #interimResults = false;
+    #lang = typeof window !== "undefined" ? window.document.documentElement.getAttribute("lang") || window.navigator.language : "en-US";
+    // eslint-disable-next-line no-magic-numbers
+    #maxAlternatives = 1;
     emitCognitiveServices(type, event) {
       this.dispatchEvent(
         new SpeechRecognitionEvent("cognitiveservices", {
@@ -295,128 +417,144 @@ function createSpeechRecognitionPonyfillFromRecognizer({
       );
     }
     get continuous() {
-      return this
+      return this.#continuous;
     }
     set continuous(value) {
-      this
+      this.#continuous = value;
     }
     get grammars() {
-      return this
+      return this.#grammars;
     }
     set grammars(value) {
-      if (value instanceof
-      this
+      if (value instanceof SpeechGrammarList) {
+        this.#grammars = value;
       } else {
         throw new Error(`The provided value is not of type 'SpeechGrammarList'`);
       }
     }
     get interimResults() {
-      return this
+      return this.#interimResults;
     }
     set interimResults(value) {
-      this
+      this.#interimResults = value;
     }
     get maxAlternatives() {
-      return this
+      return this.#maxAlternatives;
     }
     set maxAlternatives(value) {
-      this
+      this.#maxAlternatives = value;
     }
     get lang() {
-      return this
+      return this.#lang;
     }
     set lang(value) {
-      this
+      this.#lang = value;
     }
     get onaudioend() {
-      return
+      return this.#eventListenerMap.getProperty("audioend");
     }
     set onaudioend(value) {
-
+      this.#eventListenerMap.setProperty("audioend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'audiostart'>) => void) | undefined } */
     get onaudiostart() {
-      return
+      return this.#eventListenerMap.getProperty("audiostart");
     }
     set onaudiostart(value) {
-
+      this.#eventListenerMap.setProperty("audiostart", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'cognitiveservices'>) => void) | undefined } */
     get oncognitiveservices() {
-      return
+      return this.#eventListenerMap.getProperty("cognitiveservices");
     }
     set oncognitiveservices(value) {
-
+      this.#eventListenerMap.setProperty("cognitiveservices", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'end'>) => void) | undefined } */
     get onend() {
-      return
+      return this.#eventListenerMap.getProperty("end");
     }
     set onend(value) {
-
+      this.#eventListenerMap.setProperty("end", value);
     }
+    /** @type { ((event: SpeechRecognitionErrorEvent) => void) | undefined } */
     get onerror() {
-      return
+      return this.#eventListenerMap.getProperty("error");
     }
     set onerror(value) {
-
+      this.#eventListenerMap.setProperty("error", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'result'>) => void) | undefined } */
     get onresult() {
-      return
+      return this.#eventListenerMap.getProperty("result");
     }
     set onresult(value) {
-
+      this.#eventListenerMap.setProperty("result", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'soundend'>) => void) | undefined } */
     get onsoundend() {
-      return
+      return this.#eventListenerMap.getProperty("soundend");
     }
     set onsoundend(value) {
-
+      this.#eventListenerMap.setProperty("soundend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'soundstart'>) => void) | undefined } */
     get onsoundstart() {
-      return
+      return this.#eventListenerMap.getProperty("soundstart");
     }
     set onsoundstart(value) {
-
+      this.#eventListenerMap.setProperty("soundstart", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'speechend'>) => void) | undefined } */
     get onspeechend() {
-      return
+      return this.#eventListenerMap.getProperty("speechend");
     }
     set onspeechend(value) {
-
+      this.#eventListenerMap.setProperty("speechend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'speechstart'>) => void) | undefined } */
    get onspeechstart() {
-      return
+      return this.#eventListenerMap.getProperty("speechstart");
    }
    set onspeechstart(value) {
-
+      this.#eventListenerMap.setProperty("speechstart", value);
    }
+    /** @type { ((event: SpeechRecognitionEvent<'start'>) => void) | undefined } */
    get onstart() {
-      return
+      return this.#eventListenerMap.getProperty("start");
    }
    set onstart(value) {
-
+      this.#eventListenerMap.setProperty("start", value);
    }
+    abort;
+    stop;
    start() {
      this._startOnce().catch((err) => {
-        this.dispatchEvent(
+        this.dispatchEvent(
+          new SpeechRecognitionErrorEvent("error", { error: err, message: err && (err.stack || err.message) })
+        );
      });
    }
    async _startOnce() {
      const recognizer = await createRecognizer(this.lang);
-      const { pause, unprepare } = prepareAudioConfig(recognizer
+      const { pause, unprepare } = prepareAudioConfig(recognizer["audioConfig"]);
      try {
        const queue = createPromiseQueue_default();
        let soundStarted;
        let speechStarted;
        let stopping;
-        const { detach: detachAudioConfigEvent } = recognizer
-
-
-
-
-
-
-
+        const { detach: detachAudioConfigEvent } = recognizer["audioConfig"].events.attach(
+          (event) => {
+            const { name } = event;
+            if (name === "AudioSourceReadyEvent") {
+              queue.push({ audioSourceReady: {} });
+            } else if (name === "AudioSourceOffEvent") {
+              queue.push({ audioSourceOff: {} });
+            } else if (name === "FirstAudibleChunk") {
+              queue.push({ firstAudibleChunk: {} });
+            }
          }
-
+        );
        recognizer.canceled = (_, { errorDetails, offset, reason, sessionId }) => {
          queue.push({
            canceled: {
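Note: start() failures and the onerror-style attributes now go through SpeechRecognitionErrorEvent (a real Event subclass with read-only error and message) and EventListenerMap, which detaches the previous handler before attaching a new one, matching how on* attributes behave on native EventTargets. A consumer-side sketch (illustration only; recognition stands for an instance of the SpeechRecognition class returned by the ponyfill):

    recognition.onerror = (event) => {
      console.log(event.error, event.message); // e.g. "not-allowed", undefined
    };
    recognition.onerror = null; // EventListenerMap removes the old listener, nothing leaks
    recognition.start();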
@@ -458,18 +596,18 @@
          queue.push({ speechEndDetected: { sessionId } });
        };
        const { phrases } = this.grammars;
-        const { dynamicGrammar } = recognizer
+        const { dynamicGrammar } = recognizer["privReco"];
        referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar(referenceGrammars);
-        phrases && phrases.length && dynamicGrammar.addPhrase(phrases);
-        await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync
-        if (recognizer.stopContinuousRecognitionAsync) {
+        phrases && phrases.length && dynamicGrammar.addPhrase([...phrases]);
+        await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync, recognizer)();
+        if (typeof recognizer.stopContinuousRecognitionAsync === "function") {
          this.abort = () => queue.push({ abort: {} });
          this.stop = () => queue.push({ stop: {} });
        } else {
          this.abort = this.stop = void 0;
        }
        let audioStarted;
-        let finalEvent;
+        let finalEvent = void 0;
        let finalizedResults = [];
        for (let loop = 0; !stopping || audioStarted; loop++) {
          const event = await queue.shift();
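Note: every cognitiveServicesAsyncToPromise call site now passes the recognizer as the explicit context argument, so fn.apply(context, ...) keeps the method's this bound when it is detached from its instance. A sketch of the pattern (illustration only):

    // Wraps the SDK's (...args, onSuccess, onError) callback style into a promise.
    const start = cognitiveServicesAsyncToPromise(
      recognizer.startContinuousRecognitionAsync, // the detached method
      recognizer                                  // re-bound via fn.apply(context, ...)
    );
    await start(); // resolves when the SDK invokes its success callback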
@@ -486,10 +624,7 @@
          Object.keys(event).forEach((name) => this.emitCognitiveServices(name, event[name]));
          const errorMessage = canceled && canceled.errorDetails;
          if (/Permission\sdenied/u.test(errorMessage || "")) {
-            finalEvent = {
-              error: "not-allowed",
-              type: "error"
-            };
+            finalEvent = new SpeechRecognitionErrorEvent("error", { error: "not-allowed" });
            break;
          }
          if (!loop) {
@@ -501,30 +636,21 @@
                this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
                this.dispatchEvent(new SpeechRecognitionEvent("audioend"));
              }
-              finalEvent = {
-                error: "network",
-                type: "error"
-              };
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "network" });
            } else {
-              finalEvent = {
-                error: "unknown",
-                type: "error"
-              };
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "unknown" });
            }
            break;
          } else if (abort || stop) {
            if (abort) {
-              finalEvent = {
-                error: "aborted",
-                type: "error"
-              };
+              finalEvent = new SpeechRecognitionErrorEvent("error", { error: "aborted" });
              stopping = "abort";
            } else {
              pause();
              stopping = "stop";
            }
            if (abort && recognizer.stopContinuousRecognitionAsync) {
-              await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+              await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
            }
          } else if (audioSourceReady) {
            this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
@@ -540,10 +666,16 @@
            break;
          } else if (stopping !== "abort") {
            if (recognized && recognized.result && recognized.result.reason === ResultReason2.NoMatch) {
-
-
-
-
+              if (!this.continuous || stopping === "stop") {
+                finalEvent = new SpeechRecognitionEvent("result", {
+                  results: new SpeechRecognitionResultList(finalizedResults)
+                });
+                recognizer.stopContinuousRecognitionAsync && await cognitiveServicesAsyncToPromise(
+                  recognizer.stopContinuousRecognitionAsync,
+                  recognizer
+                )();
+                break;
+              }
            } else if (recognized || recognizing) {
              if (!audioStarted) {
                this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
@@ -558,44 +690,43 @@
                speechStarted = true;
              }
              if (recognized) {
-                const result =
+                const result = cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognized.result, {
                  maxAlternatives: this.maxAlternatives,
                  textNormalization
                });
-                const recognizable = !!result[0]
+                const recognizable = !!result[0]?.transcript;
                if (recognizable) {
                  finalizedResults = [...finalizedResults, result];
                  this.continuous && this.dispatchEvent(
                    new SpeechRecognitionEvent("result", {
-                      results: finalizedResults
+                      results: new SpeechRecognitionResultList(finalizedResults)
                    })
                  );
                }
                if (this.continuous && recognizable) {
-                  finalEvent =
+                  finalEvent = void 0;
                } else {
-                  finalEvent = {
-                    results: finalizedResults
-
-                  };
+                  finalEvent = new SpeechRecognitionEvent("result", {
+                    results: new SpeechRecognitionResultList(finalizedResults)
+                  });
                }
-                if (!this.continuous && recognizer.stopContinuousRecognitionAsync) {
-                  await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+                if ((!this.continuous || stopping === "stop") && recognizer.stopContinuousRecognitionAsync) {
+                  await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
                }
                if (looseEvents && finalEvent && recognizable) {
-                  this.dispatchEvent(
-                  finalEvent =
+                  this.dispatchEvent(finalEvent);
+                  finalEvent = void 0;
                }
              } else if (recognizing) {
                this.interimResults && this.dispatchEvent(
                  new SpeechRecognitionEvent("result", {
-                    results: [
+                    results: new SpeechRecognitionResultList([
                      ...finalizedResults,
-
+                      cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognizing.result, {
                        maxAlternatives: this.maxAlternatives,
                        textNormalization
                      })
-                    ]
+                    ])
                  })
                );
              }
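Note: interim and final result events now always carry a SpeechRecognitionResultList (a FakeArray), and the first alternative is probed with optional chaining (result[0]?.transcript), so an empty recognition no longer throws. Consumer-side sketch (illustration only; recognition is a hypothetical instance as above):

    recognition.onresult = ({ results }) => {
      for (const result of results) {   // SpeechRecognitionResultList is iterable
        if (result.isFinal) {
          console.log(result[0].transcript);
        }
      }
    };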
@@ -613,16 +744,9 @@
        }
        if (finalEvent) {
          if (finalEvent.type === "result" && !finalEvent.results.length) {
-            finalEvent = {
-              error: "no-speech",
-              type: "error"
-            };
-          }
-          if (finalEvent.type === "error") {
-            this.dispatchEvent(new ErrorEvent("error", finalEvent));
-          } else {
-            this.dispatchEvent(new SpeechRecognitionEvent(finalEvent.type, finalEvent));
+            finalEvent = new SpeechRecognitionErrorEvent("error", { error: "no-speech" });
          }
+          this.dispatchEvent(finalEvent);
        }
        this.dispatchEvent(new SpeechRecognitionEvent("end"));
        detachAudioConfigEvent();
@@ -631,17 +755,20 @@
        throw err;
      } finally {
        unprepare();
-        recognizer
+        recognizer["dispose"](false);
      }
    }
  }
  return {
-    SpeechGrammarList
+    SpeechGrammarList,
    SpeechRecognition,
    SpeechRecognitionEvent
  };
}
-
+
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js
+var { AudioConfig: AudioConfig2, OutputFormat: OutputFormat2, SpeechConfig: SpeechConfig2, SpeechRecognizer: SpeechRecognizer3 } = SpeechSDK_default;
+function createSpeechRecognitionPonyfill(options) {
  const {
    audioConfig = AudioConfig2.fromDefaultMicrophoneInput(),
    // We set telemetry to true to honor the default telemetry settings of Speech SDK
@@ -678,7 +805,7 @@ var createSpeechRecognitionPonyfill_default = (options) => {
    }
    speechConfig.outputFormat = OutputFormat2.Detailed;
    speechConfig.speechRecognitionLanguage = lang || "en-US";
-    return new
+    return new SpeechRecognizer3(speechConfig, audioConfig);
  };
  return createSpeechRecognitionPonyfillFromRecognizer({
    audioConfig,
@@ -688,14 +815,14 @@ var createSpeechRecognitionPonyfill_default = (options) => {
    referenceGrammars,
    textNormalization
  });
-}
+}
 
 // src/SpeechServices/SpeechToText.js
-var SpeechToText_default =
+var SpeechToText_default = createSpeechRecognitionPonyfill;
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
-import { EventTarget as EventTarget3, getEventAttributeValue as
-import onErrorResumeNext from "on-error-resume-next";
+import { EventTarget as EventTarget3, getEventAttributeValue as getEventAttributeValue2, setEventAttributeValue as setEventAttributeValue2 } from "event-target-shim";
+import { onErrorResumeNext } from "on-error-resume-next/async";
 
 // src/SpeechServices/TextToSpeech/AudioContextQueue.js
 import memoize from "memoize-one";
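Note: the text-to-speech entry point now imports the named onErrorResumeNext from the library's /async entry instead of the package's default export. The pattern it supports, sketched under the assumption that the async variant awaits the wrapped call and resolves undefined instead of rethrowing (the precise semantics belong to on-error-resume-next, not to this diff):

    // options is a stand-in for the ponyfill's own settings object.
    const voices = await onErrorResumeNext(() => fetchVoices(options)); // undefined on failure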
@@ -769,80 +896,6 @@ var AudioContextQueue_default = class {
  }
};
 
-// src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
-var SpeechSynthesisVoice_default = class {
-  constructor({ gender, lang, voiceURI }) {
-    this._default = false;
-    this._gender = gender;
-    this._lang = lang;
-    this._localService = false;
-    this._name = voiceURI;
-    this._voiceURI = voiceURI;
-  }
-  get default() {
-    return this._default;
-  }
-  get gender() {
-    return this._gender;
-  }
-  get lang() {
-    return this._lang;
-  }
-  get localService() {
-    return this._localService;
-  }
-  get name() {
-    return this._name;
-  }
-  get voiceURI() {
-    return this._voiceURI;
-  }
-};
-
-// src/SpeechServices/TextToSpeech/fetchCustomVoices.js
-async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
-  const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
-  const res = await fetch(
-    `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
-    {
-      headers: {
-        accept: "application/json",
-        "ocp-apim-subscription-key": subscriptionKey
-      }
-    }
-  );
-  if (!res.ok) {
-    throw new Error("Failed to fetch custom voices");
-  }
-  return res.json();
-}
-async function fetchCustomVoices_default({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
-  const { models } = await fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey });
-  return models.map(
-    ({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })
-  ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
-}
-
-// src/SpeechServices/TextToSpeech/fetchVoices.js
-async function fetchVoices({ authorizationToken, region, speechSynthesisHostname, subscriptionKey }) {
-  const hostname = speechSynthesisHostname || `${encodeURI(region)}.tts.speech.microsoft.com`;
-  const res = await fetch(`https://${hostname}/cognitiveservices/voices/list`, {
-    headers: {
-      "content-type": "application/json",
-      ...authorizationToken ? {
-        authorization: `Bearer ${authorizationToken}`
-      } : {
-        "Ocp-Apim-Subscription-Key": subscriptionKey
-      }
-    }
-  });
-  if (!res.ok) {
-    throw new Error("Failed to fetch voices");
-  }
-  const voices = await res.json();
-  return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
-}
-
 // src/SpeechServices/TextToSpeech/SpeechSynthesisEvent.js
 import { Event as Event2 } from "event-target-shim";
 var SpeechSynthesisEvent = class extends Event2 {
@@ -852,8 +905,8 @@ var SpeechSynthesisEvent = class extends Event2 {
};
 
 // src/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js
-import {
-import
+import { EventAsPromise } from "event-as-promise";
+import { EventTarget as EventTarget2, getEventAttributeValue, setEventAttributeValue } from "event-target-shim";
 
 // src/SpeechServices/TextToSpeech/fetchSpeechData.js
 import { decode } from "base64-arraybuffer";
@@ -992,46 +1045,46 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
    this._lang = value;
  }
  get onboundary() {
-    return
+    return getEventAttributeValue(this, "boundary");
  }
  set onboundary(value) {
-
+    setEventAttributeValue(this, "boundary", value);
  }
  get onend() {
-    return
+    return getEventAttributeValue(this, "end");
  }
  set onend(value) {
-
+    setEventAttributeValue(this, "end", value);
  }
  get onerror() {
-    return
+    return getEventAttributeValue(this, "error");
  }
  set onerror(value) {
-
+    setEventAttributeValue(this, "error", value);
  }
  get onmark() {
-    return
+    return getEventAttributeValue(this, "mark");
  }
  set onmark(value) {
-
+    setEventAttributeValue(this, "mark", value);
  }
  get onpause() {
-    return
+    return getEventAttributeValue(this, "pause");
  }
  set onpause(value) {
-
+    setEventAttributeValue(this, "pause", value);
  }
  get onresume() {
-    return
+    return getEventAttributeValue(this, "resume");
  }
  set onresume(value) {
-
+    setEventAttributeValue(this, "resume", value);
  }
  get onstart() {
-    return
+    return getEventAttributeValue(this, "start");
  }
  set onstart(value) {
-
+    setEventAttributeValue(this, "start", value);
  }
  get pitch() {
    return this._pitch;
@@ -1090,6 +1143,80 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
};
 var SpeechSynthesisUtterance_default = SpeechSynthesisUtterance;
 
+// src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
+var SpeechSynthesisVoice_default = class {
+  constructor({ gender, lang, voiceURI }) {
+    this._default = false;
+    this._gender = gender;
+    this._lang = lang;
+    this._localService = false;
+    this._name = voiceURI;
+    this._voiceURI = voiceURI;
+  }
+  get default() {
+    return this._default;
+  }
+  get gender() {
+    return this._gender;
+  }
+  get lang() {
+    return this._lang;
+  }
+  get localService() {
+    return this._localService;
+  }
+  get name() {
+    return this._name;
+  }
+  get voiceURI() {
+    return this._voiceURI;
+  }
+};
+
+// src/SpeechServices/TextToSpeech/fetchCustomVoices.js
+async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+  const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
+  const res = await fetch(
+    `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
+    {
+      headers: {
+        accept: "application/json",
+        "ocp-apim-subscription-key": subscriptionKey
+      }
+    }
+  );
+  if (!res.ok) {
+    throw new Error("Failed to fetch custom voices");
+  }
+  return res.json();
+}
+async function fetchCustomVoices_default({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+  const { models } = await fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey });
+  return models.map(
+    ({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })
+  ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
+}
+
+// src/SpeechServices/TextToSpeech/fetchVoices.js
+async function fetchVoices({ authorizationToken, region, speechSynthesisHostname, subscriptionKey }) {
+  const hostname = speechSynthesisHostname || `${encodeURI(region)}.tts.speech.microsoft.com`;
+  const res = await fetch(`https://${hostname}/cognitiveservices/voices/list`, {
+    headers: {
+      "content-type": "application/json",
+      ...authorizationToken ? {
+        authorization: `Bearer ${authorizationToken}`
+      } : {
+        "Ocp-Apim-Subscription-Key": subscriptionKey
+      }
+    }
+  });
+  if (!res.ok) {
+    throw new Error("Failed to fetch voices");
+  }
+  const voices = await res.json();
+  return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
+}
+
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 var DEFAULT_OUTPUT_FORMAT2 = "audio-24khz-160kbitrate-mono-mp3";
 var EMPTY_ARRAY = [];
@@ -1122,10 +1249,10 @@ var createSpeechSynthesisPonyfill_default = (options) => {
      return EMPTY_ARRAY;
    }
    get onvoiceschanged() {
-      return
+      return getEventAttributeValue2(this, "voiceschanged");
    }
    set onvoiceschanged(value) {
-
+      setEventAttributeValue2(this, "voiceschanged", value);
    }
    pause() {
      this.queue.pause();
@@ -1220,7 +1347,7 @@ function createSpeechServicesPonyfill(options = {}, ...args) {
}
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.0.0-main.
+meta.setAttribute("content", `version=${"8.0.0-main.ccf35da"}`);
 document.head.appendChild(meta);
 export {
   SpeechToText_default as createSpeechRecognitionPonyfill,
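Note: the last hunk only refreshes the build hash that the package stamps into a meta tag at import time. The tag can be read back at runtime to confirm which build is loaded; a sketch, illustration only:

    const meta = document.querySelector('meta[name="web-speech-cognitive-services"]');
    meta && meta.getAttribute("content"); // "version=8.0.0-main.ccf35da"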