web-speech-cognitive-services 8.0.0-main.478b2e9 → 8.0.0-main.5903868
This diff compares the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- package/dist/web-speech-cognitive-services.d.mts +123 -26
- package/dist/web-speech-cognitive-services.d.ts +123 -26
- package/dist/web-speech-cognitive-services.development.js +1487 -1468
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +328 -239
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +325 -236
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +12 -12
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +21 -15
@@ -1,69 +1,3 @@
-// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js
-import { Event, EventTarget, getEventAttributeValue, setEventAttributeValue } from "event-target-shim";
-
-// src/Util/arrayToMap.js
-function arrayToMap_default(array, extras) {
-  const map = {
-    ...[].reduce.call(
-      array,
-      (map2, value, index) => {
-        map2[index] = value;
-        return map2;
-      },
-      {}
-    ),
-    ...extras,
-    length: array.length,
-    [Symbol.iterator]: () => [].slice.call(map)[Symbol.iterator]()
-  };
-  return map;
-}
-
-// src/SpeechServices/SpeechSDK.js
-import {
-  AudioConfig,
-  OutputFormat,
-  ResultReason,
-  SpeechConfig,
-  SpeechRecognizer
-} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk";
-var SpeechSDK_default = {
-  AudioConfig,
-  OutputFormat,
-  ResultReason,
-  SpeechConfig,
-  SpeechRecognizer
-};
-
-// src/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResultList.js
-var {
-  ResultReason: { RecognizingSpeech, RecognizedSpeech }
-} = SpeechSDK_default;
-function cognitiveServiceEventResultToWebSpeechRecognitionResultList_default(result, { maxAlternatives = Infinity, textNormalization = "display" } = {}) {
-  if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !result.json.NBest) {
-    const resultList = [
-      {
-        confidence: 0.5,
-        transcript: result.text
-      }
-    ];
-    if (result.reason === RecognizedSpeech) {
-      resultList.isFinal = true;
-    }
-    return resultList;
-  } else if (result.reason === RecognizedSpeech) {
-    const resultList = arrayToMap_default(
-      (result.json.NBest || []).slice(0, maxAlternatives).map(({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => ({
-        confidence,
-        transcript: textNormalization === "itn" ? itn : textNormalization === "lexical" ? lexical : textNormalization === "maskeditn" ? maskedITN : display
-      })),
-      { isFinal: true }
-    );
-    return resultList;
-  }
-  return [];
-}
-
 // ../../node_modules/p-defer/index.js
 function pDefer() {
   const deferred = {};
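Note: the deleted arrayToMap_default produced a plain object that merely mimics an array. A minimal sketch of that behavior, using only the deleted code above:

```js
// The returned object has numeric keys, a length, an iterator, and any
// extras (here: isFinal), but it is not a real Array.
const list = arrayToMap_default([{ transcript: "hi" }], { isFinal: true });

console.log(list.length);         // 1
console.log(list[0].transcript);  // "hi"
console.log(list.isFinal);        // true
console.log(Array.isArray(list)); // false
console.log([...list].length);    // 1 (iterable via Symbol.iterator)
```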
@@ -182,6 +116,127 @@ function patchOptions({
   };
 }
 
+// src/SpeechServices/SpeechSDK.js
+import {
+  AudioConfig,
+  OutputFormat,
+  ResultReason,
+  SpeechConfig,
+  SpeechRecognizer
+} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk";
+var SpeechSDK_default = {
+  AudioConfig,
+  OutputFormat,
+  ResultReason,
+  SpeechConfig,
+  SpeechRecognizer
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionAlternative.ts
+var SpeechRecognitionAlternative = class {
+  constructor({ confidence, transcript }) {
+    this.#confidence = confidence;
+    this.#transcript = transcript;
+  }
+  #confidence;
+  #transcript;
+  get confidence() {
+    return this.#confidence;
+  }
+  get transcript() {
+    return this.#transcript;
+  }
+};
+
+// src/SpeechServices/SpeechToText/FakeArray.ts
+var FakeArray = class {
+  constructor(array) {
+    if (!array) {
+      throw new Error("array must be set.");
+    }
+    this.#array = array;
+    for (const key in array) {
+      Object.defineProperty(this, key, {
+        enumerable: true,
+        get() {
+          return array[key];
+        }
+      });
+    }
+  }
+  #array;
+  [Symbol.iterator]() {
+    return this.#array[Symbol.iterator]();
+  }
+  get length() {
+    return this.#array.length;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResult.ts
+var SpeechRecognitionResult = class extends FakeArray {
+  constructor(init) {
+    super(init.results);
+    this.#isFinal = init.isFinal;
+  }
+  #isFinal;
+  get isFinal() {
+    return this.#isFinal;
+  }
+};
+
+// src/SpeechServices/SpeechToText/cognitiveServiceEventResultToWebSpeechRecognitionResult.ts
+var {
+  ResultReason: { RecognizingSpeech, RecognizedSpeech }
+} = SpeechSDK_default;
+function cognitiveServiceEventResultToWebSpeechRecognitionResult_default(result, init) {
+  const { maxAlternatives = Infinity, textNormalization = "display" } = init || {};
+  const json = typeof result.json === "string" ? JSON.parse(result.json) : result.json;
+  if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !json.NBest) {
+    return new SpeechRecognitionResult({
+      isFinal: result.reason === RecognizedSpeech,
+      results: [
+        new SpeechRecognitionAlternative({
+          confidence: 0.5,
+          transcript: result.text
+        })
+      ]
+    });
+  } else if (result.reason === RecognizedSpeech) {
+    return new SpeechRecognitionResult({
+      isFinal: true,
+      results: (json.NBest || []).slice(0, maxAlternatives).map(
+        ({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => new SpeechRecognitionAlternative({
+          confidence,
+          transcript: textNormalization === "itn" ? itn : textNormalization === "lexical" ? lexical : textNormalization === "maskeditn" ? maskedITN : display
+        })
+      )
+    });
+  }
+  return new SpeechRecognitionResult({ isFinal: false, results: [] });
+}
+
+// src/SpeechServices/SpeechToText/EventListenerMap.ts
+var EventListenerMap = class {
+  constructor(eventTarget) {
+    this.#eventTarget = eventTarget;
+    this.#propertyMap = {};
+  }
+  #eventTarget;
+  #propertyMap;
+  getProperty(name) {
+    return this.#propertyMap[name];
+  }
+  setProperty(name, value) {
+    const existing = this.#propertyMap[name];
+    existing && this.#eventTarget.removeEventListener(name, existing);
+    if (value) {
+      this.#eventTarget.addEventListener(name, value);
+    }
+    this.#propertyMap[name] = value;
+  }
+};
+
 // src/SpeechServices/SpeechToText/SpeechGrammarList.js
 var SpeechGrammarList_default = class {
   constructor() {
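Note: the plain array-like objects above are replaced by real classes. A minimal sketch of how a FakeArray-backed SpeechRecognitionResult behaves, based only on the classes added in this hunk:

```js
// Entries are exposed as read-only getters; length and iteration are
// delegated to the wrapped array.
const result = new SpeechRecognitionResult({
  isFinal: true,
  results: [new SpeechRecognitionAlternative({ confidence: 0.9, transcript: "hello" })]
});

console.log(result.length);        // 1
console.log(result[0].transcript); // "hello"
console.log(result.isFinal);       // true

for (const { confidence } of result) {
  console.log(confidence); // 0.9
}
```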
@@ -204,6 +259,53 @@ var SpeechGrammarList_default = class {
   }
 };
 
+// src/SpeechServices/SpeechToText/SpeechRecognitionErrorEvent.ts
+var SpeechRecognitionErrorEvent = class extends Event {
+  constructor(type, { error, message }) {
+    super(type);
+    this.#error = error;
+    this.#message = message;
+  }
+  #error;
+  #message;
+  get error() {
+    return this.#error;
+  }
+  get message() {
+    return this.#message;
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionResultList.ts
+var SpeechRecognitionResultList = class extends FakeArray {
+  constructor(result) {
+    super(result);
+  }
+};
+
+// src/SpeechServices/SpeechToText/SpeechRecognitionEvent.ts
+var SpeechRecognitionEvent = class extends Event {
+  constructor(type, { data, resultIndex, results } = {}) {
+    super(type);
+    this.#data = data;
+    this.#resultIndex = resultIndex;
+    this.#results = results || new SpeechRecognitionResultList([]);
+  }
+  #data;
+  // TODO: "resultIndex" should be set.
+  #resultIndex;
+  #results;
+  get data() {
+    return this.#data;
+  }
+  get resultIndex() {
+    return this.#resultIndex;
+  }
+  get results() {
+    return this.#results;
+  }
+};
+
 // src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.js
 var { AudioConfig: AudioConfig2, OutputFormat: OutputFormat2, ResultReason: ResultReason2, SpeechConfig: SpeechConfig2, SpeechRecognizer: SpeechRecognizer2 } = SpeechSDK_default;
 function serializeRecognitionResult({ duration, errorDetails, json, offset, properties, reason, resultId, text }) {
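Note: recognition errors are now dispatched as the SpeechRecognitionErrorEvent defined above, which mirrors the Web Speech API event shape. A minimal consumer sketch, assuming `recognition` is an instance created from this ponyfill:

```js
recognition.addEventListener("error", event => {
  // Read-only getters added in this hunk; "error" carries values such as
  // "not-allowed", "network", "aborted", "no-speech", or a thrown Error.
  console.log(event.error);
  console.log(event.message); // e.g. a stack trace when start() fails
});
```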
@@ -225,16 +327,6 @@ function averageAmplitude(arrayBuffer) {
 function cognitiveServicesAsyncToPromise(fn) {
   return (...args) => new Promise((resolve, reject) => fn(...args, resolve, reject));
 }
-var SpeechRecognitionEvent = class extends Event {
-  constructor(type, { data, emma, interpretation, resultIndex, results } = {}) {
-    super(type);
-    this.data = data;
-    this.emma = emma;
-    this.interpretation = interpretation;
-    this.resultIndex = resultIndex;
-    this.results = results;
-  }
-};
 function prepareAudioConfig(audioConfig) {
   const originalAttach = audioConfig.attach;
   const boundOriginalAttach = audioConfig.attach.bind(audioConfig);
@@ -283,7 +375,10 @@ function createSpeechRecognitionPonyfillFromRecognizer({
       this._lang = typeof window !== "undefined" ? window.document.documentElement.getAttribute("lang") || window.navigator.language : "en-US";
       this._grammars = new SpeechGrammarList_default();
       this._maxAlternatives = 1;
+      this.#eventListenerMap = new EventListenerMap(this);
     }
+    /** @type { import('./SpeechRecognitionEventListenerMap').SpeechRecognitionEventListenerMap } */
+    #eventListenerMap;
     emitCognitiveServices(type, event) {
       this.dispatchEvent(
         new SpeechRecognitionEvent("cognitiveservices", {
@@ -328,75 +423,88 @@ function createSpeechRecognitionPonyfillFromRecognizer({
     set lang(value) {
       this._lang = value;
     }
+    /** @type { ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined } */
     get onaudioend() {
-      return getEventAttributeValue(this, "audioend");
+      return this.#eventListenerMap.getProperty("audioend");
     }
     set onaudioend(value) {
-      setEventAttributeValue(this, "audioend", value);
+      this.#eventListenerMap.setProperty("audioend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'audiostart'>) => void) | undefined } */
     get onaudiostart() {
-      return getEventAttributeValue(this, "audiostart");
+      return this.#eventListenerMap.getProperty("audiostart");
     }
     set onaudiostart(value) {
-      setEventAttributeValue(this, "audiostart", value);
+      this.#eventListenerMap.setProperty("audiostart", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'cognitiveservices'>) => void) | undefined } */
     get oncognitiveservices() {
-      return getEventAttributeValue(this, "cognitiveservices");
+      return this.#eventListenerMap.getProperty("cognitiveservices");
     }
     set oncognitiveservices(value) {
-      setEventAttributeValue(this, "cognitiveservices", value);
+      this.#eventListenerMap.setProperty("cognitiveservices", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'end'>) => void) | undefined } */
     get onend() {
-      return getEventAttributeValue(this, "end");
+      return this.#eventListenerMap.getProperty("end");
    }
     set onend(value) {
-      setEventAttributeValue(this, "end", value);
+      this.#eventListenerMap.setProperty("end", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'error'>) => void) | undefined } */
     get onerror() {
-      return getEventAttributeValue(this, "error");
+      return this.#eventListenerMap.getProperty("error");
     }
     set onerror(value) {
-      setEventAttributeValue(this, "error", value);
+      this.#eventListenerMap.setProperty("error", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'result'>) => void) | undefined } */
     get onresult() {
-      return getEventAttributeValue(this, "result");
+      return this.#eventListenerMap.getProperty("result");
     }
     set onresult(value) {
-      setEventAttributeValue(this, "result", value);
+      this.#eventListenerMap.setProperty("result", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'soundend'>) => void) | undefined } */
     get onsoundend() {
-      return getEventAttributeValue(this, "soundend");
+      return this.#eventListenerMap.getProperty("soundend");
     }
     set onsoundend(value) {
-      setEventAttributeValue(this, "soundend", value);
+      this.#eventListenerMap.setProperty("soundend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'soundstart'>) => void) | undefined } */
     get onsoundstart() {
-      return getEventAttributeValue(this, "soundstart");
+      return this.#eventListenerMap.getProperty("soundstart");
     }
     set onsoundstart(value) {
-      setEventAttributeValue(this, "soundstart", value);
+      this.#eventListenerMap.setProperty("soundstart", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'speechend'>) => void) | undefined } */
     get onspeechend() {
-      return getEventAttributeValue(this, "speechend");
+      return this.#eventListenerMap.getProperty("speechend");
     }
     set onspeechend(value) {
-      setEventAttributeValue(this, "speechend", value);
+      this.#eventListenerMap.setProperty("speechend", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'speechstart'>) => void) | undefined } */
     get onspeechstart() {
-      return getEventAttributeValue(this, "speechstart");
+      return this.#eventListenerMap.getProperty("speechstart");
     }
     set onspeechstart(value) {
-      setEventAttributeValue(this, "speechstart", value);
+      this.#eventListenerMap.setProperty("speechstart", value);
     }
+    /** @type { ((event: SpeechRecognitionEvent<'start'>) => void) | undefined } */
     get onstart() {
-      return getEventAttributeValue(this, "start");
+      return this.#eventListenerMap.getProperty("start");
     }
     set onstart(value) {
-      setEventAttributeValue(this, "start", value);
+      this.#eventListenerMap.setProperty("start", value);
     }
     start() {
       this._startOnce().catch((err) => {
-        this.dispatchEvent(new ErrorEvent("error", { error: err, message: err && (err.stack || err.message) }));
+        this.dispatchEvent(
+          new SpeechRecognitionErrorEvent("error", { error: err, message: err && (err.stack || err.message) })
+        );
       });
     }
     async _startOnce() {
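Note: the on* handler properties now delegate to the EventListenerMap added earlier, so assignment registers the handler through addEventListener and replaces any previously assigned one. A minimal sketch with hypothetical handlers:

```js
const first = () => console.log("first");
const second = () => console.log("second");

recognition.onresult = first;     // addEventListener("result", first)
recognition.onresult = second;    // removes "first", then adds "second"
recognition.onresult = undefined; // removes "second"

console.log(recognition.onresult); // undefined
```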
@@ -486,10 +594,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
       Object.keys(event).forEach((name) => this.emitCognitiveServices(name, event[name]));
       const errorMessage = canceled && canceled.errorDetails;
       if (/Permission\sdenied/u.test(errorMessage || "")) {
-        finalEvent = {
-          error: "not-allowed",
-          type: "error"
-        };
+        finalEvent = new SpeechRecognitionErrorEvent("error", { error: "not-allowed" });
         break;
       }
       if (!loop) {
@@ -501,23 +606,14 @@ function createSpeechRecognitionPonyfillFromRecognizer({
         this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
         this.dispatchEvent(new SpeechRecognitionEvent("audioend"));
       }
-      finalEvent = {
-        error: "network",
-        type: "error"
-      };
+      finalEvent = new SpeechRecognitionErrorEvent("error", { error: "network" });
     } else {
-      finalEvent = {
-        error: "unknown",
-        type: "error"
-      };
+      finalEvent = new SpeechRecognitionErrorEvent("error", { error: "unknown" });
     }
     break;
   } else if (abort || stop) {
     if (abort) {
-      finalEvent = {
-        error: "aborted",
-        type: "error"
-      };
+      finalEvent = new SpeechRecognitionErrorEvent("error", { error: "aborted" });
       stopping = "abort";
     } else {
       pause();
@@ -540,10 +636,11 @@ function createSpeechRecognitionPonyfillFromRecognizer({
         break;
       } else if (stopping !== "abort") {
         if (recognized && recognized.result && recognized.result.reason === ResultReason2.NoMatch) {
-          finalEvent = {
-            results: finalizedResults,
-            type: "result"
-          };
+          if (!this.continuous || stopping === "stop") {
+            finalEvent = new SpeechRecognitionEvent("result", { results: finalizedResults });
+            recognizer.stopContinuousRecognitionAsync && await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+            break;
+          }
         } else if (recognized || recognizing) {
           if (!audioStarted) {
             this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
@@ -558,7 +655,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
             speechStarted = true;
           }
           if (recognized) {
-            const result = cognitiveServiceEventResultToWebSpeechRecognitionResultList_default(recognized.result, {
+            const result = cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognized.result, {
               maxAlternatives: this.maxAlternatives,
               textNormalization
             });
@@ -567,35 +664,34 @@ function createSpeechRecognitionPonyfillFromRecognizer({
             finalizedResults = [...finalizedResults, result];
             this.continuous && this.dispatchEvent(
               new SpeechRecognitionEvent("result", {
-                results: finalizedResults
+                results: new SpeechRecognitionResultList(finalizedResults)
               })
             );
           }
           if (this.continuous && recognizable) {
-            finalEvent = null;
+            finalEvent = void 0;
           } else {
-            finalEvent = {
-              results: finalizedResults,
-              type: "result"
-            };
+            finalEvent = new SpeechRecognitionEvent("result", {
+              results: new SpeechRecognitionResultList(finalizedResults)
+            });
           }
-          if (!this.continuous && recognizer.stopContinuousRecognitionAsync) {
+          if ((!this.continuous || stopping === "stop") && recognizer.stopContinuousRecognitionAsync) {
            await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
           }
           if (looseEvents && finalEvent && recognizable) {
-            this.dispatchEvent(new SpeechRecognitionEvent(finalEvent.type, finalEvent));
-            finalEvent = null;
+            this.dispatchEvent(finalEvent);
+            finalEvent = void 0;
           }
         } else if (recognizing) {
           this.interimResults && this.dispatchEvent(
             new SpeechRecognitionEvent("result", {
-              results: [
+              results: new SpeechRecognitionResultList([
                 ...finalizedResults,
-                cognitiveServiceEventResultToWebSpeechRecognitionResultList_default(recognizing.result, {
+                cognitiveServiceEventResultToWebSpeechRecognitionResult_default(recognizing.result, {
                   maxAlternatives: this.maxAlternatives,
                   textNormalization
                 })
-              ]
+              ])
             })
           );
         }
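Note: "result" events now carry a SpeechRecognitionResultList rather than a plain array, with the interim result appended after all finalized results. A minimal consumer sketch, reusing the standard Web Speech API iteration pattern:

```js
recognition.addEventListener("result", ({ results }) => {
  for (let index = 0; index < results.length; index++) {
    const result = results[index];
    const { transcript } = result[0];

    console.log(`${result.isFinal ? "final" : "interim"}: ${transcript}`);
  }
});
```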
@@ -613,16 +709,9 @@ function createSpeechRecognitionPonyfillFromRecognizer({
     }
     if (finalEvent) {
       if (finalEvent.type === "result" && !finalEvent.results.length) {
-        finalEvent = {
-          error: "no-speech",
-          type: "error"
-        };
-      }
-      if (finalEvent.type === "error") {
-        this.dispatchEvent(new ErrorEvent("error", finalEvent));
-      } else {
-        this.dispatchEvent(new SpeechRecognitionEvent(finalEvent.type, finalEvent));
+        finalEvent = new SpeechRecognitionErrorEvent("error", { error: "no-speech" });
       }
+      this.dispatchEvent(finalEvent);
     }
     this.dispatchEvent(new SpeechRecognitionEvent("end"));
     detachAudioConfigEvent();
@@ -694,8 +783,8 @@ var createSpeechRecognitionPonyfill_default = (options) => {
 var SpeechToText_default = createSpeechRecognitionPonyfill_default;
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
-import { EventTarget as EventTarget3, getEventAttributeValue as getEventAttributeValue3, setEventAttributeValue as setEventAttributeValue3 } from "event-target-shim";
-import onErrorResumeNext from "on-error-resume-next";
+import { EventTarget as EventTarget3, getEventAttributeValue as getEventAttributeValue2, setEventAttributeValue as setEventAttributeValue2 } from "event-target-shim";
+import { onErrorResumeNext } from "on-error-resume-next/async";
 
 // src/SpeechServices/TextToSpeech/AudioContextQueue.js
 import memoize from "memoize-one";
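Note: the text-to-speech entry point now takes onErrorResumeNext as a named export from the package's async subpath rather than the default export of on-error-resume-next. A minimal sketch of the assumed contract (resolve to undefined rather than reject):

```js
import { onErrorResumeNext } from "on-error-resume-next/async";

// Assumption: the async variant swallows the rejection and resolves to
// undefined, which is why no try/catch is needed at the call site.
const value = await onErrorResumeNext(async () => {
  throw new Error("ignored");
});

console.log(value); // undefined
```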
@@ -769,80 +858,6 @@ var AudioContextQueue_default = class {
   }
 };
 
-// src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
-var SpeechSynthesisVoice_default = class {
-  constructor({ gender, lang, voiceURI }) {
-    this._default = false;
-    this._gender = gender;
-    this._lang = lang;
-    this._localService = false;
-    this._name = voiceURI;
-    this._voiceURI = voiceURI;
-  }
-  get default() {
-    return this._default;
-  }
-  get gender() {
-    return this._gender;
-  }
-  get lang() {
-    return this._lang;
-  }
-  get localService() {
-    return this._localService;
-  }
-  get name() {
-    return this._name;
-  }
-  get voiceURI() {
-    return this._voiceURI;
-  }
-};
-
-// src/SpeechServices/TextToSpeech/fetchCustomVoices.js
-async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
-  const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
-  const res = await fetch(
-    `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
-    {
-      headers: {
-        accept: "application/json",
-        "ocp-apim-subscription-key": subscriptionKey
-      }
-    }
-  );
-  if (!res.ok) {
-    throw new Error("Failed to fetch custom voices");
-  }
-  return res.json();
-}
-async function fetchCustomVoices_default({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
-  const { models } = await fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey });
-  return models.map(
-    ({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })
-  ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
-}
-
-// src/SpeechServices/TextToSpeech/fetchVoices.js
-async function fetchVoices({ authorizationToken, region, speechSynthesisHostname, subscriptionKey }) {
-  const hostname = speechSynthesisHostname || `${encodeURI(region)}.tts.speech.microsoft.com`;
-  const res = await fetch(`https://${hostname}/cognitiveservices/voices/list`, {
-    headers: {
-      "content-type": "application/json",
-      ...authorizationToken ? {
-        authorization: `Bearer ${authorizationToken}`
-      } : {
-        "Ocp-Apim-Subscription-Key": subscriptionKey
-      }
-    }
-  });
-  if (!res.ok) {
-    throw new Error("Failed to fetch voices");
-  }
-  const voices = await res.json();
-  return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
-}
-
 // src/SpeechServices/TextToSpeech/SpeechSynthesisEvent.js
 import { Event as Event2 } from "event-target-shim";
 var SpeechSynthesisEvent = class extends Event2 {
@@ -852,8 +867,8 @@ var SpeechSynthesisEvent = class extends Event2 {
 };
 
 // src/SpeechServices/TextToSpeech/SpeechSynthesisUtterance.js
-import { EventTarget as EventTarget2, getEventAttributeValue as getEventAttributeValue2, setEventAttributeValue as setEventAttributeValue2 } from "event-target-shim";
-import EventAsPromise from "event-as-promise";
+import { EventAsPromise } from "event-as-promise";
+import { EventTarget as EventTarget2, getEventAttributeValue, setEventAttributeValue } from "event-target-shim";
 
 // src/SpeechServices/TextToSpeech/fetchSpeechData.js
 import { decode } from "base64-arraybuffer";
@@ -992,46 +1007,46 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
     this._lang = value;
   }
   get onboundary() {
-    return getEventAttributeValue2(this, "boundary");
+    return getEventAttributeValue(this, "boundary");
   }
   set onboundary(value) {
-    setEventAttributeValue2(this, "boundary", value);
+    setEventAttributeValue(this, "boundary", value);
   }
   get onend() {
-    return getEventAttributeValue2(this, "end");
+    return getEventAttributeValue(this, "end");
   }
   set onend(value) {
-    setEventAttributeValue2(this, "end", value);
+    setEventAttributeValue(this, "end", value);
   }
   get onerror() {
-    return getEventAttributeValue2(this, "error");
+    return getEventAttributeValue(this, "error");
   }
   set onerror(value) {
-    setEventAttributeValue2(this, "error", value);
+    setEventAttributeValue(this, "error", value);
   }
   get onmark() {
-    return getEventAttributeValue2(this, "mark");
+    return getEventAttributeValue(this, "mark");
   }
   set onmark(value) {
-    setEventAttributeValue2(this, "mark", value);
+    setEventAttributeValue(this, "mark", value);
   }
   get onpause() {
-    return getEventAttributeValue2(this, "pause");
+    return getEventAttributeValue(this, "pause");
   }
   set onpause(value) {
-    setEventAttributeValue2(this, "pause", value);
+    setEventAttributeValue(this, "pause", value);
   }
   get onresume() {
-    return getEventAttributeValue2(this, "resume");
+    return getEventAttributeValue(this, "resume");
   }
   set onresume(value) {
-    setEventAttributeValue2(this, "resume", value);
+    setEventAttributeValue(this, "resume", value);
   }
   get onstart() {
-    return getEventAttributeValue2(this, "start");
+    return getEventAttributeValue(this, "start");
   }
   set onstart(value) {
-    setEventAttributeValue2(this, "start", value);
+    setEventAttributeValue(this, "start", value);
   }
   get pitch() {
     return this._pitch;
@@ -1090,6 +1105,80 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
 };
 var SpeechSynthesisUtterance_default = SpeechSynthesisUtterance;
 
+// src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
+var SpeechSynthesisVoice_default = class {
+  constructor({ gender, lang, voiceURI }) {
+    this._default = false;
+    this._gender = gender;
+    this._lang = lang;
+    this._localService = false;
+    this._name = voiceURI;
+    this._voiceURI = voiceURI;
+  }
+  get default() {
+    return this._default;
+  }
+  get gender() {
+    return this._gender;
+  }
+  get lang() {
+    return this._lang;
+  }
+  get localService() {
+    return this._localService;
+  }
+  get name() {
+    return this._name;
+  }
+  get voiceURI() {
+    return this._voiceURI;
+  }
+};
+
+// src/SpeechServices/TextToSpeech/fetchCustomVoices.js
+async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+  const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
+  const res = await fetch(
+    `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
+    {
+      headers: {
+        accept: "application/json",
+        "ocp-apim-subscription-key": subscriptionKey
+      }
+    }
+  );
+  if (!res.ok) {
+    throw new Error("Failed to fetch custom voices");
+  }
+  return res.json();
+}
+async function fetchCustomVoices_default({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+  const { models } = await fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey });
+  return models.map(
+    ({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })
+  ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
+}
+
+// src/SpeechServices/TextToSpeech/fetchVoices.js
+async function fetchVoices({ authorizationToken, region, speechSynthesisHostname, subscriptionKey }) {
+  const hostname = speechSynthesisHostname || `${encodeURI(region)}.tts.speech.microsoft.com`;
+  const res = await fetch(`https://${hostname}/cognitiveservices/voices/list`, {
+    headers: {
+      "content-type": "application/json",
+      ...authorizationToken ? {
+        authorization: `Bearer ${authorizationToken}`
+      } : {
+        "Ocp-Apim-Subscription-Key": subscriptionKey
+      }
+    }
+  });
+  if (!res.ok) {
+    throw new Error("Failed to fetch voices");
+  }
+  const voices = await res.json();
+  return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice_default({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
+}
+
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 var DEFAULT_OUTPUT_FORMAT2 = "audio-24khz-160kbitrate-mono-mp3";
 var EMPTY_ARRAY = [];
@@ -1122,10 +1211,10 @@ var createSpeechSynthesisPonyfill_default = (options) => {
     return EMPTY_ARRAY;
   }
   get onvoiceschanged() {
-    return getEventAttributeValue3(this, "voiceschanged");
+    return getEventAttributeValue2(this, "voiceschanged");
  }
   set onvoiceschanged(value) {
-    setEventAttributeValue3(this, "voiceschanged", value);
+    setEventAttributeValue2(this, "voiceschanged", value);
   }
   pause() {
     this.queue.pause();
@@ -1220,7 +1309,7 @@ function createSpeechServicesPonyfill(options = {}, ...args) {
 }
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.0.0-main.478b2e9"}`);
+meta.setAttribute("content", `version=${"8.0.0-main.5903868"}`);
 document.head.appendChild(meta);
 export {
   SpeechToText_default as createSpeechRecognitionPonyfill,