web-speech-cognitive-services 8.0.0-main.5903868 → 8.0.0-main.6cbf0fb
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/web-speech-cognitive-services.d.mts +217 -65
- package/dist/web-speech-cognitive-services.d.ts +217 -65
- package/dist/web-speech-cognitive-services.development.js +240 -199
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +238 -194
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +238 -194
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +12 -12
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +2 -2
@@ -1,38 +1,3 @@
-// ../../node_modules/p-defer/index.js
-function pDefer() {
-  const deferred = {};
-  deferred.promise = new Promise((resolve, reject) => {
-    deferred.resolve = resolve;
-    deferred.reject = reject;
-  });
-  return deferred;
-}
-
-// src/Util/createPromiseQueue.js
-function createPromiseQueue_default() {
-  let shiftDeferred;
-  const queue = [];
-  const push = (value) => {
-    if (shiftDeferred) {
-      const { resolve } = shiftDeferred;
-      shiftDeferred = null;
-      resolve(value);
-    } else {
-      queue.push(value);
-    }
-  };
-  const shift = () => {
-    if (queue.length) {
-      return Promise.resolve(queue.shift());
-    }
-    return (shiftDeferred || (shiftDeferred = pDefer())).promise;
-  };
-  return {
-    push,
-    shift
-  };
-}
-
 // src/SpeechServices/resolveFunctionOrReturnValue.ts
 function isFunction(value) {
   return typeof value === "function";
@@ -41,17 +6,21 @@ function resolveFunctionOrReturnValue(fnOrValue) {
   return isFunction(fnOrValue) ? fnOrValue() : fnOrValue;
 }
 
-// src/SpeechServices/patchOptions.
+// src/SpeechServices/patchOptions.ts
 var shouldWarnOnSubscriptionKey = true;
-function patchOptions({
-
-
-
-
-
-
-
-
+function patchOptions(init) {
+  const {
+    audioConfig,
+    authorizationToken,
+    enableTelemetry,
+    looseEvent,
+    referenceGrammars,
+    region = "westus",
+    speechRecognitionEndpointId,
+    subscriptionKey,
+    textNormalization
+  } = init;
+  let { credentials, looseEvents } = init;
   if (typeof looseEvent !== "undefined") {
     console.warn('web-speech-cognitive-services: The option "looseEvent" should be named as "looseEvents".');
     looseEvents = looseEvent;
@@ -63,11 +32,12 @@ function patchOptions({
       console.warn(
         "web-speech-cognitive-services: We are deprecating authorizationToken, region, and subscriptionKey. Please use credentials instead. The deprecated option will be removed on or after 2020-11-14."
       );
-      credentials = async () => authorizationToken ? { authorizationToken: await resolveFunctionOrReturnValue(authorizationToken), region } : { region, subscriptionKey: await resolveFunctionOrReturnValue(subscriptionKey) };
+      credentials = async () => typeof init.authorizationToken !== "undefined" ? { authorizationToken: await resolveFunctionOrReturnValue(init.authorizationToken), region } : { region, subscriptionKey: await resolveFunctionOrReturnValue(init.subscriptionKey) };
     }
   }
-  return {
-
+  return Object.freeze({
+    audioConfig,
+    enableTelemetry,
     fetchCredentials: async () => {
       const {
         authorizationToken: authorizationToken2,
@@ -102,21 +72,23 @@ function patchOptions({
         );
         shouldWarnOnSubscriptionKey = false;
       }
-
-
-
-
-
-
-
-      }
-      return resolvedCredentials;
+      return {
+        ...typeof authorizationToken2 !== "undefined" ? { authorizationToken: authorizationToken2 } : { subscriptionKey: subscriptionKey2 },
+        ...typeof region2 !== "undefined" ? { region: region2 } : {
+          customVoiceHostname,
+          speechRecognitionHostname,
+          speechSynthesisHostname
+        }
+      };
     },
-    looseEvents
-
+    looseEvents: !!looseEvents,
+    referenceGrammars: referenceGrammars && Object.freeze([...referenceGrammars]),
+    speechRecognitionEndpointId,
+    textNormalization
+  });
 }
 
-// src/SpeechServices/SpeechSDK.
+// src/SpeechServices/SpeechSDK.ts
 import {
   AudioConfig,
   OutputFormat,
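Editor's note: the hunk above changes `fetchCredentials` to resolve a discriminated shape: either an authorization token or a subscription key, combined with either a `region` or a set of hostname overrides. A minimal sketch of the two shapes, assuming placeholder values; none of these literals come from the package:

```js
// Two illustrative `credentials` results matching the shape fetchCredentials
// now returns (all values are placeholders).
const regionBased = {
  region: 'westus',
  subscriptionKey: 'YOUR_SUBSCRIPTION_KEY'
};

const hostnameBased = {
  authorizationToken: 'YOUR_AUTHORIZATION_TOKEN',
  customVoiceHostname: 'example.cognitiveservices.azure.com',
  speechRecognitionHostname: 'example.cognitiveservices.azure.com',
  speechSynthesisHostname: 'example.cognitiveservices.azure.com'
};
```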
@@ -132,6 +104,41 @@ var SpeechSDK_default = {
   SpeechRecognizer
 };
 
+// ../../node_modules/p-defer/index.js
+function pDefer() {
+  const deferred = {};
+  deferred.promise = new Promise((resolve, reject) => {
+    deferred.resolve = resolve;
+    deferred.reject = reject;
+  });
+  return deferred;
+}
+
+// src/Util/createPromiseQueue.js
+function createPromiseQueue_default() {
+  let shiftDeferred;
+  const queue = [];
+  const push = (value) => {
+    if (shiftDeferred) {
+      const { resolve } = shiftDeferred;
+      shiftDeferred = null;
+      resolve(value);
+    } else {
+      queue.push(value);
+    }
+  };
+  const shift = () => {
+    if (queue.length) {
+      return Promise.resolve(queue.shift());
+    }
+    return (shiftDeferred || (shiftDeferred = pDefer())).promise;
+  };
+  return {
+    push,
+    shift
+  };
+}
+
 // src/SpeechServices/SpeechToText/SpeechRecognitionAlternative.ts
 var SpeechRecognitionAlternative = class {
   constructor({ confidence, transcript }) {
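Editor's note: `pDefer` and `createPromiseQueue_default` move here unchanged from the top of the bundle. The queue lets a consumer `await shift()` before the matching `push(value)` arrives. A small usage sketch (the values are illustrative):

```js
// Illustrative: values come out in arrival order, whether push() or
// shift() happens first.
const queue = createPromiseQueue_default();

queue.push(1);
await queue.shift(); // 1 (was buffered)

const pending = queue.shift(); // nothing buffered: returns a deferred promise
queue.push(2);
await pending; // 2 (resolved by the push)
```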
@@ -148,7 +155,7 @@ var SpeechRecognitionAlternative = class {
   }
 };
 
-// src/SpeechServices/SpeechToText/FakeArray.ts
+// src/SpeechServices/SpeechToText/private/FakeArray.ts
 var FakeArray = class {
   constructor(array) {
     if (!array) {
@@ -216,7 +223,15 @@ function cognitiveServiceEventResultToWebSpeechRecognitionResult_default(result,
   return new SpeechRecognitionResult({ isFinal: false, results: [] });
 }
 
-// src/SpeechServices/SpeechToText/
+// src/SpeechServices/SpeechToText/cognitiveServicesAsyncToPromise.ts
+function cognitiveServicesAsyncToPromise(fn, context = void 0) {
+  return (...args) => (
+    // eslint-disable-next-line prefer-spread
+    new Promise((resolve, reject) => fn.apply(context, [...args, resolve, reject]))
+  );
+}
+
+// src/SpeechServices/SpeechToText/private/EventListenerMap.ts
 var EventListenerMap = class {
   constructor(eventTarget) {
     this.#eventTarget = eventTarget;
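Editor's note: the new `cognitiveServicesAsyncToPromise` helper adapts the Speech SDK's `(...args, onSuccess, onError)` callback convention to a promise, and the added `context` parameter preserves the method's `this` binding via `fn.apply(context, ...)`. Usage mirrors what this diff itself does later; `recognizer` here is an assumed `SpeechRecognizer` instance:

```js
// Illustrative: promisify a callback-style recognizer method while keeping
// `this` bound to the recognizer.
const startRecognition = cognitiveServicesAsyncToPromise(
  recognizer.startContinuousRecognitionAsync,
  recognizer
);

await startRecognition(); // resolves via the SDK success callback, rejects on error
```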
@@ -237,22 +252,90 @@ var EventListenerMap = class {
   }
 };
 
-// src/SpeechServices/SpeechToText/
-
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
+import { AudioSourceEvent } from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common/AudioSourceEvents";
+
+// src/SpeechServices/SpeechToText/private/averageAmplitude.ts
+function averageAmplitude(arrayBuffer) {
+  const array = Array.from(new Int16Array(arrayBuffer));
+  return array.reduce((averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
+}
+
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
+function prepareAudioConfig(audioConfig) {
+  const audioConfigImpl = audioConfig;
+  const originalAttach = audioConfigImpl.attach;
+  const boundOriginalAttach = audioConfigImpl.attach.bind(audioConfigImpl);
+  let firstChunk = false;
+  let muted = false;
+  audioConfigImpl.attach = async () => {
+    const reader = await boundOriginalAttach("");
+    return {
+      ...reader,
+      read: async () => {
+        const chunk = await reader.read();
+        if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
+          audioConfigImpl.events.onEvent(new AudioSourceEvent("FirstAudibleChunk", ""));
+          firstChunk = true;
+        }
+        if (muted) {
+          return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
+        }
+        return chunk;
+      }
+    };
+  };
+  return {
+    audioConfig,
+    pause: () => {
+      muted = true;
+    },
+    unprepare: () => {
+      audioConfigImpl.attach = originalAttach;
+    }
+  };
+}
+
+// src/SpeechServices/SpeechToText/private/serializeRecognitionResult.ts
+function serializeRecognitionResult({
+  duration,
+  errorDetails,
+  json,
+  offset,
+  properties,
+  reason,
+  resultId,
+  text
+}) {
+  return Object.freeze({
+    duration,
+    errorDetails,
+    json: json && JSON.parse(json),
+    offset,
+    properties,
+    reason,
+    resultId,
+    text
+  });
+}
+
+// src/SpeechServices/SpeechToText/SpeechGrammarList.ts
+var SpeechGrammarList = class {
   constructor() {
-    this
+    this.#phrases = [];
   }
   addFromString() {
     throw new Error("JSGF is not supported");
   }
+  #phrases;
   get phrases() {
-    return this
+    return this.#phrases;
   }
   set phrases(value) {
     if (Array.isArray(value)) {
-      this
+      this.#phrases = Object.freeze([...value]);
     } else if (typeof value === "string") {
-      this
+      this.#phrases = Object.freeze([value]);
     } else {
       throw new Error(`The provided value is not an array or of type 'string'`);
     }
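Editor's note: `SpeechGrammarList` now keeps its phrases in a `#phrases` private field and stores a frozen copy, so the list can no longer be mutated in place. A sketch of the setter's accepted inputs, taken directly from the logic above:

```js
// Illustrative: the setter accepts an array or a single string and freezes
// the stored copy; any other type throws.
const grammars = new SpeechGrammarList();

grammars.phrases = 'Bellevue';               // stored as a frozen ['Bellevue']
grammars.phrases = ['Bellevue', 'Kirkland']; // stored as a frozen copy
console.log(Object.isFrozen(grammars.phrases)); // true
```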
@@ -274,6 +357,9 @@ var SpeechRecognitionErrorEvent = class extends Event {
   get message() {
     return this.#message;
   }
+  get type() {
+    return "error";
+  }
 };
 
 // src/SpeechServices/SpeechToText/SpeechRecognitionResultList.ts
@@ -304,61 +390,13 @@ var SpeechRecognitionEvent = class extends Event {
   get results() {
     return this.#results;
   }
+  get type() {
+    return super.type;
+  }
 };
 
-// src/SpeechServices/SpeechToText/
-var {
-function serializeRecognitionResult({ duration, errorDetails, json, offset, properties, reason, resultId, text }) {
-  return {
-    duration,
-    errorDetails,
-    json: JSON.parse(json),
-    offset,
-    properties,
-    reason,
-    resultId,
-    text
-  };
-}
-function averageAmplitude(arrayBuffer) {
-  const array = new Int16Array(arrayBuffer);
-  return [].reduce.call(array, (averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
-}
-function cognitiveServicesAsyncToPromise(fn) {
-  return (...args) => new Promise((resolve, reject) => fn(...args, resolve, reject));
-}
-function prepareAudioConfig(audioConfig) {
-  const originalAttach = audioConfig.attach;
-  const boundOriginalAttach = audioConfig.attach.bind(audioConfig);
-  let firstChunk;
-  let muted;
-  audioConfig.attach = async () => {
-    const reader = await boundOriginalAttach();
-    return {
-      ...reader,
-      read: async () => {
-        const chunk = await reader.read();
-        if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
-          audioConfig.events.onEvent({ name: "FirstAudibleChunk" });
-          firstChunk = true;
-        }
-        if (muted) {
-          return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
-        }
-        return chunk;
-      }
-    };
-  };
-  return {
-    audioConfig,
-    pause: () => {
-      muted = true;
-    },
-    unprepare: () => {
-      audioConfig.attach = originalAttach;
-    }
-  };
-}
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfillFromRecognizer.ts
+var { ResultReason: ResultReason2, SpeechRecognizer: SpeechRecognizer2 } = SpeechSDK_default;
 function createSpeechRecognitionPonyfillFromRecognizer({
   createRecognizer,
   enableTelemetry,
@@ -368,17 +406,13 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 }) {
   SpeechRecognizer2.enableTelemetry(enableTelemetry !== false);
   class SpeechRecognition extends EventTarget {
-
-
-
-
-
-
-
-      this.#eventListenerMap = new EventListenerMap(this);
-    }
-    /** @type { import('./SpeechRecognitionEventListenerMap').SpeechRecognitionEventListenerMap } */
-    #eventListenerMap;
+    #continuous = false;
+    #eventListenerMap = new EventListenerMap(this);
+    #grammars = new SpeechGrammarList();
+    #interimResults = false;
+    #lang = typeof window !== "undefined" ? window.document.documentElement.getAttribute("lang") || window.navigator.language : "en-US";
+    // eslint-disable-next-line no-magic-numbers
+    #maxAlternatives = 1;
     emitCognitiveServices(type, event) {
       this.dispatchEvent(
         new SpeechRecognitionEvent("cognitiveservices", {
@@ -390,40 +424,39 @@ function createSpeechRecognitionPonyfillFromRecognizer({
       );
     }
     get continuous() {
-      return this
+      return this.#continuous;
     }
     set continuous(value) {
-      this
+      this.#continuous = value;
     }
     get grammars() {
-      return this
+      return this.#grammars;
     }
     set grammars(value) {
-      if (value instanceof
-      this
+      if (value instanceof SpeechGrammarList) {
+        this.#grammars = value;
       } else {
        throw new Error(`The provided value is not of type 'SpeechGrammarList'`);
       }
     }
     get interimResults() {
-      return this
+      return this.#interimResults;
     }
     set interimResults(value) {
-      this
+      this.#interimResults = value;
     }
     get maxAlternatives() {
-      return this
+      return this.#maxAlternatives;
     }
     set maxAlternatives(value) {
-      this
+      this.#maxAlternatives = value;
     }
     get lang() {
-      return this
+      return this.#lang;
     }
     set lang(value) {
-      this
+      this.#lang = value;
     }
-    /** @type { ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined } */
     get onaudioend() {
       return this.#eventListenerMap.getProperty("audioend");
     }
@@ -451,7 +484,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
     set onend(value) {
       this.#eventListenerMap.setProperty("end", value);
     }
-    /** @type { ((event:
+    /** @type { ((event: SpeechRecognitionErrorEvent) => void) | undefined } */
     get onerror() {
       return this.#eventListenerMap.getProperty("error");
     }
@@ -500,6 +533,8 @@ function createSpeechRecognitionPonyfillFromRecognizer({
     set onstart(value) {
       this.#eventListenerMap.setProperty("start", value);
     }
+    abort;
+    stop;
     start() {
       this._startOnce().catch((err) => {
         this.dispatchEvent(
@@ -509,22 +544,24 @@ function createSpeechRecognitionPonyfillFromRecognizer({
     }
     async _startOnce() {
       const recognizer = await createRecognizer(this.lang);
-      const { pause, unprepare } = prepareAudioConfig(recognizer
+      const { pause, unprepare } = prepareAudioConfig(recognizer["audioConfig"]);
       try {
         const queue = createPromiseQueue_default();
         let soundStarted;
         let speechStarted;
         let stopping;
-        const { detach: detachAudioConfigEvent } = recognizer
-
-
-
-
-
-
-
+        const { detach: detachAudioConfigEvent } = recognizer["audioConfig"].events.attach(
+          (event) => {
+            const { name } = event;
+            if (name === "AudioSourceReadyEvent") {
+              queue.push({ audioSourceReady: {} });
+            } else if (name === "AudioSourceOffEvent") {
+              queue.push({ audioSourceOff: {} });
+            } else if (name === "FirstAudibleChunk") {
+              queue.push({ firstAudibleChunk: {} });
+            }
           }
-
+        );
         recognizer.canceled = (_, { errorDetails, offset, reason, sessionId }) => {
           queue.push({
             canceled: {
@@ -566,18 +603,18 @@ function createSpeechRecognitionPonyfillFromRecognizer({
          queue.push({ speechEndDetected: { sessionId } });
         };
         const { phrases } = this.grammars;
-        const { dynamicGrammar } = recognizer
-        referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar(referenceGrammars);
-        phrases && phrases.length && dynamicGrammar.addPhrase(phrases);
-        await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync
-        if (recognizer.stopContinuousRecognitionAsync) {
+        const { dynamicGrammar } = recognizer["privReco"];
+        referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar([...referenceGrammars]);
+        phrases && phrases.length && dynamicGrammar.addPhrase([...phrases]);
+        await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync, recognizer)();
+        if (typeof recognizer.stopContinuousRecognitionAsync === "function") {
           this.abort = () => queue.push({ abort: {} });
           this.stop = () => queue.push({ stop: {} });
         } else {
           this.abort = this.stop = void 0;
         }
         let audioStarted;
-        let finalEvent;
+        let finalEvent = void 0;
         let finalizedResults = [];
         for (let loop = 0; !stopping || audioStarted; loop++) {
           const event = await queue.shift();
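Editor's note: `abort` and `stop` are instance fields that `_startOnce` assigns only when the recognizer exposes `stopContinuousRecognitionAsync`, so callers should treat them as possibly `undefined`. A defensive sketch, with `recognition` an assumed `SpeechRecognition` instance:

```js
// Illustrative: stop() and abort() only exist after start() has wired them up.
recognition.start();

setTimeout(() => {
  // stop() requests a graceful finish; abort() discards pending results.
  recognition.stop ? recognition.stop() : console.warn('stop() is unavailable');
}, 5000);
```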
@@ -620,7 +657,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
             stopping = "stop";
           }
           if (abort && recognizer.stopContinuousRecognitionAsync) {
-            await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+            await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
           }
         } else if (audioSourceReady) {
           this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
@@ -637,8 +674,13 @@ function createSpeechRecognitionPonyfillFromRecognizer({
         } else if (stopping !== "abort") {
           if (recognized && recognized.result && recognized.result.reason === ResultReason2.NoMatch) {
             if (!this.continuous || stopping === "stop") {
-              finalEvent = new SpeechRecognitionEvent("result", {
-
+              finalEvent = new SpeechRecognitionEvent("result", {
+                results: new SpeechRecognitionResultList(finalizedResults)
+              });
+              recognizer.stopContinuousRecognitionAsync && await cognitiveServicesAsyncToPromise(
+                recognizer.stopContinuousRecognitionAsync,
+                recognizer
+              )();
               break;
             }
           } else if (recognized || recognizing) {
@@ -659,7 +701,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
               maxAlternatives: this.maxAlternatives,
               textNormalization
             });
-            const recognizable = !!result[0]
+            const recognizable = !!result[0]?.transcript;
             if (recognizable) {
               finalizedResults = [...finalizedResults, result];
               this.continuous && this.dispatchEvent(
@@ -676,7 +718,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
               });
             }
             if ((!this.continuous || stopping === "stop") && recognizer.stopContinuousRecognitionAsync) {
-              await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+              await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
             }
             if (looseEvents && finalEvent && recognizable) {
               this.dispatchEvent(finalEvent);
@@ -720,17 +762,20 @@ function createSpeechRecognitionPonyfillFromRecognizer({
         throw err;
       } finally {
         unprepare();
-        recognizer
+        recognizer["dispose"](false);
       }
     }
   }
   return {
-    SpeechGrammarList
+    SpeechGrammarList,
     SpeechRecognition,
     SpeechRecognitionEvent
   };
 }
-
+
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.ts
+var { AudioConfig: AudioConfig2, OutputFormat: OutputFormat2, SpeechConfig: SpeechConfig2, SpeechRecognizer: SpeechRecognizer3 } = SpeechSDK_default;
+function createSpeechRecognitionPonyfill(options) {
   const {
     audioConfig = AudioConfig2.fromDefaultMicrophoneInput(),
     // We set telemetry to true to honor the default telemetry settings of Speech SDK
@@ -743,44 +788,40 @@ var createSpeechRecognitionPonyfill_default = (options) => {
     textNormalization = "display"
   } = patchOptions(options);
   if (!audioConfig && (!window.navigator.mediaDevices || !window.navigator.mediaDevices.getUserMedia)) {
-
-    "web-speech-cognitive-services: This browser does not support
+    throw new Error(
+      "web-speech-cognitive-services: This browser does not support Media Capture and Streams API and it will not work with Cognitive Services Speech Services."
     );
-    return {};
   }
   const createRecognizer = async (lang) => {
-    const 
+    const credentials = await fetchCredentials();
     let speechConfig;
-    if (speechRecognitionHostname) {
-      const host =
-
+    if (typeof credentials.speechRecognitionHostname !== "undefined") {
+      const host = new URL("wss://hostname:443");
+      host.hostname = credentials.speechRecognitionHostname;
+      if (credentials.authorizationToken) {
        speechConfig = SpeechConfig2.fromHost(host);
-      speechConfig.authorizationToken = authorizationToken;
+        speechConfig.authorizationToken = credentials.authorizationToken;
       } else {
-      speechConfig = SpeechConfig2.fromHost(host, subscriptionKey);
+        speechConfig = SpeechConfig2.fromHost(host, credentials.subscriptionKey);
       }
     } else {
-      speechConfig = authorizationToken ? SpeechConfig2.fromAuthorizationToken(authorizationToken, region) : SpeechConfig2.fromSubscription(subscriptionKey, region);
+      speechConfig = typeof credentials.authorizationToken !== "undefined" ? SpeechConfig2.fromAuthorizationToken(credentials.authorizationToken, credentials.region) : SpeechConfig2.fromSubscription(credentials.subscriptionKey, credentials.region);
     }
     if (speechRecognitionEndpointId) {
       speechConfig.endpointId = speechRecognitionEndpointId;
     }
     speechConfig.outputFormat = OutputFormat2.Detailed;
     speechConfig.speechRecognitionLanguage = lang || "en-US";
-    return new 
+    return new SpeechRecognizer3(speechConfig, audioConfig);
   };
   return createSpeechRecognitionPonyfillFromRecognizer({
-    audioConfig,
     createRecognizer,
     enableTelemetry,
     looseEvents,
     referenceGrammars,
     textNormalization
   });
-}
-
-// src/SpeechServices/SpeechToText.js
-var SpeechToText_default = createSpeechRecognitionPonyfill_default;
+}
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 import { EventTarget as EventTarget3, getEventAttributeValue as getEventAttributeValue2, setEventAttributeValue as setEventAttributeValue2 } from "event-target-shim";
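Editor's note: `createSpeechRecognitionPonyfill` is now a plain named function (the old default-exported `createSpeechRecognitionPonyfill_default` arrow is gone), and an unsupported browser now throws instead of returning `{}`. A hedged end-to-end sketch; the key and region are placeholders, and `credentials` may equally be an async function, since the deprecation shim in this diff assigns one:

```js
// Illustrative: construct the ponyfill and run a single recognition.
import { createSpeechRecognitionPonyfill } from 'web-speech-cognitive-services';

const { SpeechRecognition } = createSpeechRecognitionPonyfill({
  credentials: { region: 'westus', subscriptionKey: 'YOUR_SUBSCRIPTION_KEY' }
});

const recognition = new SpeechRecognition();

recognition.lang = 'en-US';
recognition.onresult = ({ results }) => console.log(results[0][0].transcript);
recognition.start();
```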
@@ -1285,9 +1326,12 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 // src/SpeechServices/TextToSpeech.js
 var TextToSpeech_default = createSpeechSynthesisPonyfill_default;
 
-// src/SpeechServices/fetchAuthorizationToken.
+// src/SpeechServices/fetchAuthorizationToken.ts
 var TOKEN_URL_TEMPLATE = "https://{region}.api.cognitive.microsoft.com/sts/v1.0/issueToken";
-async function 
+async function fetchAuthorizationToken({
+  region,
+  subscriptionKey
+}) {
   const res = await fetch(TOKEN_URL_TEMPLATE.replace(/\{region\}/u, region), {
     headers: {
       "Ocp-Apim-Subscription-Key": subscriptionKey
@@ -1300,22 +1344,22 @@ async function fetchAuthorizationToken_default({ region, subscriptionKey }) {
   return res.text();
 }
 
-// src/SpeechServices.
-function createSpeechServicesPonyfill(options = {}
+// src/SpeechServices.ts
+function createSpeechServicesPonyfill(options = {}) {
   return {
-    ...
-    ...TextToSpeech_default(options
+    ...createSpeechRecognitionPonyfill(options),
+    ...TextToSpeech_default(options)
   };
 }
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.0.0-main.
+meta.setAttribute("content", `version=${"8.0.0-main.6cbf0fb"}`);
 document.head.appendChild(meta);
 export {
-
+  createSpeechRecognitionPonyfill,
   createSpeechRecognitionPonyfillFromRecognizer,
   createSpeechServicesPonyfill,
   TextToSpeech_default as createSpeechSynthesisPonyfill,
-
+  fetchAuthorizationToken
 };
 //# sourceMappingURL=web-speech-cognitive-services.mjs.map