web-speech-cognitive-services 8.0.0-main.5903868 → 8.0.0-main.6cbf0fb
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/web-speech-cognitive-services.d.mts +217 -65
- package/dist/web-speech-cognitive-services.d.ts +217 -65
- package/dist/web-speech-cognitive-services.development.js +240 -199
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +238 -194
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +238 -194
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +12 -12
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +2 -2
@@ -30,49 +30,14 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.js
 var src_exports = {};
 __export(src_exports, {
-createSpeechRecognitionPonyfill: () =>
+createSpeechRecognitionPonyfill: () => createSpeechRecognitionPonyfill,
 createSpeechRecognitionPonyfillFromRecognizer: () => createSpeechRecognitionPonyfillFromRecognizer,
 createSpeechServicesPonyfill: () => createSpeechServicesPonyfill,
 createSpeechSynthesisPonyfill: () => TextToSpeech_default,
-fetchAuthorizationToken: () =>
+fetchAuthorizationToken: () => fetchAuthorizationToken
 });
 module.exports = __toCommonJS(src_exports);
 
-// ../../node_modules/p-defer/index.js
-function pDefer() {
-const deferred = {};
-deferred.promise = new Promise((resolve, reject) => {
-deferred.resolve = resolve;
-deferred.reject = reject;
-});
-return deferred;
-}
-
-// src/Util/createPromiseQueue.js
-function createPromiseQueue_default() {
-let shiftDeferred;
-const queue = [];
-const push = (value) => {
-if (shiftDeferred) {
-const { resolve } = shiftDeferred;
-shiftDeferred = null;
-resolve(value);
-} else {
-queue.push(value);
-}
-};
-const shift = () => {
-if (queue.length) {
-return Promise.resolve(queue.shift());
-}
-return (shiftDeferred || (shiftDeferred = pDefer())).promise;
-};
-return {
-push,
-shift
-};
-}
-
 // src/SpeechServices/resolveFunctionOrReturnValue.ts
 function isFunction(value) {
 return typeof value === "function";
@@ -81,17 +46,21 @@ function resolveFunctionOrReturnValue(fnOrValue) {
 return isFunction(fnOrValue) ? fnOrValue() : fnOrValue;
 }
 
-// src/SpeechServices/patchOptions.
+// src/SpeechServices/patchOptions.ts
 var shouldWarnOnSubscriptionKey = true;
-function patchOptions({
-
-
-
-
-
-
-
-
+function patchOptions(init) {
+const {
+audioConfig,
+authorizationToken,
+enableTelemetry,
+looseEvent,
+referenceGrammars,
+region = "westus",
+speechRecognitionEndpointId,
+subscriptionKey,
+textNormalization
+} = init;
+let { credentials, looseEvents } = init;
 if (typeof looseEvent !== "undefined") {
 console.warn('web-speech-cognitive-services: The option "looseEvent" should be named as "looseEvents".');
 looseEvents = looseEvent;
@@ -103,11 +72,12 @@ function patchOptions({
 console.warn(
 "web-speech-cognitive-services: We are deprecating authorizationToken, region, and subscriptionKey. Please use credentials instead. The deprecated option will be removed on or after 2020-11-14."
 );
-credentials = async () => authorizationToken ? { authorizationToken: await resolveFunctionOrReturnValue(authorizationToken), region } : { region, subscriptionKey: await resolveFunctionOrReturnValue(subscriptionKey) };
+credentials = async () => typeof init.authorizationToken !== "undefined" ? { authorizationToken: await resolveFunctionOrReturnValue(init.authorizationToken), region } : { region, subscriptionKey: await resolveFunctionOrReturnValue(init.subscriptionKey) };
 }
 }
-return {
-
+return Object.freeze({
+audioConfig,
+enableTelemetry,
 fetchCredentials: async () => {
 const {
 authorizationToken: authorizationToken2,
@@ -142,21 +112,23 @@ function patchOptions({
 );
 shouldWarnOnSubscriptionKey = false;
 }
-
-
-
-
-
-
-
-}
-return resolvedCredentials;
+return {
+...typeof authorizationToken2 !== "undefined" ? { authorizationToken: authorizationToken2 } : { subscriptionKey: subscriptionKey2 },
+...typeof region2 !== "undefined" ? { region: region2 } : {
+customVoiceHostname,
+speechRecognitionHostname,
+speechSynthesisHostname
+}
+};
 },
-looseEvents
-
+looseEvents: !!looseEvents,
+referenceGrammars: referenceGrammars && Object.freeze([...referenceGrammars]),
+speechRecognitionEndpointId,
+textNormalization
+});
 }
 
-// src/SpeechServices/SpeechSDK.
+// src/SpeechServices/SpeechSDK.ts
 var import_microsoft_cognitiveservices_speech = require("microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk");
 var SpeechSDK_default = {
 AudioConfig: import_microsoft_cognitiveservices_speech.AudioConfig,
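
For orientation, the reworked patchOptions now freezes the options object it returns and exposes credentials only through the async fetchCredentials getter. A minimal sketch of how that shape is consumed, assuming object-style credentials; the region and key below are placeholders, and patchOptions itself is an internal helper of the bundle, not a public export:

    // Sketch only: resolving credentials through the frozen options object.
    (async () => {
      const { fetchCredentials, looseEvents } = patchOptions({
        credentials: { region: 'westus', subscriptionKey: 'YOUR_SUBSCRIPTION_KEY' } // placeholders
      });
      const credentials = await fetchCredentials();
      // -> { region: 'westus', subscriptionKey: 'YOUR_SUBSCRIPTION_KEY' }
      // Hostname-based credentials would instead spread customVoiceHostname,
      // speechRecognitionHostname and speechSynthesisHostname into the result.
      console.log(credentials, looseEvents); // looseEvents is coerced to a boolean
    })();
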
@@ -166,6 +138,41 @@ var SpeechSDK_default = {
 SpeechRecognizer: import_microsoft_cognitiveservices_speech.SpeechRecognizer
 };
 
+// ../../node_modules/p-defer/index.js
+function pDefer() {
+const deferred = {};
+deferred.promise = new Promise((resolve, reject) => {
+deferred.resolve = resolve;
+deferred.reject = reject;
+});
+return deferred;
+}
+
+// src/Util/createPromiseQueue.js
+function createPromiseQueue_default() {
+let shiftDeferred;
+const queue = [];
+const push = (value) => {
+if (shiftDeferred) {
+const { resolve } = shiftDeferred;
+shiftDeferred = null;
+resolve(value);
+} else {
+queue.push(value);
+}
+};
+const shift = () => {
+if (queue.length) {
+return Promise.resolve(queue.shift());
+}
+return (shiftDeferred || (shiftDeferred = pDefer())).promise;
+};
+return {
+push,
+shift
+};
+}
+
 // src/SpeechServices/SpeechToText/SpeechRecognitionAlternative.ts
 var SpeechRecognitionAlternative = class {
 constructor({ confidence, transcript }) {
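
The p-defer and createPromiseQueue helpers above are unchanged; this hunk only relocates them later in the bundle. For readers unfamiliar with the pattern, a short illustrative sketch (not part of the diff) of how such a push/shift queue bridges event callbacks and an async consumer loop:

    // Sketch only: event handlers push plain objects, an async consumer awaits them in order.
    const queue = createPromiseQueue_default();
    queue.push({ audioSourceReady: {} });            // buffered because nobody is waiting yet
    queue.shift().then(event => console.log(event)); // -> { audioSourceReady: {} }
    queue.shift().then(event => console.log(event)); // resolves later, when the next push() arrives
    queue.push({ audioSourceOff: {} });              // -> { audioSourceOff: {} }
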
@@ -182,7 +189,7 @@ var SpeechRecognitionAlternative = class {
 }
 };
 
-// src/SpeechServices/SpeechToText/FakeArray.ts
+// src/SpeechServices/SpeechToText/private/FakeArray.ts
 var FakeArray = class {
 constructor(array) {
 if (!array) {
@@ -250,7 +257,15 @@ function cognitiveServiceEventResultToWebSpeechRecognitionResult_default(result,
 return new SpeechRecognitionResult({ isFinal: false, results: [] });
 }
 
-// src/SpeechServices/SpeechToText/
+// src/SpeechServices/SpeechToText/cognitiveServicesAsyncToPromise.ts
+function cognitiveServicesAsyncToPromise(fn, context = void 0) {
+return (...args) => (
+// eslint-disable-next-line prefer-spread
+new Promise((resolve, reject) => fn.apply(context, [...args, resolve, reject]))
+);
+}
+
+// src/SpeechServices/SpeechToText/private/EventListenerMap.ts
 var EventListenerMap = class {
 constructor(eventTarget) {
 this.#eventTarget = eventTarget;
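
The rewritten cognitiveServicesAsyncToPromise now takes an explicit context argument and uses fn.apply, so Speech SDK methods of the shape fn(...args, onSuccess, onError) stay bound to their recognizer. A self-contained sketch with a stand-in object (fakeSdk is illustrative, not a real SDK call):

    // Sketch only: turning a callback-style method into a promise-returning one.
    const fakeSdk = {
      value: 42,
      // mimics the SDK callback shape: (...args, onSuccess, onError), and relies on `this`
      readValueAsync(extra, onSuccess, onError) {
        if (typeof this.value === 'number') {
          onSuccess(this.value + extra);
        } else {
          onError(new Error('no value'));
        }
      }
    };
    const readValue = cognitiveServicesAsyncToPromise(fakeSdk.readValueAsync, fakeSdk);
    readValue(1).then(result => console.log(result)); // -> 43
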
@@ -271,22 +286,90 @@ var EventListenerMap = class {
 }
 };
 
-// src/SpeechServices/SpeechToText/
-var
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
+var import_AudioSourceEvents = require("microsoft-cognitiveservices-speech-sdk/distrib/lib/src/common/AudioSourceEvents");
+
+// src/SpeechServices/SpeechToText/private/averageAmplitude.ts
+function averageAmplitude(arrayBuffer) {
+const array = Array.from(new Int16Array(arrayBuffer));
+return array.reduce((averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
+}
+
+// src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts
+function prepareAudioConfig(audioConfig) {
+const audioConfigImpl = audioConfig;
+const originalAttach = audioConfigImpl.attach;
+const boundOriginalAttach = audioConfigImpl.attach.bind(audioConfigImpl);
+let firstChunk = false;
+let muted = false;
+audioConfigImpl.attach = async () => {
+const reader = await boundOriginalAttach("");
+return {
+...reader,
+read: async () => {
+const chunk = await reader.read();
+if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
+audioConfigImpl.events.onEvent(new import_AudioSourceEvents.AudioSourceEvent("FirstAudibleChunk", ""));
+firstChunk = true;
+}
+if (muted) {
+return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
+}
+return chunk;
+}
+};
+};
+return {
+audioConfig,
+pause: () => {
+muted = true;
+},
+unprepare: () => {
+audioConfigImpl.attach = originalAttach;
+}
+};
+}
+
+// src/SpeechServices/SpeechToText/private/serializeRecognitionResult.ts
+function serializeRecognitionResult({
+duration,
+errorDetails,
+json,
+offset,
+properties,
+reason,
+resultId,
+text
+}) {
+return Object.freeze({
+duration,
+errorDetails,
+json: json && JSON.parse(json),
+offset,
+properties,
+reason,
+resultId,
+text
+});
+}
+
+// src/SpeechServices/SpeechToText/SpeechGrammarList.ts
+var SpeechGrammarList = class {
 constructor() {
-this
+this.#phrases = [];
 }
 addFromString() {
 throw new Error("JSGF is not supported");
 }
+#phrases;
 get phrases() {
-return this
+return this.#phrases;
 }
 set phrases(value) {
 if (Array.isArray(value)) {
-this
+this.#phrases = Object.freeze([...value]);
 } else if (typeof value === "string") {
-this
+this.#phrases = Object.freeze([value]);
 } else {
 throw new Error(`The provided value is not an array or of type 'string'`);
 }
@@ -308,6 +391,9 @@ var SpeechRecognitionErrorEvent = class extends Event {
 get message() {
 return this.#message;
 }
+get type() {
+return "error";
+}
 };
 
 // src/SpeechServices/SpeechToText/SpeechRecognitionResultList.ts
@@ -338,61 +424,13 @@ var SpeechRecognitionEvent = class extends Event {
 get results() {
 return this.#results;
 }
+get type() {
+return super.type;
+}
 };
 
-// src/SpeechServices/SpeechToText/
-var {
-function serializeRecognitionResult({ duration, errorDetails, json, offset, properties, reason, resultId, text }) {
-return {
-duration,
-errorDetails,
-json: JSON.parse(json),
-offset,
-properties,
-reason,
-resultId,
-text
-};
-}
-function averageAmplitude(arrayBuffer) {
-const array = new Int16Array(arrayBuffer);
-return [].reduce.call(array, (averageAmplitude2, amplitude) => averageAmplitude2 + Math.abs(amplitude), 0) / array.length;
-}
-function cognitiveServicesAsyncToPromise(fn) {
-return (...args) => new Promise((resolve, reject) => fn(...args, resolve, reject));
-}
-function prepareAudioConfig(audioConfig) {
-const originalAttach = audioConfig.attach;
-const boundOriginalAttach = audioConfig.attach.bind(audioConfig);
-let firstChunk;
-let muted;
-audioConfig.attach = async () => {
-const reader = await boundOriginalAttach();
-return {
-...reader,
-read: async () => {
-const chunk = await reader.read();
-if (!firstChunk && averageAmplitude(chunk.buffer) > 150) {
-audioConfig.events.onEvent({ name: "FirstAudibleChunk" });
-firstChunk = true;
-}
-if (muted) {
-return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
-}
-return chunk;
-}
-};
-};
-return {
-audioConfig,
-pause: () => {
-muted = true;
-},
-unprepare: () => {
-audioConfig.attach = originalAttach;
-}
-};
-}
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfillFromRecognizer.ts
+var { ResultReason: ResultReason2, SpeechRecognizer: SpeechRecognizer2 } = SpeechSDK_default;
 function createSpeechRecognitionPonyfillFromRecognizer({
 createRecognizer,
 enableTelemetry,
@@ -402,17 +440,13 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 }) {
 SpeechRecognizer2.enableTelemetry(enableTelemetry !== false);
 class SpeechRecognition extends EventTarget {
-
-
-
-
-
-
-
-this.#eventListenerMap = new EventListenerMap(this);
-}
-/** @type { import('./SpeechRecognitionEventListenerMap').SpeechRecognitionEventListenerMap } */
-#eventListenerMap;
+#continuous = false;
+#eventListenerMap = new EventListenerMap(this);
+#grammars = new SpeechGrammarList();
+#interimResults = false;
+#lang = typeof window !== "undefined" ? window.document.documentElement.getAttribute("lang") || window.navigator.language : "en-US";
+// eslint-disable-next-line no-magic-numbers
+#maxAlternatives = 1;
 emitCognitiveServices(type, event) {
 this.dispatchEvent(
 new SpeechRecognitionEvent("cognitiveservices", {
@@ -424,40 +458,39 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 );
 }
 get continuous() {
-return this
+return this.#continuous;
 }
 set continuous(value) {
-this
+this.#continuous = value;
 }
 get grammars() {
-return this
+return this.#grammars;
 }
 set grammars(value) {
-if (value instanceof
-this
+if (value instanceof SpeechGrammarList) {
+this.#grammars = value;
 } else {
 throw new Error(`The provided value is not of type 'SpeechGrammarList'`);
 }
 }
 get interimResults() {
-return this
+return this.#interimResults;
 }
 set interimResults(value) {
-this
+this.#interimResults = value;
 }
 get maxAlternatives() {
-return this
+return this.#maxAlternatives;
 }
 set maxAlternatives(value) {
-this
+this.#maxAlternatives = value;
 }
 get lang() {
-return this
+return this.#lang;
 }
 set lang(value) {
-this
+this.#lang = value;
 }
-/** @type { ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined } */
 get onaudioend() {
 return this.#eventListenerMap.getProperty("audioend");
 }
@@ -485,7 +518,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 set onend(value) {
 this.#eventListenerMap.setProperty("end", value);
 }
-/** @type { ((event:
+/** @type { ((event: SpeechRecognitionErrorEvent) => void) | undefined } */
 get onerror() {
 return this.#eventListenerMap.getProperty("error");
 }
@@ -534,6 +567,8 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 set onstart(value) {
 this.#eventListenerMap.setProperty("start", value);
 }
+abort;
+stop;
 start() {
 this._startOnce().catch((err) => {
 this.dispatchEvent(
@@ -543,22 +578,24 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 }
 async _startOnce() {
 const recognizer = await createRecognizer(this.lang);
-const { pause, unprepare } = prepareAudioConfig(recognizer
+const { pause, unprepare } = prepareAudioConfig(recognizer["audioConfig"]);
 try {
 const queue = createPromiseQueue_default();
 let soundStarted;
 let speechStarted;
 let stopping;
-const { detach: detachAudioConfigEvent } = recognizer
-
-
-
-
-
-
-
+const { detach: detachAudioConfigEvent } = recognizer["audioConfig"].events.attach(
+(event) => {
+const { name } = event;
+if (name === "AudioSourceReadyEvent") {
+queue.push({ audioSourceReady: {} });
+} else if (name === "AudioSourceOffEvent") {
+queue.push({ audioSourceOff: {} });
+} else if (name === "FirstAudibleChunk") {
+queue.push({ firstAudibleChunk: {} });
+}
 }
-
+);
 recognizer.canceled = (_, { errorDetails, offset, reason, sessionId }) => {
 queue.push({
 canceled: {
@@ -600,18 +637,18 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 queue.push({ speechEndDetected: { sessionId } });
 };
 const { phrases } = this.grammars;
-const { dynamicGrammar } = recognizer
-referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar(referenceGrammars);
-phrases && phrases.length && dynamicGrammar.addPhrase(phrases);
-await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync
-if (recognizer.stopContinuousRecognitionAsync) {
+const { dynamicGrammar } = recognizer["privReco"];
+referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar([...referenceGrammars]);
+phrases && phrases.length && dynamicGrammar.addPhrase([...phrases]);
+await cognitiveServicesAsyncToPromise(recognizer.startContinuousRecognitionAsync, recognizer)();
+if (typeof recognizer.stopContinuousRecognitionAsync === "function") {
 this.abort = () => queue.push({ abort: {} });
 this.stop = () => queue.push({ stop: {} });
 } else {
 this.abort = this.stop = void 0;
 }
 let audioStarted;
-let finalEvent;
+let finalEvent = void 0;
 let finalizedResults = [];
 for (let loop = 0; !stopping || audioStarted; loop++) {
 const event = await queue.shift();
@@ -654,7 +691,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 stopping = "stop";
 }
 if (abort && recognizer.stopContinuousRecognitionAsync) {
-await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
 }
 } else if (audioSourceReady) {
 this.dispatchEvent(new SpeechRecognitionEvent("audiostart"));
@@ -671,8 +708,13 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 } else if (stopping !== "abort") {
 if (recognized && recognized.result && recognized.result.reason === ResultReason2.NoMatch) {
 if (!this.continuous || stopping === "stop") {
-finalEvent = new SpeechRecognitionEvent("result", {
-
+finalEvent = new SpeechRecognitionEvent("result", {
+results: new SpeechRecognitionResultList(finalizedResults)
+});
+recognizer.stopContinuousRecognitionAsync && await cognitiveServicesAsyncToPromise(
+recognizer.stopContinuousRecognitionAsync,
+recognizer
+)();
 break;
 }
 } else if (recognized || recognizing) {
@@ -693,7 +735,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 maxAlternatives: this.maxAlternatives,
 textNormalization
 });
-const recognizable = !!result[0]
+const recognizable = !!result[0]?.transcript;
 if (recognizable) {
 finalizedResults = [...finalizedResults, result];
 this.continuous && this.dispatchEvent(
@@ -710,7 +752,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 });
 }
 if ((!this.continuous || stopping === "stop") && recognizer.stopContinuousRecognitionAsync) {
-await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync
+await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync, recognizer)();
 }
 if (looseEvents && finalEvent && recognizable) {
 this.dispatchEvent(finalEvent);
@@ -754,17 +796,20 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 throw err;
 } finally {
 unprepare();
-recognizer
+recognizer["dispose"](false);
 }
 }
 }
 return {
-SpeechGrammarList
+SpeechGrammarList,
 SpeechRecognition,
 SpeechRecognitionEvent
 };
 }
-
+
+// src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfill.ts
+var { AudioConfig: AudioConfig2, OutputFormat: OutputFormat2, SpeechConfig: SpeechConfig2, SpeechRecognizer: SpeechRecognizer3 } = SpeechSDK_default;
+function createSpeechRecognitionPonyfill(options) {
 const {
 audioConfig = AudioConfig2.fromDefaultMicrophoneInput(),
 // We set telemetry to true to honor the default telemetry settings of Speech SDK
@@ -777,44 +822,40 @@ var createSpeechRecognitionPonyfill_default = (options) => {
 textNormalization = "display"
 } = patchOptions(options);
 if (!audioConfig && (!window.navigator.mediaDevices || !window.navigator.mediaDevices.getUserMedia)) {
-
-"web-speech-cognitive-services: This browser does not support
+throw new Error(
+"web-speech-cognitive-services: This browser does not support Media Capture and Streams API and it will not work with Cognitive Services Speech Services."
 );
-return {};
 }
 const createRecognizer = async (lang) => {
-const
+const credentials = await fetchCredentials();
 let speechConfig;
-if (speechRecognitionHostname) {
-const host =
-
+if (typeof credentials.speechRecognitionHostname !== "undefined") {
+const host = new URL("wss://hostname:443");
+host.hostname = credentials.speechRecognitionHostname;
+if (credentials.authorizationToken) {
 speechConfig = SpeechConfig2.fromHost(host);
-speechConfig.authorizationToken = authorizationToken;
+speechConfig.authorizationToken = credentials.authorizationToken;
 } else {
-speechConfig = SpeechConfig2.fromHost(host, subscriptionKey);
+speechConfig = SpeechConfig2.fromHost(host, credentials.subscriptionKey);
 }
 } else {
-speechConfig = authorizationToken ? SpeechConfig2.fromAuthorizationToken(authorizationToken, region) : SpeechConfig2.fromSubscription(subscriptionKey, region);
+speechConfig = typeof credentials.authorizationToken !== "undefined" ? SpeechConfig2.fromAuthorizationToken(credentials.authorizationToken, credentials.region) : SpeechConfig2.fromSubscription(credentials.subscriptionKey, credentials.region);
 }
 if (speechRecognitionEndpointId) {
 speechConfig.endpointId = speechRecognitionEndpointId;
 }
 speechConfig.outputFormat = OutputFormat2.Detailed;
 speechConfig.speechRecognitionLanguage = lang || "en-US";
-return new
+return new SpeechRecognizer3(speechConfig, audioConfig);
 };
 return createSpeechRecognitionPonyfillFromRecognizer({
-audioConfig,
 createRecognizer,
 enableTelemetry,
 looseEvents,
 referenceGrammars,
 textNormalization
 });
-}
-
-// src/SpeechServices/SpeechToText.js
-var SpeechToText_default = createSpeechRecognitionPonyfill_default;
+}
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 var import_event_target_shim3 = require("event-target-shim");
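
createSpeechRecognitionPonyfill is now a named function that throws when the browser lacks the Media Capture and Streams API instead of silently returning an empty object. A hedged usage sketch of the recognition side, assuming a browser context; the region and key are placeholders, and the result indexing follows the Web Speech API shape:

    // Sketch only: building the ponyfill and running one recognition (placeholder credentials).
    const { SpeechRecognition } = createSpeechRecognitionPonyfill({
      credentials: { region: 'westus', subscriptionKey: 'YOUR_SUBSCRIPTION_KEY' }
    });
    const recognition = new SpeechRecognition();
    recognition.lang = 'en-US';
    recognition.interimResults = true;
    recognition.onresult = ({ results }) => {
      // each entry of results holds SpeechRecognitionAlternative objects
      results[0] && console.log(results[0][0].transcript);
    };
    recognition.onerror = event => console.error(event.message);
    recognition.start();
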
@@ -1319,9 +1360,12 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 // src/SpeechServices/TextToSpeech.js
 var TextToSpeech_default = createSpeechSynthesisPonyfill_default;
 
-// src/SpeechServices/fetchAuthorizationToken.
+// src/SpeechServices/fetchAuthorizationToken.ts
 var TOKEN_URL_TEMPLATE = "https://{region}.api.cognitive.microsoft.com/sts/v1.0/issueToken";
-async function
+async function fetchAuthorizationToken({
+region,
+subscriptionKey
+}) {
 const res = await fetch(TOKEN_URL_TEMPLATE.replace(/\{region\}/u, region), {
 headers: {
 "Ocp-Apim-Subscription-Key": subscriptionKey
@@ -1334,16 +1378,16 @@ async function fetchAuthorizationToken_default({ region, subscriptionKey }) {
 return res.text();
 }
 
-// src/SpeechServices.
-function createSpeechServicesPonyfill(options = {}
+// src/SpeechServices.ts
+function createSpeechServicesPonyfill(options = {}) {
 return {
-...
-...TextToSpeech_default(options
+...createSpeechRecognitionPonyfill(options),
+...TextToSpeech_default(options)
 };
 }
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.0.0-main.
+meta.setAttribute("content", `version=${"8.0.0-main.6cbf0fb"}`);
 document.head.appendChild(meta);
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {