web-speech-cognitive-services 8.1.3 → 8.1.4-main.202512240452.5ddefc2
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- package/dist/web-speech-cognitive-services.d.mts +10 -10
- package/dist/web-speech-cognitive-services.d.ts +10 -10
- package/dist/web-speech-cognitive-services.development.js +26 -23
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +26 -23
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +26 -23
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +10 -10
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +11 -9
@@ -20,13 +20,14 @@ import {
 SpeechConfig,
 SpeechRecognizer
 } from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk.js";
-var
+var SpeechSDK = {
 AudioConfig,
 OutputFormat,
 ResultReason,
 SpeechConfig,
 SpeechRecognizer
 };
+var SpeechSDK_default = SpeechSDK;
 
 // src/SpeechServices/SpeechToText/validation/credentialsSchema.ts
 import { intersect, object, optional, pipe, readonly, string, undefined_, union } from "valibot";
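Note: the removed lines throughout this diff are truncated by the diff viewer, so only their leading characters appear; the added lines re-emit the same statements with full identifier names. In this hunk, the added pair `var SpeechSDK = { ... };` / `var SpeechSDK_default = SpeechSDK;` is esbuild's usual lowering of a default-exported named constant. A minimal sketch of source that would produce this output (the file path and exact shape are assumptions inferred from the emitted names):

// Hypothetical source, e.g. src/SpeechServices/SpeechSDK.js (inferred, not shown in the diff)
import {
  AudioConfig,
  OutputFormat,
  ResultReason,
  SpeechConfig,
  SpeechRecognizer
} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk.js";

const SpeechSDK = { AudioConfig, OutputFormat, ResultReason, SpeechConfig, SpeechRecognizer };

// esbuild lowers `export default SpeechSDK` into the SpeechSDK_default alias seen above.
export default SpeechSDK;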
@@ -169,7 +170,7 @@ function pDefer() {
 }
 
 // src/Util/createPromiseQueue.js
-function
+function createPromiseQueue() {
 let shiftDeferred;
 const queue = [];
 const push = (value) => {
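`createPromiseQueue` implements a deferred queue: `push` hands values to any waiting `shift`, and `shift` returns a promise for the next value. A minimal sketch of the pattern (illustrative only, not the package's exact implementation):

// push() resolves a parked waiter if one exists, otherwise buffers the value;
// shift() resolves immediately from the buffer, otherwise parks a resolver.
function promiseQueueSketch() {
  const values = [];
  const resolvers = [];
  return {
    push(value) {
      const resolve = resolvers.shift();
      resolve ? resolve(value) : values.push(value);
    },
    shift() {
      return values.length ? Promise.resolve(values.shift()) : new Promise(resolve => resolvers.push(resolve));
    }
  };
}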
@@ -250,7 +251,7 @@ var SpeechRecognitionResult = class extends FakeArray {
 var {
 ResultReason: { RecognizingSpeech, RecognizedSpeech }
 } = SpeechSDK_default;
-function
+function cognitiveServiceEventResultToWebSpeechRecognitionResult(result, init) {
 const { maxAlternatives = Infinity, textNormalization = "display" } = init || {};
 const json = typeof result.json === "string" ? JSON.parse(result.json) : result.json;
 if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !json.NBest) {
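The restored name spells out the function's job: converting a Cognitive Services result into a Web Speech API recognition result. A sketch of the core NBest-to-alternatives mapping, assuming the Azure detailed-format JSON fields (`Confidence`, `Display`, `ITN`, `Lexical`, `MaskedITN`); the real function also handles interim results that carry no `NBest` list, as the `RecognizingSpeech` branch above shows:

// Illustrative only: cap at maxAlternatives and pick the transcript field
// matching the requested text normalization.
function nBestToAlternatives(nBest, { maxAlternatives = Infinity, textNormalization = "display" } = {}) {
  return nBest.slice(0, maxAlternatives).map(({ Confidence: confidence, Display, ITN, Lexical, MaskedITN }) => ({
    confidence,
    transcript:
      textNormalization === "itn" ? ITN : textNormalization === "lexical" ? Lexical : textNormalization === "maskeditn" ? MaskedITN : Display
  }));
}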
@@ -615,7 +616,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 const recognizer = await createRecognizer(this.lang);
 const { pause, unprepare } = prepareAudioConfig(recognizer["audioConfig"]);
 try {
-const queue =
+const queue = createPromiseQueue();
 let soundStarted;
 let speechStarted;
 let stopping;
@@ -766,7 +767,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 speechStarted = true;
 }
 if (recognized) {
-const result =
+const result = cognitiveServiceEventResultToWebSpeechRecognitionResult(recognized.result, {
 maxAlternatives: this.maxAlternatives,
 textNormalization
 });
@@ -798,7 +799,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 new SpeechRecognitionEvent("result", {
 results: new SpeechRecognitionResultList([
 ...finalizedResults,
-
+cognitiveServiceEventResultToWebSpeechRecognitionResult(recognizing.result, {
 maxAlternatives: this.maxAlternatives,
 textNormalization
 })
@@ -900,7 +901,7 @@ import { onErrorResumeNext } from "on-error-resume-next/async";
 import memoize from "memoize-one";
 
 // src/SpeechServices/TextToSpeech/AudioContextConsumer.js
-var
+var AudioContextConsumer = class {
 constructor(audioContext) {
 this.audioContext = audioContext;
 }
@@ -929,7 +930,7 @@ var AudioContextConsumer_default = class {
 };
 
 // src/SpeechServices/TextToSpeech/AudioContextQueue.js
-var
+var AudioContextQueue = class {
 constructor({ audioContext, ponyfill }) {
 this.consumer = null;
 this.paused = false;
@@ -957,7 +958,7 @@ var AudioContextQueue_default = class {
 }
 async startConsumer() {
 while (!this.paused && this.queue.length && !this.consumer) {
-this.consumer = new
+this.consumer = new AudioContextConsumer(this.getAudioContext());
 await this.consumer.start(this.queue);
 this.consumer = null;
 }
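The restored constructor call completes the queue's single-consumer drain loop. A sketch of why the loop is shaped this way (`createConsumer` is a hypothetical stand-in; the real code constructs an `AudioContextConsumer`):

// The !state.consumer guard lets at most one drain loop run at a time, and the
// while condition re-checks the queue after each drain so items pushed during
// playback are not stranded.
async function startConsumerSketch(state, createConsumer) {
  while (!state.paused && state.queue.length && !state.consumer) {
    state.consumer = createConsumer();       // claim the single consumer slot
    await state.consumer.start(state.queue); // drain until the queue is empty
    state.consumer = null;                   // release the slot, then re-check
  }
}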
@@ -1015,7 +1016,7 @@ var DEFAULT_LANGUAGE = "en-US";
 var DEFAULT_OUTPUT_FORMAT = "riff-16khz-16bit-mono-pcm";
 var DEFAULT_VOICE = "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)";
 var EMPTY_MP3_BASE64 = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU3LjU2LjEwMQAAAAAAAAAAAAAA//tAwAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAACAAABhgC7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7//////////////////////////////////////////////////////////////////8AAAAATGF2YzU3LjY0AAAAAAAAAAAAAAAAJAUHAAAAAAAAAYYoRBqpAAAAAAD/+xDEAAPAAAGkAAAAIAAANIAAAARMQU1FMy45OS41VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVf/7EMQpg8AAAaQAAAAgAAA0gAAABFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV";
-async function
+async function fetchSpeechData({
 deploymentId,
 fetchCredentials,
 lang = DEFAULT_LANGUAGE,
@@ -1183,7 +1184,7 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
 this._volume = value;
 }
 preload({ deploymentId, fetchCredentials, outputFormat }) {
-this.arrayBufferPromise =
+this.arrayBufferPromise = fetchSpeechData({
 fetchCredentials,
 deploymentId,
 lang: this.lang || window.navigator.language,
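`preload` stores the promise returned by `fetchSpeechData` without awaiting it, so the synthesized audio downloads while the utterance waits in the queue. A sketch of the pattern with a hypothetical playback step (the real playback path goes through `AudioContextConsumer`):

// Start the download early; await the ArrayBuffer only when playback begins.
class PreloadSketch {
  preload(fetchArrayBuffer) {
    this.arrayBufferPromise = fetchArrayBuffer(); // no await: fetch runs in the background
  }
  async play(audioContext) {
    const arrayBuffer = await this.arrayBufferPromise; // resolves instantly if already fetched
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    source.start(0);
  }
}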
@@ -1216,7 +1217,7 @@ var SpeechSynthesisUtterance = class extends EventTarget2 {
 var SpeechSynthesisUtterance_default = SpeechSynthesisUtterance;
 
 // src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
-var
+var SpeechSynthesisVoice = class {
 constructor({ gender, lang, voiceURI }) {
 this._default = false;
 this._gender = gender;
@@ -1246,7 +1247,7 @@ var SpeechSynthesisVoice_default = class {
 };
 
 // src/SpeechServices/TextToSpeech/fetchCustomVoices.js
-async function
+async function fetchCustomVoices_({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
 const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
 const res = await fetch(
 `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
@@ -1262,10 +1263,10 @@ async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, su
 }
 return res.json();
 }
-async function
-const { models } = await
+async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+const { models } = await fetchCustomVoices_({ customVoiceHostname, deploymentId, region, subscriptionKey });
 return models.map(
-({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new
+({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice({ gender, lang, voiceURI })
 ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
 }
 
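The destructuring in the restored lines implies the shape of the custom-voice endpoint response. A hypothetical example matching that destructuring (all values are illustrative, not taken from the API):

// Shape implied by ({ properties: { Gender: gender }, locale: lang, name: voiceURI }):
const exampleCustomVoiceResponse = {
  models: [
    { locale: "en-US", name: "contoso-custom-voice", properties: { Gender: "Female" } }
  ]
};
// Each model becomes a SpeechSynthesisVoice({ gender, lang, voiceURI }), and the
// resulting list is sorted by name.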
@@ -1286,13 +1287,13 @@ async function fetchVoices({ authorizationToken, region, speechSynthesisHostname
 throw new Error("Failed to fetch voices");
 }
 const voices = await res.json();
-return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new
+return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
 }
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 var DEFAULT_OUTPUT_FORMAT2 = "audio-24khz-160kbitrate-mono-mp3";
 var EMPTY_ARRAY = [];
-
+function createSpeechRecognitionPonyfill2(options) {
 const {
 audioContext,
 fetchCredentials,
@@ -1311,7 +1312,7 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 class SpeechSynthesis extends EventTarget3 {
 constructor() {
 super();
-this.queue = new
+this.queue = new AudioContextQueue({ audioContext, ponyfill });
 this.updateVoices();
 }
 cancel() {
@@ -1366,7 +1367,7 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 "web-speech-cognitive-services: Listing of custom voice models are only available when using subscription key."
 );
 await onErrorResumeNext(async () => {
-const voices = await
+const voices = await fetchCustomVoices({
 customVoiceHostname,
 deploymentId: speechSynthesisDeploymentId,
 region,
@@ -1390,7 +1391,8 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 SpeechSynthesisEvent,
 SpeechSynthesisUtterance: SpeechSynthesisUtterance_default
 };
-}
+}
+var createSpeechSynthesisPonyfill_default = createSpeechRecognitionPonyfill2;
 
 // src/SpeechServices/TextToSpeech.js
 var TextToSpeech_default = createSpeechSynthesisPonyfill_default;
@@ -1422,12 +1424,13 @@ function createSpeechServicesPonyfill(options = {}) {
 }
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.1.
+meta.setAttribute("content", `version=${"8.1.4-main.202512240452.5ddefc2"}`);
 document.head.appendChild(meta);
+var SpeechServices_default = createSpeechServicesPonyfill;
 export {
 createSpeechRecognitionPonyfill,
 createSpeechRecognitionPonyfillFromRecognizer,
-createSpeechServicesPonyfill,
+SpeechServices_default as createSpeechServicesPonyfill,
 TextToSpeech_default as createSpeechSynthesisPonyfill,
 fetchAuthorizationToken
 };
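The final hunk re-binds the export through `SpeechServices_default` but keeps the public name `createSpeechServicesPonyfill`, so consumer code is unaffected by this release. A minimal usage sketch, assuming the package's documented ponyfill API (region and key are placeholders):

import { createSpeechServicesPonyfill } from "web-speech-cognitive-services";

// Same import name as in 8.1.3; only the internal binding changed.
const { SpeechRecognition } = createSpeechServicesPonyfill({
  credentials: { region: "westus", subscriptionKey: "YOUR_SUBSCRIPTION_KEY" }
});

const recognition = new SpeechRecognition();
recognition.lang = "en-US";
recognition.onresult = ({ results }) => console.log(results[0][0].transcript);
recognition.start();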