web-speech-cognitive-services 8.1.4-main.b358385 → 8.1.4
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
- package/dist/web-speech-cognitive-services.d.mts +83 -53
- package/dist/web-speech-cognitive-services.d.ts +83 -53
- package/dist/web-speech-cognitive-services.development.js +4429 -6776
- package/dist/web-speech-cognitive-services.development.js.map +1 -1
- package/dist/web-speech-cognitive-services.js +26 -23
- package/dist/web-speech-cognitive-services.js.map +1 -1
- package/dist/web-speech-cognitive-services.mjs +26 -23
- package/dist/web-speech-cognitive-services.mjs.map +1 -1
- package/dist/web-speech-cognitive-services.production.min.js +12 -12
- package/dist/web-speech-cognitive-services.production.min.js.map +1 -1
- package/package.json +29 -27
```diff
@@ -32,7 +32,7 @@ var src_exports = {};
 __export(src_exports, {
 createSpeechRecognitionPonyfill: () => createSpeechRecognitionPonyfill,
 createSpeechRecognitionPonyfillFromRecognizer: () => createSpeechRecognitionPonyfillFromRecognizer,
-createSpeechServicesPonyfill: () =>
+createSpeechServicesPonyfill: () => SpeechServices_default,
 createSpeechSynthesisPonyfill: () => TextToSpeech_default,
 fetchAuthorizationToken: () => fetchAuthorizationToken
 });
```
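For reference, these exports are the package's public API surface; a typical consumer looks roughly like this (a sketch based on the package's documented usage — the region and subscription key are placeholder values):

```js
import { createSpeechServicesPonyfill } from 'web-speech-cognitive-services';

// Sketch based on the package's documented usage; region and
// subscriptionKey below are placeholder values.
const { SpeechRecognition, speechSynthesis, SpeechSynthesisUtterance } = createSpeechServicesPonyfill({
  credentials: {
    region: 'westus',
    subscriptionKey: 'YOUR_SUBSCRIPTION_KEY'
  }
});

const recognition = new SpeechRecognition();

recognition.lang = 'en-US';
recognition.onresult = ({ results }) => console.log(results[0][0].transcript);
recognition.start();
```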
```diff
@@ -54,13 +54,14 @@ function resolveFunctionOrReturnValue(fnOrValue) {
 
 // src/SpeechServices/SpeechSDK.ts
 var import_microsoft_cognitiveservices_speech_sdk = require("microsoft-cognitiveservices-speech-sdk/distrib/lib/microsoft.cognitiveservices.speech.sdk.js");
-var
+var SpeechSDK = {
 AudioConfig: import_microsoft_cognitiveservices_speech_sdk.AudioConfig,
 OutputFormat: import_microsoft_cognitiveservices_speech_sdk.OutputFormat,
 ResultReason: import_microsoft_cognitiveservices_speech_sdk.ResultReason,
 SpeechConfig: import_microsoft_cognitiveservices_speech_sdk.SpeechConfig,
 SpeechRecognizer: import_microsoft_cognitiveservices_speech_sdk.SpeechRecognizer
 };
+var SpeechSDK_default = SpeechSDK;
 
 // src/SpeechServices/SpeechToText/validation/credentialsSchema.ts
 var import_valibot = require("valibot");
@@ -203,7 +204,7 @@ function pDefer() {
 }
 
 // src/Util/createPromiseQueue.js
-function
+function createPromiseQueue() {
 let shiftDeferred;
 const queue = [];
 const push = (value) => {
```
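The `createPromiseQueue` helper named above pairs a synchronous `push` with an awaitable `shift`; a minimal sketch of that pattern (illustrative only, not the bundle's exact internals):

```js
// Minimal sketch of the push/awaitable-shift pattern used above;
// illustrative only, not the bundle's exact internals.
function createPromiseQueue() {
  let shiftDeferred = null;
  const queue = [];

  // Hand the value to a waiting shift() if there is one, otherwise buffer it.
  const push = value => {
    if (shiftDeferred) {
      const { resolve } = shiftDeferred;

      shiftDeferred = null;
      resolve(value);
    } else {
      queue.push(value);
    }
  };

  // Resolve immediately from the buffer, or wait for the next push().
  const shift = () =>
    queue.length
      ? Promise.resolve(queue.shift())
      : new Promise(resolve => {
          shiftDeferred = { resolve };
        });

  return { push, shift };
}
```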
```diff
@@ -284,7 +285,7 @@ var SpeechRecognitionResult = class extends FakeArray {
 var {
 ResultReason: { RecognizingSpeech, RecognizedSpeech }
 } = SpeechSDK_default;
-function
+function cognitiveServiceEventResultToWebSpeechRecognitionResult(result, init) {
 const { maxAlternatives = Infinity, textNormalization = "display" } = init || {};
 const json = typeof result.json === "string" ? JSON.parse(result.json) : result.json;
 if (result.reason === RecognizingSpeech || result.reason === RecognizedSpeech && !json.NBest) {
```
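This converter maps the Speech SDK's detailed-format JSON (its `NBest` array) onto Web Speech API alternatives; roughly like the simplified sketch below, where the field names follow the SDK's detailed recognition output and the helper name is hypothetical:

```js
// Simplified sketch of the NBest → Web Speech mapping performed above;
// field names follow the Speech SDK's detailed-format JSON, and the
// helper name is hypothetical.
function toWebSpeechAlternatives(json, { maxAlternatives = Infinity, textNormalization = 'display' } = {}) {
  return (json.NBest || []).slice(0, maxAlternatives).map(
    ({ Confidence: confidence, Display: display, ITN: itn, Lexical: lexical, MaskedITN: maskedITN }) => ({
      confidence,
      transcript:
        textNormalization === 'itn' ? itn
        : textNormalization === 'lexical' ? lexical
        : textNormalization === 'maskeditn' ? maskedITN
        : display
    })
  );
}
```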
```diff
@@ -649,7 +650,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 const recognizer = await createRecognizer(this.lang);
 const { pause, unprepare } = prepareAudioConfig(recognizer["audioConfig"]);
 try {
-const queue =
+const queue = createPromiseQueue();
 let soundStarted;
 let speechStarted;
 let stopping;
@@ -800,7 +801,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 speechStarted = true;
 }
 if (recognized) {
-const result =
+const result = cognitiveServiceEventResultToWebSpeechRecognitionResult(recognized.result, {
 maxAlternatives: this.maxAlternatives,
 textNormalization
 });
@@ -832,7 +833,7 @@ function createSpeechRecognitionPonyfillFromRecognizer({
 new SpeechRecognitionEvent("result", {
 results: new SpeechRecognitionResultList([
 ...finalizedResults,
-
+cognitiveServiceEventResultToWebSpeechRecognitionResult(recognizing.result, {
 maxAlternatives: this.maxAlternatives,
 textNormalization
 })
```
```diff
@@ -934,7 +935,7 @@ var import_async = require("on-error-resume-next/async");
 var import_memoize_one = __toESM(require("memoize-one"));
 
 // src/SpeechServices/TextToSpeech/AudioContextConsumer.js
-var
+var AudioContextConsumer = class {
 constructor(audioContext) {
 this.audioContext = audioContext;
 }
@@ -963,7 +964,7 @@ var AudioContextConsumer_default = class {
 };
 
 // src/SpeechServices/TextToSpeech/AudioContextQueue.js
-var
+var AudioContextQueue = class {
 constructor({ audioContext, ponyfill }) {
 this.consumer = null;
 this.paused = false;
@@ -991,7 +992,7 @@ var AudioContextQueue_default = class {
 }
 async startConsumer() {
 while (!this.paused && this.queue.length && !this.consumer) {
-this.consumer = new
+this.consumer = new AudioContextConsumer(this.getAudioContext());
 await this.consumer.start(this.queue);
 this.consumer = null;
 }
```
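The consumer drains queued utterances through Web Audio; the core decode-and-play step looks roughly like this (a sketch of the standard pattern — the function name is hypothetical, and the bundle's consumer adds pause/cancel handling on top):

```js
// Sketch of the standard Web Audio decode-and-play step; the function
// name is hypothetical and the bundle's consumer adds pause/cancel handling.
async function playArrayBuffer(audioContext, arrayBuffer) {
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  const source = audioContext.createBufferSource();

  source.buffer = audioBuffer;
  source.connect(audioContext.destination);

  // Resolve when playback finishes so the queue can advance.
  return new Promise(resolve => {
    source.onended = resolve;
    source.start(0);
  });
}
```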
```diff
@@ -1049,7 +1050,7 @@ var DEFAULT_LANGUAGE = "en-US";
 var DEFAULT_OUTPUT_FORMAT = "riff-16khz-16bit-mono-pcm";
 var DEFAULT_VOICE = "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)";
 var EMPTY_MP3_BASE64 = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU3LjU2LjEwMQAAAAAAAAAAAAAA//tAwAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAACAAABhgC7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7//////////////////////////////////////////////////////////////////8AAAAATGF2YzU3LjY0AAAAAAAAAAAAAAAAJAUHAAAAAAAAAYYoRBqpAAAAAAD/+xDEAAPAAAGkAAAAIAAANIAAAARMQU1FMy45OS41VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVf/7EMQpg8AAAaQAAAAgAAA0gAAABFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV";
-async function
+async function fetchSpeechData({
 deploymentId,
 fetchCredentials,
 lang = DEFAULT_LANGUAGE,
```
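`fetchSpeechData` ultimately issues a REST call against the Azure Speech text-to-speech endpoint; a hedged sketch of that request follows (the endpoint, headers, and SSML shape are the standard Azure API — the request the bundle actually builds, including token handling and custom deployment endpoints, is more involved):

```js
// Hedged sketch of the Azure Speech TTS REST request that fetchSpeechData
// wraps; "synthesize" is a hypothetical name, and the bundle also supports
// authorization tokens and custom deployment endpoints.
async function synthesize({ region, subscriptionKey, lang, voice, text, outputFormat }) {
  const ssml =
    `<speak version="1.0" xml:lang="${lang}">` +
    `<voice xml:lang="${lang}" name="${voice}">${text}</voice>` +
    `</speak>`;

  const res = await fetch(`https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/ssml+xml',
      'Ocp-Apim-Subscription-Key': subscriptionKey,
      'X-Microsoft-OutputFormat': outputFormat // e.g. "riff-16khz-16bit-mono-pcm"
    },
    body: ssml
  });

  if (!res.ok) {
    throw new Error(`Failed to synthesize speech, server returned ${res.status}`);
  }

  return res.arrayBuffer(); // audio bytes, later decoded for playback
}
```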
```diff
@@ -1217,7 +1218,7 @@ var SpeechSynthesisUtterance = class extends import_event_target_shim2.EventTarget {
 this._volume = value;
 }
 preload({ deploymentId, fetchCredentials, outputFormat }) {
-this.arrayBufferPromise =
+this.arrayBufferPromise = fetchSpeechData({
 fetchCredentials,
 deploymentId,
 lang: this.lang || window.navigator.language,
@@ -1250,7 +1251,7 @@ var SpeechSynthesisUtterance = class extends import_event_target_shim2.EventTarget {
 var SpeechSynthesisUtterance_default = SpeechSynthesisUtterance;
 
 // src/SpeechServices/TextToSpeech/SpeechSynthesisVoice.js
-var
+var SpeechSynthesisVoice = class {
 constructor({ gender, lang, voiceURI }) {
 this._default = false;
 this._gender = gender;
@@ -1280,7 +1281,7 @@ var SpeechSynthesisVoice_default = class {
 };
 
 // src/SpeechServices/TextToSpeech/fetchCustomVoices.js
-async function
+async function fetchCustomVoices_({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
 const hostname = customVoiceHostname || `${region}.customvoice.api.speech.microsoft.com`;
 const res = await fetch(
 `https://${encodeURI(hostname)}/api/texttospeech/v2.0/endpoints/${encodeURIComponent(deploymentId)}`,
@@ -1296,10 +1297,10 @@ async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
 }
 return res.json();
 }
-async function
-const { models } = await
+async function fetchCustomVoices({ customVoiceHostname, deploymentId, region, subscriptionKey }) {
+const { models } = await fetchCustomVoices_({ customVoiceHostname, deploymentId, region, subscriptionKey });
 return models.map(
-({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new
+({ properties: { Gender: gender }, locale: lang, name: voiceURI }) => new SpeechSynthesisVoice({ gender, lang, voiceURI })
 ).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
 }
 
```
```diff
@@ -1320,13 +1321,13 @@ async function fetchVoices({ authorizationToken, region, speechSynthesisHostname
 throw new Error("Failed to fetch voices");
 }
 const voices = await res.json();
-return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new
+return voices.map(({ Gender: gender, Locale: lang, Name: voiceURI }) => new SpeechSynthesisVoice({ gender, lang, voiceURI })).sort(({ name: x }, { name: y }) => x > y ? 1 : x < y ? -1 : 0);
 }
 
 // src/SpeechServices/TextToSpeech/createSpeechSynthesisPonyfill.js
 var DEFAULT_OUTPUT_FORMAT2 = "audio-24khz-160kbitrate-mono-mp3";
 var EMPTY_ARRAY = [];
-
+function createSpeechRecognitionPonyfill2(options) {
 const {
 audioContext,
 fetchCredentials,
```
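`fetchVoices` queries the standard Azure voices-list endpoint and reshapes each entry into a `SpeechSynthesisVoice`; a sketch of the request side (the function name is hypothetical, the endpoint and headers are the standard Azure API):

```js
// Sketch of the Azure voices-list request behind fetchVoices; the
// function name is hypothetical, the endpoint and headers are standard.
async function listVoices({ authorizationToken, region }) {
  const res = await fetch(`https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`, {
    headers: { Authorization: `Bearer ${authorizationToken}` }
  });

  if (!res.ok) {
    throw new Error('Failed to fetch voices');
  }

  return res.json(); // entries shaped like { Gender, Locale, Name, ... }
}
```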
```diff
@@ -1345,7 +1346,7 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 class SpeechSynthesis extends import_event_target_shim3.EventTarget {
 constructor() {
 super();
-this.queue = new
+this.queue = new AudioContextQueue({ audioContext, ponyfill });
 this.updateVoices();
 }
 cancel() {
@@ -1400,7 +1401,7 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 "web-speech-cognitive-services: Listing of custom voice models are only available when using subscription key."
 );
 await (0, import_async.onErrorResumeNext)(async () => {
-const voices = await
+const voices = await fetchCustomVoices({
 customVoiceHostname,
 deploymentId: speechSynthesisDeploymentId,
 region,
@@ -1424,7 +1425,8 @@ var createSpeechSynthesisPonyfill_default = (options) => {
 SpeechSynthesisEvent,
 SpeechSynthesisUtterance: SpeechSynthesisUtterance_default
 };
-}
+}
+var createSpeechSynthesisPonyfill_default = createSpeechRecognitionPonyfill2;
 
 // src/SpeechServices/TextToSpeech.js
 var TextToSpeech_default = createSpeechSynthesisPonyfill_default;
@@ -1456,8 +1458,9 @@ function createSpeechServicesPonyfill(options = {}) {
 }
 var meta = document.createElement("meta");
 meta.setAttribute("name", "web-speech-cognitive-services");
-meta.setAttribute("content", `version=${"8.1.4
+meta.setAttribute("content", `version=${"8.1.4"}`);
 document.head.appendChild(meta);
+var SpeechServices_default = createSpeechServicesPonyfill;
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
 createSpeechRecognitionPonyfill,
```
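The version stamp written into `<head>` above can be read back from a page like so (a small sketch):

```js
// Sketch: reading the version stamp the ponyfill writes into <head>.
const meta = document.querySelector('meta[name="web-speech-cognitive-services"]');
const version = meta && new URLSearchParams(meta.getAttribute('content')).get('version');

console.log(version); // "8.1.4" after this release
```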