@layercode/js-sdk 2.1.5 → 2.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/layercode-js-sdk.esm.js +1885 -79
- package/dist/layercode-js-sdk.esm.js.map +1 -1
- package/dist/layercode-js-sdk.min.js +1646 -1722
- package/dist/layercode-js-sdk.min.js.map +1 -1
- package/dist/types/index.d.ts +14 -0
- package/package.json +2 -2
@@ -4,21 +4,6 @@
     (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.LayercodeClient = factory());
 })(this, (function () { 'use strict';

-    function _mergeNamespaces(n, m) {
-        m.forEach(function (e) {
-            e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) {
-                if (k !== 'default' && !(k in n)) {
-                    var d = Object.getOwnPropertyDescriptor(e, k);
-                    Object.defineProperty(n, k, d.get ? d : {
-                        enumerable: true,
-                        get: function () { return e[k]; }
-                    });
-                }
-            });
-        });
-        return Object.freeze(n);
-    }
-
     /**
      * Raw wav audio file contents
      * @typedef {Object} WavPackerAudioType
@@ -1324,54 +1309,14 @@ registerProcessor('audio_processor', AudioProcessor);
      * @returns {Promise<true>}
      */
     async requestPermission() {
-        const ensureUserMediaAccess = async () => {
-            const stream = await navigator.mediaDevices.getUserMedia({
-                audio: true,
-            });
-            const tracks = stream.getTracks();
-            tracks.forEach((track) => track.stop());
-        };
-
-        const permissionsUnsupported =
-            !navigator.permissions ||
-            typeof navigator.permissions.query !== 'function';
-
-        if (permissionsUnsupported) {
-            try {
-                await ensureUserMediaAccess();
-            } catch (error) {
-                window.alert('You must grant microphone access to use this feature.');
-                throw error;
-            }
-            return true;
-        }
-
         try {
-
-
+            console.log('ensureUserMediaAccess');
+            await navigator.mediaDevices.getUserMedia({
+                audio: true,
             });
-
-
-
-            return true;
-        }
-
-            if (permissionStatus.state === 'prompt') {
-                try {
-                    await ensureUserMediaAccess();
-                } catch (error) {
-                    window.alert('You must grant microphone access to use this feature.');
-                    throw error;
-                }
-            }
-        } catch (error) {
-            // Firefox rejects permissions.query with NotSupportedError – fall back to getUserMedia directly
-            try {
-                await ensureUserMediaAccess();
-            } catch (fallbackError) {
-                window.alert('You must grant microphone access to use this feature.');
-                throw fallbackError;
-            }
+        } catch (fallbackError) {
+            window.alert('You must grant microphone access to use this feature.');
+            throw fallbackError;
         }
         return true;
     }
@@ -1388,10 +1333,9 @@ registerProcessor('audio_processor', AudioProcessor);
             throw new Error('Could not request user devices');
         }
         await this.requestPermission();
+
         const devices = await navigator.mediaDevices.enumerateDevices();
-        const audioDevices = devices.filter(
-            (device) => device.kind === 'audioinput',
-        );
+        const audioDevices = devices.filter((device) => device.kind === 'audioinput');
         const defaultDeviceIndex = audioDevices.findIndex(
             (device) => device.deviceId === 'default',
         );
@@ -1779,1777 +1723,1033 @@ registerProcessor('audio_processor', AudioProcessor);

 globalThis.WavRecorder = WavRecorder;

-
-
-
-
-
-
-
-
-
-
-
-
-
+var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
+
+function getAugmentedNamespace(n) {
+    if (n.__esModule) return n;
+    var f = n.default;
+    if (typeof f == "function") {
+        var a = function a () {
+            if (this instanceof a) {
+                return Reflect.construct(f, arguments, this.constructor);
+            }
+            return f.apply(this, arguments);
+        };
+        a.prototype = f.prototype;
+    } else a = {};
+    Object.defineProperty(a, '__esModule', {value: true});
+    Object.keys(n).forEach(function (k) {
+        var d = Object.getOwnPropertyDescriptor(n, k);
+        Object.defineProperty(a, k, d.get ? d : {
+            enumerable: true,
+            get: function () {
+                return n[k];
+            }
+        });
+    });
+    return a;
 }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    }
-    return btoa(binary);
+var dist = {};
+
+var assetPath = {};
+
+Object.defineProperty(assetPath, "__esModule", { value: true });
+assetPath.baseAssetPath = void 0;
+// nextjs@14 bundler may attempt to execute this during SSR and crash
+const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
+const currentScript = isWeb
+    ? window.document.currentScript
+    : null;
+let basePath = "/";
+if (currentScript) {
+    basePath = currentScript.src
+        .replace(/#.*$/, "")
+        .replace(/\?.*$/, "")
+        .replace(/\/[^\/]+$/, "/");
 }
+assetPath.baseAssetPath = basePath;

-
-
-
-
-
-
-
-
-
-
-
+var defaultModelFetcher$1 = {};
+
+Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
+defaultModelFetcher$1.defaultModelFetcher = void 0;
+const defaultModelFetcher = (path) => {
+    return fetch(path).then((model) => model.arrayBuffer());
+};
+defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
+
+var frameProcessor = {};
+
+var logging = {};
+
+(function (exports) {
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.log = exports.LOG_PREFIX = void 0;
+    exports.LOG_PREFIX = "[VAD]";
+    const levels = ["error", "debug", "warn"];
+    function getLog(level) {
+        return (...args) => {
+            console[level](exports.LOG_PREFIX, ...args);
+        };
+    }
+    const _log = levels.reduce((acc, level) => {
+        acc[level] = getLog(level);
+        return acc;
+    }, {});
+    exports.log = _log;
+
+} (logging));
+
+var messages = {};
+
+Object.defineProperty(messages, "__esModule", { value: true });
+messages.Message = void 0;
+var Message;
+(function (Message) {
+    Message["AudioFrame"] = "AUDIO_FRAME";
+    Message["SpeechStart"] = "SPEECH_START";
+    Message["VADMisfire"] = "VAD_MISFIRE";
+    Message["SpeechEnd"] = "SPEECH_END";
+    Message["SpeechStop"] = "SPEECH_STOP";
+    Message["SpeechRealStart"] = "SPEECH_REAL_START";
+    Message["FrameProcessed"] = "FRAME_PROCESSED";
+})(Message || (messages.Message = Message = {}));
+
+/*
+Some of this code, together with the default options found in index.ts,
+were taken (or took inspiration) from https://github.com/snakers4/silero-vad
+*/
+Object.defineProperty(frameProcessor, "__esModule", { value: true });
+frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
+const logging_1$3 = logging;
+const messages_1 = messages;
+const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
+frameProcessor.defaultLegacyFrameProcessorOptions = {
+    positiveSpeechThreshold: 0.5,
+    negativeSpeechThreshold: 0.5 - 0.15,
+    preSpeechPadFrames: 1,
+    redemptionFrames: 8,
+    frameSamples: 1536,
+    minSpeechFrames: 3,
+    submitUserSpeechOnPause: false,
+};
+frameProcessor.defaultV5FrameProcessorOptions = {
+    positiveSpeechThreshold: 0.5,
+    negativeSpeechThreshold: 0.5 - 0.15,
+    preSpeechPadFrames: 3,
+    redemptionFrames: 24,
+    frameSamples: 512,
+    minSpeechFrames: 9,
+    submitUserSpeechOnPause: false,
+};
+function validateOptions(options) {
+    if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
+        logging_1$3.log.warn("You are using an unusual frame size");
+    }
+    if (options.positiveSpeechThreshold < 0 ||
+        options.positiveSpeechThreshold > 1) {
+        logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
+    }
+    if (options.negativeSpeechThreshold < 0 ||
+        options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
+        logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
+    }
+    if (options.preSpeechPadFrames < 0) {
+        logging_1$3.log.error("preSpeechPadFrames should be positive");
     }
-    if (
-
-    micVADModulePromise = Promise.resolve().then(function () { return index$1; });
+    if (options.redemptionFrames < 0) {
+        logging_1$3.log.error("redemptionFrames should be positive");
     }
-
+}
+frameProcessor.validateOptions = validateOptions;
+const concatArrays = (arrays) => {
+    const sizes = arrays.reduce((out, next) => {
+        out.push(out.at(-1) + next.length);
+        return out;
+    }, [0]);
+    const outArray = new Float32Array(sizes.at(-1));
+    arrays.forEach((arr, index) => {
+        const place = sizes[index];
+        outArray.set(arr, place);
+    });
+    return outArray;
 };
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+class FrameProcessor {
+    constructor(modelProcessFunc, modelResetFunc, options) {
+        this.modelProcessFunc = modelProcessFunc;
+        this.modelResetFunc = modelResetFunc;
+        this.options = options;
+        this.speaking = false;
+        this.redemptionCounter = 0;
+        this.speechFrameCount = 0;
+        this.active = false;
+        this.speechRealStartFired = false;
+        this.reset = () => {
+            this.speaking = false;
+            this.speechRealStartFired = false;
+            this.audioBuffer = [];
+            this.modelResetFunc();
+            this.redemptionCounter = 0;
+            this.speechFrameCount = 0;
+        };
+        this.pause = (handleEvent) => {
+            this.active = false;
+            if (this.options.submitUserSpeechOnPause) {
+                this.endSegment(handleEvent);
+            }
+            else {
+                this.reset();
+            }
+        };
+        this.resume = () => {
+            this.active = true;
+        };
+        this.endSegment = (handleEvent) => {
+            const audioBuffer = this.audioBuffer;
+            this.audioBuffer = [];
+            const speaking = this.speaking;
+            this.reset();
+            if (speaking) {
+                const speechFrameCount = audioBuffer.reduce((acc, item) => {
+                    return item.isSpeech ? (acc + 1) : acc;
+                }, 0);
+                if (speechFrameCount >= this.options.minSpeechFrames) {
+                    const audio = concatArrays(audioBuffer.map((item) => item.frame));
+                    handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+                }
+                else {
+                    handleEvent({ msg: messages_1.Message.VADMisfire });
+                }
+            }
+            return {};
+        };
+        this.process = async (frame, handleEvent) => {
+            if (!this.active) {
+                return;
+            }
+            const probs = await this.modelProcessFunc(frame);
+            const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;
+            handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });
+            this.audioBuffer.push({
+                frame,
+                isSpeech,
+            });
+            if (isSpeech) {
+                this.speechFrameCount++;
+                this.redemptionCounter = 0;
+            }
+            if (isSpeech && !this.speaking) {
+                this.speaking = true;
+                handleEvent({ msg: messages_1.Message.SpeechStart });
+            }
+            if (this.speaking &&
+                this.speechFrameCount === this.options.minSpeechFrames &&
+                !this.speechRealStartFired) {
+                this.speechRealStartFired = true;
+                handleEvent({ msg: messages_1.Message.SpeechRealStart });
+            }
+            if (probs.isSpeech < this.options.negativeSpeechThreshold &&
+                this.speaking &&
+                ++this.redemptionCounter >= this.options.redemptionFrames) {
+                this.redemptionCounter = 0;
+                this.speechFrameCount = 0;
+                this.speaking = false;
+                this.speechRealStartFired = false;
+                const audioBuffer = this.audioBuffer;
+                this.audioBuffer = [];
+                const speechFrameCount = audioBuffer.reduce((acc, item) => {
+                    return item.isSpeech ? (acc + 1) : acc;
+                }, 0);
+                if (speechFrameCount >= this.options.minSpeechFrames) {
+                    const audio = concatArrays(audioBuffer.map((item) => item.frame));
+                    handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+                }
+                else {
+                    handleEvent({ msg: messages_1.Message.VADMisfire });
+                }
+            }
+            if (!this.speaking) {
+                while (this.audioBuffer.length > this.options.preSpeechPadFrames) {
+                    this.audioBuffer.shift();
+                }
+                this.speechFrameCount = 0;
+            }
         };
-        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
-        this._websocketUrl = DEFAULT_WS_URL;
-        this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
-        this.wavPlayer = new WavStreamPlayer({
-            finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
-            sampleRate: 16000, // TODO should be set my fetched agent config
-        });
-        this.vad = null;
-        this.ws = null;
-        this.status = 'disconnected';
-        this.userAudioAmplitude = 0;
-        this.agentAudioAmplitude = 0;
-        this.conversationId = this.options.conversationId;
-        this.pushToTalkActive = false;
-        this.pushToTalkEnabled = false;
-        this.canInterrupt = false;
-        this.userIsSpeaking = false;
-        this.recorderStarted = false;
-        this.readySent = false;
-        this.currentTurnId = null;
         this.audioBuffer = [];
-        this.
-        this.activeDeviceId = null;
-        this.useSystemDefaultDevice = false;
-        this.lastReportedDeviceId = null;
-        this.lastKnownSystemDefaultDeviceKey = null;
-        this.isMuted = false;
-        this.stopPlayerAmplitude = undefined;
-        this.stopRecorderAmplitude = undefined;
-        this.deviceChangeListener = null;
-        // this.audioPauseTime = null;
-        // Bind event handlers
-        this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
-        this._handleDataAvailable = this._handleDataAvailable.bind(this);
+        this.reset();
     }
-
-
-
-
-
-
-
-
-
+}
+frameProcessor.FrameProcessor = FrameProcessor;
+
+var nonRealTimeVad = {};
+
+var ortWeb_min = {exports: {}};
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+const backends = {};
+const backendsSortedByPriority = [];
+/**
+ * Register a backend.
+ *
+ * @param name - the name as a key to lookup as an execution provider.
+ * @param backend - the backend object.
+ * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
+ * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
+ *
+ * @internal
+ */
+const registerBackend = (name, backend, priority) => {
+    if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
+        const currentBackend = backends[name];
+        if (currentBackend === undefined) {
+            backends[name] = { backend, priority };
         }
-
-
-        console.log('VAD is disabled by backend configuration');
+        else if (currentBackend.priority > priority) {
+            // same name is already registered with a higher priority. skip registeration.
             return;
         }
-
-
-
-
-            console.debug('onSpeechStart: sending vad_start');
-            this.userIsSpeaking = true;
-            this.options.onUserIsSpeakingChange(true);
-            this._wsSend({
-                type: 'vad_events',
-                event: 'vad_start',
-            });
-            this.options.onMessage({
-                type: 'vad_events',
-                event: 'vad_start',
-            });
-        },
-        onSpeechEnd: () => {
-            console.debug('onSpeechEnd: sending vad_end');
-            this.userIsSpeaking = false;
-            this.options.onUserIsSpeakingChange(false);
-            this.audioBuffer = []; // Clear buffer on speech end
-            this._wsSend({
-                type: 'vad_events',
-                event: 'vad_end',
-            });
-            this.options.onMessage({
-                type: 'vad_events',
-                event: 'vad_end',
-            });
-        },
-    };
-    // Apply VAD configuration from backend if available
-    if (this.vadConfig) {
-        // Only add keys that are explicitly defined (not undefined)
-        if (this.vadConfig.model !== undefined)
-            vadOptions.model = this.vadConfig.model;
-        if (this.vadConfig.positive_speech_threshold !== undefined)
-            vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
-        if (this.vadConfig.negative_speech_threshold !== undefined)
-            vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
-        if (this.vadConfig.redemption_frames !== undefined)
-            vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
-        if (this.vadConfig.min_speech_frames !== undefined)
-            vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
-        if (this.vadConfig.pre_speech_pad_frames !== undefined)
-            vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
-        if (this.vadConfig.frame_samples !== undefined)
-            vadOptions.frameSamples = this.vadConfig.frame_samples;
-    }
-    else {
-        // Default values if no config from backend
-        vadOptions.model = 'v5';
-        vadOptions.positiveSpeechThreshold = 0.15;
-        vadOptions.negativeSpeechThreshold = 0.05;
-        vadOptions.redemptionFrames = 4;
-        vadOptions.minSpeechFrames = 2;
-        vadOptions.preSpeechPadFrames = 0;
-        vadOptions.frameSamples = 512; // Required for v5
+        else if (currentBackend.priority === priority) {
+            if (currentBackend.backend !== backend) {
+                throw new Error(`cannot register backend "${name}" using priority ${priority}`);
+            }
         }
-
-
-
-
-    if (!vad) {
-        throw new Error('MicVAD module not available');
+        if (priority >= 0) {
+            const i = backendsSortedByPriority.indexOf(name);
+            if (i !== -1) {
+                backendsSortedByPriority.splice(i, 1);
             }
-
-
-
-
-
-
-
-
-
-            event: 'vad_model_failed',
-        });
-    });
-    }
-    /**
-     * Updates the connection status and triggers the callback
-     * @param {string} status - New status value
-     */
-    _setStatus(status) {
-        this.status = status;
-        this.options.onStatusChange(status);
+            for (let i = 0; i < backendsSortedByPriority.length; i++) {
+                if (backends[backendsSortedByPriority[i]].priority <= priority) {
+                    backendsSortedByPriority.splice(i, 0, name);
+                    return;
+                }
+            }
+            backendsSortedByPriority.push(name);
+        }
+        return;
     }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    }
-    }
-    async triggerUserTurnFinished() {
-        if (this.pushToTalkActive) {
-            this.pushToTalkActive = false;
-            this._wsSend({ type: 'trigger.turn.end', role: 'user' });
-        }
-    }
-    /**
-     * Handles incoming WebSocket messages
-     * @param {MessageEvent} event - The WebSocket message event
-     */
-    async _handleWebSocketMessage(event) {
-        try {
-            const message = JSON.parse(event.data);
-            if (message.type !== 'response.audio') {
-                console.debug('msg:', message);
-            }
-            switch (message.type) {
-                case 'turn.start':
-                    // Sent from the server to this client when a new user turn is detected
-                    if (message.role === 'assistant') {
-                        // Start tracking new assistant turn
-                        console.debug('Assistant turn started, will track new turn ID from audio/text');
-                    }
-                    else if (message.role === 'user' && !this.pushToTalkEnabled) {
-                        // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
-                        console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
-                        await this._clientInterruptAssistantReplay();
-                    }
-                    this.options.onMessage(message);
-                    break;
-                case 'response.audio':
-                    const audioBuffer = base64ToArrayBuffer(message.content);
-                    this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
-                    // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
-                    // Set current turn ID from first audio message, or update if different turn
-                    if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
-                        console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
-                        this.currentTurnId = message.turn_id;
-                        // Clean up interrupted tracks, keeping only the current turn
-                        this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
-                    }
-                    break;
-                case 'response.text':
-                    // Set turn ID from first text message if not set
-                    if (!this.currentTurnId) {
-                        this.currentTurnId = message.turn_id;
-                        console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
-                    }
-                    this.options.onMessage(message);
-                    break;
-                case 'response.data':
-                    this.options.onDataMessage(message);
-                    break;
-                case 'user.transcript':
-                case 'user.transcript.delta':
-                case 'user.transcript.interim_delta':
-                    this.options.onMessage(message);
-                    break;
-                default:
-                    console.warn('Unknown message type received:', message);
-            }
-        }
-        catch (error) {
-            console.error('Error processing WebSocket message:', error);
-            this.options.onError(error instanceof Error ? error : new Error(String(error)));
-        }
-    }
-    /**
-     * Handles available client browser microphone audio data and sends it over the WebSocket
-     * @param {ArrayBuffer} data - The audio data buffer
-     */
-    _handleDataAvailable(data) {
-        var _a, _b, _c;
-        try {
-            const base64 = arrayBufferToBase64$1(data.mono);
-            // Don't send audio if muted
-            if (this.isMuted) {
-                return;
-            }
-            // Determine if we should gate audio based on VAD configuration
-            const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
-            const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
-            let sendAudio;
-            if (this.pushToTalkEnabled) {
-                sendAudio = this.pushToTalkActive;
-            }
-            else if (shouldGateAudio) {
-                sendAudio = this.userIsSpeaking;
+    throw new TypeError('not a valid backend');
+};
+/**
+ * Resolve backend by specified hints.
+ *
+ * @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
+ * @returns a promise that resolves to the backend.
+ *
+ * @internal
+ */
+const resolveBackend = async (backendHints) => {
+    const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
+    const errors = [];
+    for (const backendName of backendNames) {
+        const backendInfo = backends[backendName];
+        if (backendInfo) {
+            if (backendInfo.initialized) {
+                return backendInfo.backend;
             }
-            else {
-                //
-                sendAudio = true;
+            else if (backendInfo.aborted) {
+                continue; // current backend is unavailable; try next
             }
-
-
-            if (
-
-            for (const bufferedAudio of this.audioBuffer) {
-                this._wsSend({
-                    type: 'client.audio',
-                    content: bufferedAudio,
-                });
-            }
-            this.audioBuffer = []; // Clear the buffer after sending
+            const isInitializing = !!backendInfo.initPromise;
+            try {
+                if (!isInitializing) {
+                    backendInfo.initPromise = backendInfo.backend.init();
                 }
-
-
-
-                content: base64,
-            });
+                await backendInfo.initPromise;
+                backendInfo.initialized = true;
+                return backendInfo.backend;
             }
-
-
-
-            // Keep buffer size based on configuration
-            if (this.audioBuffer.length > bufferFrames) {
-                this.audioBuffer.shift(); // Remove oldest chunk
+            catch (e) {
+                if (!isInitializing) {
+                    errors.push({ name: backendName, err: e });
                 }
+                backendInfo.aborted = true;
+            }
+            finally {
+                delete backendInfo.initPromise;
             }
-        }
-        catch (error) {
-            console.error('Error processing audio:', error);
-            this.options.onError(error instanceof Error ? error : new Error(String(error)));
-        }
-    }
-    _wsSend(message) {
-        var _a;
-        if (message.type !== 'client.audio') {
-            console.debug('sent_msg:', message);
-        }
-        const messageString = JSON.stringify(message);
-        if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
-            this.ws.send(messageString);
         }
     }
-
-
-
-
-
-
+    throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
+};
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+class EnvImpl {
+    constructor() {
+        this.wasm = {};
+        this.webgl = {};
+        this.logLevelInternal = 'warning';
     }
-
-
-
-
-     * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
-     */
-    _setupAmplitudeMonitoring(source, callback, updateInternalState) {
-        let updateCounter = 0;
-        source.startAmplitudeMonitoring((amplitude) => {
-            // Only update and call callback at the specified sample rate
-            if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
-                updateInternalState(amplitude);
-                if (callback !== NOOP) {
-                    callback(amplitude);
-                }
-                updateCounter = 0; // Reset counter after sampling
-            }
-            updateCounter++;
-        });
-        const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
-        if (source === this.wavPlayer) {
-            this.stopPlayerAmplitude = stop;
+    // TODO standadize the getter and setter convention in env for other fields.
+    set logLevel(value) {
+        if (value === undefined) {
+            return;
         }
-        if (
-
+        if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
+            throw new Error(`Unsupported logging level: ${value}`);
         }
+        this.logLevelInternal = value;
     }
-
-
-        (_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
-        (_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
-        this.stopPlayerAmplitude = undefined;
-        this.stopRecorderAmplitude = undefined;
+    get logLevel() {
+        return this.logLevelInternal;
     }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+}
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+/**
+ * Represent a set of flags as a global singleton.
+ */
+const env = new EnvImpl();
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && typeof BigInt64Array.from === 'function';
+const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && typeof BigUint64Array.from === 'function';
+// a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
+const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
+    ['float32', Float32Array],
+    ['uint8', Uint8Array],
+    ['int8', Int8Array],
+    ['uint16', Uint16Array],
+    ['int16', Int16Array],
+    ['int32', Int32Array],
+    ['bool', Uint8Array],
+    ['float64', Float64Array],
+    ['uint32', Uint32Array],
+]);
+// a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
+const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
+    [Float32Array, 'float32'],
+    [Uint8Array, 'uint8'],
+    [Int8Array, 'int8'],
+    [Uint16Array, 'uint16'],
+    [Int16Array, 'int16'],
+    [Int32Array, 'int32'],
+    [Float64Array, 'float64'],
+    [Uint32Array, 'uint32'],
+]);
+if (isBigInt64ArrayAvailable) {
+    NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
+    NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
+}
+if (isBigUint64ArrayAvailable) {
+    NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
+    NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
+}
+/**
+ * calculate size from dims.
+ *
+ * @param dims the dims array. May be an illegal input.
+ */
+const calculateSize = (dims) => {
+    let size = 1;
+    for (let i = 0; i < dims.length; i++) {
+        const dim = dims[i];
+        if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
+            throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
+        }
+        if (dim < 0) {
+            throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
+        }
+        size *= dim;
+    }
+    return size;
+};
+let Tensor$1 = class Tensor {
+    constructor(arg0, arg1, arg2) {
+        let type;
+        let data;
+        let dims;
+        // check whether arg0 is type or data
+        if (typeof arg0 === 'string') {
+            //
+            // Override: constructor(type, data, ...)
+            //
+            type = arg0;
+            dims = arg2;
+            if (arg0 === 'string') {
+                // string tensor
+                if (!Array.isArray(arg1)) {
+                    throw new TypeError('A string tensor\'s data must be a string array.');
+                }
+                // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
+                // error will be populated at inference
+                data = arg1;
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
+            else {
+                // numeric tensor
+                const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(arg0);
+                if (typedArrayConstructor === undefined) {
+                    throw new TypeError(`Unsupported tensor type: ${arg0}.`);
+                }
+                if (Array.isArray(arg1)) {
+                    // use 'as any' here because TypeScript's check on type of 'SupportedTypedArrayConstructors.from()' produces
+                    // incorrect results.
+                    // 'typedArrayConstructor' should be one of the typed array prototype objects.
+                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    data = typedArrayConstructor.from(arg1);
+                }
+                else if (arg1 instanceof typedArrayConstructor) {
+                    data = arg1;
+                }
+                else {
+                    throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
+                }
             }
-
-
-
+        }
+        else {
+            //
+            // Override: constructor(data, ...)
+            //
+            dims = arg1;
+            if (Array.isArray(arg0)) {
+                // only boolean[] and string[] is supported
+                if (arg0.length === 0) {
+                    throw new TypeError('Tensor type cannot be inferred from an empty array.');
+                }
+                const firstElementType = typeof arg0[0];
+                if (firstElementType === 'string') {
+                    type = 'string';
+                    data = arg0;
+                }
+                else if (firstElementType === 'boolean') {
+                    type = 'bool';
+                    // 'arg0' is of type 'boolean[]'. Uint8Array.from(boolean[]) actually works, but typescript thinks this is
+                    // wrong type. We use 'as any' to make it happy.
+                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    data = Uint8Array.from(arg0);
+                }
+                else {
+                    throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
+                }
             }
             else {
-
+                // get tensor type from TypedArray
+                const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
+                if (mappedType === undefined) {
+                    throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
+                }
+                type = mappedType;
+                data = arg0;
             }
-        // Bind the websocket message callbacks
-        this.ws.onmessage = this._handleWebSocketMessage;
-        this.ws.onopen = () => {
-            console.log('WebSocket connection established');
-            this._setStatus('connected');
-            this.options.onConnect({ conversationId: this.conversationId });
-            // Attempt to send ready message if recorder already started
-            this._sendReadyIfNeeded();
-        };
-        this.ws.onclose = () => {
-            console.log('WebSocket connection closed');
-            this.ws = null;
-            this._performDisconnectCleanup().catch((error) => {
-                console.error('Error during disconnect cleanup:', error);
-                this.options.onError(error instanceof Error ? error : new Error(String(error)));
-            });
-        };
-        this.ws.onerror = (error) => {
-            console.error('WebSocket error:', error);
-            this._setStatus('error');
-            this.options.onError(new Error('WebSocket connection error'));
-        };
-        // Initialize audio player
-        await this.wavPlayer.connect();
-        // Set up audio player amplitude monitoring
-        this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
-        // wavRecorder will be started from the onDeviceSwitched callback,
-        // which is called when the device is first initialized and also when the device is switched
-        // this is to ensure that the device is initialized before the recorder is started
         }
-
-
-
-
-            throw error;
+        // type and data is processed, now processing dims
+        if (dims === undefined) {
+            // assume 1-D tensor if dims omitted
+            dims = [data.length];
         }
-
-
-        this.currentTurnId = null;
-        console.debug('Reset turn tracking state');
-    }
-    async disconnect() {
-        if (this.status === 'disconnected') {
-            return;
+        else if (!Array.isArray(dims)) {
+            throw new TypeError('A tensor\'s dims must be a number array');
         }
-
-
-
-
-        this.ws.onmessage = null;
-        this.ws.close();
-        this.ws = null;
+        // perform check
+        const size = calculateSize(dims);
+        if (size !== data.length) {
+            throw new Error(`Tensor's size(${size}) does not match data length(${data.length}).`);
         }
-
-
-
-
-     * @returns {MediaStream|null} The microphone stream or null if not initialized
-     */
-    getStream() {
-        return this.wavRecorder.getStream();
+        this.dims = dims;
+        this.type = type;
+        this.data = data;
+        this.size = size;
     }
+    // #endregion
    /**
-     *
-     *
+     * Create a new tensor object from image object
+     *
+     * @param buffer - Extracted image buffer data - assuming RGBA format
+     * @param imageFormat - input image configuration - required configurations height, width, format
+     * @param tensorFormat - output tensor configuration - Default is RGB format
     */
-
-
-
-        const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
-        this.useSystemDefaultDevice = normalizedDeviceId === null;
-        this.deviceId = normalizedDeviceId;
-        // Restart recording with the new device
-        await this._restartAudioRecording();
-        // Reinitialize VAD with the new audio stream if VAD is enabled
-        const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
-        if (shouldUseVAD) {
-            console.debug('Reinitializing VAD with new audio stream');
-            const newStream = this.wavRecorder.getStream();
-            await this._reinitializeVAD(newStream);
-        }
-        const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
-        console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
+    static bufferToTensor(buffer, options) {
+        if (buffer === undefined) {
+            throw new Error('Image buffer must be defined');
         }
|
-
|
|
2340
|
-
|
|
2341
|
-
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
/**
|
|
2345
|
-
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
2346
|
-
*/
|
|
2347
|
-
async _restartAudioRecording() {
|
|
2348
|
-
var _a, _b;
|
|
2349
|
-
try {
|
|
2350
|
-
console.debug('Restarting audio recording after device switch...');
|
|
2351
|
-
try {
|
|
2352
|
-
await this.wavRecorder.end();
|
|
2353
|
-
}
|
|
2354
|
-
catch (_c) {
|
|
2355
|
-
// Ignore cleanup errors
|
|
2356
|
-
}
|
|
2357
|
-
// Start with new device
|
|
2358
|
-
const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
|
|
2359
|
-
await this.wavRecorder.begin(targetDeviceId);
|
|
2360
|
-
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
2361
|
-
// Re-setup amplitude monitoring with the new stream
|
|
2362
|
-
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
2363
|
-
const previousReportedDeviceId = this.lastReportedDeviceId;
|
|
2364
|
-
const stream = this.wavRecorder.getStream();
|
|
2365
|
-
const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
|
|
2366
|
-
const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
|
|
2367
|
-
const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
|
|
2368
|
-
this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
|
|
2369
|
-
if (!this.recorderStarted) {
|
|
2370
|
-
this.recorderStarted = true;
|
|
2371
|
-
this._sendReadyIfNeeded();
|
|
2372
|
-
}
|
|
2373
|
-
const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
|
|
2374
|
-
if (reportedDeviceId !== previousReportedDeviceId) {
|
|
2375
|
-
this.lastReportedDeviceId = reportedDeviceId;
|
|
2376
|
-
if (this.options.onDeviceSwitched) {
|
|
2377
|
-
this.options.onDeviceSwitched(reportedDeviceId);
|
|
2378
|
-
}
|
|
2379
|
-
}
|
|
2380
|
-
console.debug('Audio recording restart completed successfully');
|
|
2381
|
-
}
|
|
2382
|
-
catch (error) {
|
|
2383
|
-
console.error('Error restarting audio recording after device switch:', error);
|
|
2384
|
-
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
2385
|
-
}
|
|
2386
|
-
}
|
|
2387
|
-
/**
|
|
2388
|
-
* Reinitializes VAD with a new stream (used after device switching)
|
|
2389
|
-
*/
|
|
2390
|
-
async _reinitializeVAD(stream) {
|
|
2391
|
-
// Clean up existing VAD
|
|
2392
|
-
if (this.vad) {
|
|
2393
|
-
this.vad.pause();
|
|
2394
|
-
this.vad.destroy();
|
|
2395
|
-
this.vad = null;
|
|
2270
|
+
if (options.height === undefined || options.width === undefined) {
|
|
2271
|
+
throw new Error('Image height and width must be defined');
|
|
2396
2272
|
}
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2273
|
+
const { height, width } = options;
|
|
2274
|
+
const norm = options.norm;
|
|
2275
|
+
let normMean;
|
|
2276
|
+
let normBias;
|
|
2277
|
+
if (norm === undefined || norm.mean === undefined) {
|
|
2278
|
+
normMean = 255;
|
|
2400
2279
|
}
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
* Sets up the device change event listener
|
|
2404
|
-
*/
|
|
2405
|
-
_setupDeviceChangeListener() {
|
|
2406
|
-
if (!this.deviceChangeListener) {
|
|
2407
|
-
this.deviceChangeListener = async (devices) => {
|
|
2408
|
-
try {
|
|
2409
|
-
const defaultDevice = devices.find((device) => device.default);
|
|
2410
|
-
const usingDefaultDevice = this.useSystemDefaultDevice;
|
|
2411
|
-
const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
|
|
2412
|
-
const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
|
|
2413
|
-
let shouldSwitch = !this.recorderStarted;
|
|
2414
|
-
if (!shouldSwitch) {
|
|
2415
|
-
if (usingDefaultDevice) {
|
|
2416
|
-
if (!defaultDevice) {
|
|
2417
|
-
shouldSwitch = true;
|
|
2418
|
-
}
|
|
2419
|
-
else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
|
|
2420
|
-
shouldSwitch = true;
|
|
2421
|
-
}
|
|
2422
|
-
else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
|
|
2423
|
-
(!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
|
|
2424
|
-
shouldSwitch = true;
|
|
2425
|
-
}
|
|
2426
|
-
}
|
|
2427
|
-
else {
|
|
2428
|
-
const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
|
|
2429
|
-
shouldSwitch = !matchesRequestedDevice;
|
|
2430
|
-
}
|
|
2431
|
-
}
|
|
2432
|
-
this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
|
|
2433
|
-
if (shouldSwitch) {
|
|
2434
|
-
console.debug('Selecting fallback audio input device');
|
|
2435
|
-
const fallbackDevice = defaultDevice || devices[0];
|
|
2436
|
-
if (fallbackDevice) {
|
|
2437
|
-
const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
|
|
2438
|
-
await this.setInputDevice(fallbackId);
|
|
2439
|
-
}
|
|
2440
|
-
else {
|
|
2441
|
-
console.warn('No alternative audio device found');
|
|
2442
|
-
}
|
|
2443
|
-
}
|
|
2444
|
-
}
|
|
2445
|
-
catch (error) {
|
|
2446
|
-
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
2447
|
-
}
|
|
2448
|
-
};
|
|
2280
|
+
else {
|
|
2281
|
+
normMean = norm.mean;
|
|
2449
2282
|
}
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
_teardownDeviceListeners() {
|
|
2453
|
-
this.wavRecorder.listenForDeviceChange(null);
|
|
2454
|
-
}
|
|
2455
|
-
async _performDisconnectCleanup() {
|
|
2456
|
-
var _a, _b;
|
|
2457
|
-
this.deviceId = null;
|
|
2458
|
-
this.activeDeviceId = null;
|
|
2459
|
-
this.useSystemDefaultDevice = false;
|
|
2460
|
-
this.lastReportedDeviceId = null;
|
|
2461
|
-
this.lastKnownSystemDefaultDeviceKey = null;
|
|
2462
|
-
this.recorderStarted = false;
|
|
2463
|
-
this.readySent = false;
|
|
2464
|
-
this._stopAmplitudeMonitoring();
|
|
2465
|
-
this._teardownDeviceListeners();
|
|
2466
|
-
if (this.vad) {
|
|
2467
|
-
this.vad.pause();
|
|
2468
|
-
this.vad.destroy();
|
|
2469
|
-
this.vad = null;
|
|
2283
|
+
if (norm === undefined || norm.bias === undefined) {
|
|
2284
|
+
normBias = 0;
|
|
2470
2285
|
}
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
this.wavPlayer.disconnect();
|
|
2474
|
-
this._resetTurnTracking();
|
|
2475
|
-
this.options.conversationId = this.conversationId;
|
|
2476
|
-
this.userAudioAmplitude = 0;
|
|
2477
|
-
this.agentAudioAmplitude = 0;
|
|
2478
|
-
this._setStatus('disconnected');
|
|
2479
|
-
this.options.onDisconnect();
|
|
2480
|
-
}
|
|
2481
|
-
_getDeviceComparisonKey(device) {
|
|
2482
|
-
if (!device || typeof device !== 'object') {
|
|
2483
|
-
return null;
|
|
2286
|
+
else {
|
|
2287
|
+
normBias = norm.bias;
|
|
2484
2288
|
}
|
|
2485
|
-
const
|
|
2486
|
-
|
|
2487
|
-
|
|
2289
|
+
const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
|
|
2290
|
+
// default value is RGBA since imagedata and HTMLImageElement uses it
|
|
2291
|
+
const outputformat = options.tensorFormat !== undefined ?
|
|
2292
|
+
(options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
|
|
2293
|
+
'RGB';
|
|
2294
|
+
const offset = height * width;
|
|
2295
|
+
const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
|
|
2296
|
+
// Default pointer assignments
|
|
2297
|
+
let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
|
|
2298
|
+
let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
|
|
2299
|
+
// Updating the pointer assignments based on the input image format
|
|
2300
|
+
if (inputformat === 'RGB') {
|
|
2301
|
+
step = 3;
|
|
2302
|
+
rImagePointer = 0;
|
|
2303
|
+
gImagePointer = 1;
|
|
2304
|
+
bImagePointer = 2;
|
|
2305
|
+
aImagePointer = -1;
|
|
2488
2306
|
}
|
|
2489
|
-
|
|
2490
|
-
if (
|
|
2491
|
-
|
|
2307
|
+
// Updating the pointer assignments based on the output tensor format
|
|
2308
|
+
if (outputformat === 'RGBA') {
|
|
2309
|
+
aTensorPointer = offset * 3;
|
|
2492
2310
|
}
|
|
2493
|
-
|
|
2494
|
-
|
|
2495
|
-
|
|
2311
|
+
else if (outputformat === 'RBG') {
|
|
2312
|
+
rTensorPointer = 0;
|
|
2313
|
+
bTensorPointer = offset;
|
|
2314
|
+
gTensorPointer = offset * 2;
|
|
2496
2315
|
}
|
|
2497
|
-
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
* The connection and recording remain active for quick unmute
|
|
2502
|
-
*/
|
|
2503
|
-
mute() {
|
|
2504
|
-
if (!this.isMuted) {
|
|
2505
|
-
this.isMuted = true;
|
|
2506
|
-
console.log('Microphone muted');
|
|
2507
|
-
this.options.onMuteStateChange(true);
|
|
2316
|
+
else if (outputformat === 'BGR') {
|
|
2317
|
+
bTensorPointer = 0;
|
|
2318
|
+
gTensorPointer = offset;
|
|
2319
|
+
rTensorPointer = offset * 2;
|
|
2508
2320
|
}
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
console.log('Microphone unmuted');
|
|
2517
|
-
this.options.onMuteStateChange(false);
|
|
2321
|
+
for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
|
|
2322
|
+
float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
|
|
2323
|
+
float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
|
|
2324
|
+
float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
|
|
2325
|
+
if (aTensorPointer !== -1 && aImagePointer !== -1) {
|
|
2326
|
+
float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
|
|
2327
|
+
}
|
|
2518
2328
|
}
|
|
2329
|
+
// Float32Array -> ort.Tensor
|
|
2330
|
+
const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
|
|
2331
|
+
new Tensor('float32', float32Data, [1, 3, height, width]);
|
|
2332
|
+
return outputTensor;
|
|
2519
2333
|
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
- const currentScript = isWeb
- ? window.document.currentScript
- : null;
- let basePath = "/";
- if (currentScript) {
- basePath = currentScript.src
- .replace(/#.*$/, "")
- .replace(/\?.*$/, "")
- .replace(/\/[^\/]+$/, "/");
- }
- assetPath.baseAssetPath = basePath;
-
- var defaultModelFetcher$1 = {};
-
- Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
- defaultModelFetcher$1.defaultModelFetcher = void 0;
- const defaultModelFetcher = (path) => {
- return fetch(path).then((model) => model.arrayBuffer());
- };
- defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
-
- var frameProcessor = {};
-
- var logging = {};
-
- (function (exports) {
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.log = exports.LOG_PREFIX = void 0;
- exports.LOG_PREFIX = "[VAD]";
- const levels = ["error", "debug", "warn"];
- function getLog(level) {
- return (...args) => {
- console[level](exports.LOG_PREFIX, ...args);
- };
- }
- const _log = levels.reduce((acc, level) => {
- acc[level] = getLog(level);
- return acc;
- }, {});
- exports.log = _log;
-
- } (logging));
-
- var messages = {};
-
- Object.defineProperty(messages, "__esModule", { value: true });
- messages.Message = void 0;
- var Message;
- (function (Message) {
- Message["AudioFrame"] = "AUDIO_FRAME";
- Message["SpeechStart"] = "SPEECH_START";
- Message["VADMisfire"] = "VAD_MISFIRE";
- Message["SpeechEnd"] = "SPEECH_END";
- Message["SpeechStop"] = "SPEECH_STOP";
- Message["SpeechRealStart"] = "SPEECH_REAL_START";
- Message["FrameProcessed"] = "FRAME_PROCESSED";
- })(Message || (messages.Message = Message = {}));
-
- /*
- Some of this code, together with the default options found in index.ts,
- were taken (or took inspiration) from https://github.com/snakers4/silero-vad
- */
- Object.defineProperty(frameProcessor, "__esModule", { value: true });
- frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
- const logging_1$3 = logging;
- const messages_1 = messages;
- const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
- frameProcessor.defaultLegacyFrameProcessorOptions = {
- positiveSpeechThreshold: 0.5,
- negativeSpeechThreshold: 0.5 - 0.15,
- preSpeechPadFrames: 1,
- redemptionFrames: 8,
- frameSamples: 1536,
- minSpeechFrames: 3,
- submitUserSpeechOnPause: false,
- };
- frameProcessor.defaultV5FrameProcessorOptions = {
- positiveSpeechThreshold: 0.5,
- negativeSpeechThreshold: 0.5 - 0.15,
- preSpeechPadFrames: 3,
- redemptionFrames: 24,
- frameSamples: 512,
- minSpeechFrames: 9,
- submitUserSpeechOnPause: false,
- };
- function validateOptions(options) {
- if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
- logging_1$3.log.warn("You are using an unusual frame size");
- }
- if (options.positiveSpeechThreshold < 0 ||
- options.positiveSpeechThreshold > 1) {
- logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
- }
- if (options.negativeSpeechThreshold < 0 ||
- options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
- logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
- }
- if (options.preSpeechPadFrames < 0) {
- logging_1$3.log.error("preSpeechPadFrames should be positive");
- }
- if (options.redemptionFrames < 0) {
- logging_1$3.log.error("redemptionFrames should be positive");
- }
- }
- frameProcessor.validateOptions = validateOptions;
- const concatArrays = (arrays) => {
- const sizes = arrays.reduce((out, next) => {
- out.push(out.at(-1) + next.length);
- return out;
- }, [0]);
- const outArray = new Float32Array(sizes.at(-1));
- arrays.forEach((arr, index) => {
- const place = sizes[index];
- outArray.set(arr, place);
- });
- return outArray;
- };
- class FrameProcessor {
- constructor(modelProcessFunc, modelResetFunc, options) {
- this.modelProcessFunc = modelProcessFunc;
- this.modelResetFunc = modelResetFunc;
- this.options = options;
- this.speaking = false;
- this.redemptionCounter = 0;
- this.speechFrameCount = 0;
- this.active = false;
- this.speechRealStartFired = false;
- this.reset = () => {
- this.speaking = false;
- this.speechRealStartFired = false;
- this.audioBuffer = [];
- this.modelResetFunc();
- this.redemptionCounter = 0;
- this.speechFrameCount = 0;
- };
- this.pause = (handleEvent) => {
- this.active = false;
- if (this.options.submitUserSpeechOnPause) {
- this.endSegment(handleEvent);
- }
- else {
- this.reset();
- }
- };
- this.resume = () => {
- this.active = true;
- };
- this.endSegment = (handleEvent) => {
- const audioBuffer = this.audioBuffer;
- this.audioBuffer = [];
- const speaking = this.speaking;
- this.reset();
- if (speaking) {
- const speechFrameCount = audioBuffer.reduce((acc, item) => {
- return item.isSpeech ? (acc + 1) : acc;
- }, 0);
- if (speechFrameCount >= this.options.minSpeechFrames) {
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+ static async fromImage(image, options) {
+ // checking the type of image object
+ const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
+ const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
+ const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
+ const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
+ let data;
+ let tensorConfig = {};
+ // filling and checking image configuration options
+ if (isHTMLImageEle) {
+ // HTMLImageElement - image object - format is RGBA by default
+ const canvas = document.createElement('canvas');
+ const pixels2DContext = canvas.getContext('2d');
+ if (pixels2DContext != null) {
+ let height = image.naturalHeight;
+ let width = image.naturalWidth;
+ if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
+ height = options.resizedHeight;
+ width = options.resizedWidth;
+ }
+ if (options !== undefined) {
+ tensorConfig = options;
+ if (options.tensorFormat !== undefined) {
+ throw new Error('Image input config format must be RGBA for HTMLImageElement');
+ }
+ else {
+ tensorConfig.tensorFormat = 'RGBA';
+ }
+ if (options.height !== undefined && options.height !== height) {
+ throw new Error('Image input config height doesn\'t match HTMLImageElement height');
+ }
+ else {
+ tensorConfig.height = height;
+ }
+ if (options.width !== undefined && options.width !== width) {
+ throw new Error('Image input config width doesn\'t match HTMLImageElement width');
+ }
+ else {
+ tensorConfig.width = width;
+ }
  }
  else {
-
+ tensorConfig.tensorFormat = 'RGBA';
+ tensorConfig.height = height;
+ tensorConfig.width = width;
  }
+ canvas.width = width;
+ canvas.height = height;
+ pixels2DContext.drawImage(image, 0, 0, width, height);
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
  }
-
-
- this.process = async (frame, handleEvent) => {
- if (!this.active) {
- return;
+ else {
+ throw new Error('Can not access image data');
  }
-
-
-
-
-
-
-
-
-
- this.redemptionCounter = 0;
+ }
+ else if (isImageDataEle) {
+ // ImageData - image object - format is RGBA by default
+ const format = 'RGBA';
+ let height;
+ let width;
+ if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
+ height = options.resizedHeight;
+ width = options.resizedWidth;
  }
-
-
-
+ else {
+ height = image.height;
+ width = image.width;
  }
- if (
-
-
-
-
+ if (options !== undefined) {
+ tensorConfig = options;
+ if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
+ throw new Error('Image input config format must be RGBA for ImageData');
+ }
+ else {
+ tensorConfig.bitmapFormat = 'RGBA';
+ }
  }
-
-
-
-
-
-
-
-
-
- const
-
-
-
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+ else {
+ tensorConfig.bitmapFormat = 'RGBA';
+ }
+ tensorConfig.height = height;
+ tensorConfig.width = width;
+ if (options !== undefined) {
+ const tempCanvas = document.createElement('canvas');
+ tempCanvas.width = width;
+ tempCanvas.height = height;
+ const pixels2DContext = tempCanvas.getContext('2d');
+ if (pixels2DContext != null) {
+ pixels2DContext.putImageData(image, 0, 0);
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
  }
  else {
-
+ throw new Error('Can not access image data');
  }
  }
-
-
- this.audioBuffer.shift();
- }
- this.speechFrameCount = 0;
+ else {
+ data = image.data;
  }
- };
- this.audioBuffer = [];
- this.reset();
- }
- }
- frameProcessor.FrameProcessor = FrameProcessor;
-
- var nonRealTimeVad = {};
-
- var ortWeb_min = {exports: {}};
-
- // Copyright (c) Microsoft Corporation. All rights reserved.
- // Licensed under the MIT License.
- const backends = {};
- const backendsSortedByPriority = [];
- /**
- * Register a backend.
- *
- * @param name - the name as a key to lookup as an execution provider.
- * @param backend - the backend object.
- * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
- * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
- *
- * @internal
- */
- const registerBackend = (name, backend, priority) => {
- if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
- const currentBackend = backends[name];
- if (currentBackend === undefined) {
- backends[name] = { backend, priority };
- }
- else if (currentBackend.priority > priority) {
- // same name is already registered with a higher priority. skip registeration.
- return;
  }
- else if (
-
-
+ else if (isImageBitmap) {
+ // ImageBitmap - image object - format must be provided by user
+ if (options === undefined) {
+ throw new Error('Please provide image config with format for Imagebitmap');
  }
-
-
- const i = backendsSortedByPriority.indexOf(name);
- if (i !== -1) {
- backendsSortedByPriority.splice(i, 1);
+ if (options.bitmapFormat !== undefined) {
+ throw new Error('Image input config format must be defined for ImageBitmap');
  }
-
-
-
-
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
+ if (pixels2DContext != null) {
+ const height = image.height;
+ const width = image.width;
+ pixels2DContext.drawImage(image, 0, 0, width, height);
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
+ if (options !== undefined) {
+ // using square brackets to avoid TS error - type 'never'
+ if (options.height !== undefined && options.height !== height) {
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
+ }
+ else {
+ tensorConfig.height = height;
+ }
+ // using square brackets to avoid TS error - type 'never'
+ if (options.width !== undefined && options.width !== width) {
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
+ }
+ else {
+ tensorConfig.width = width;
+ }
+ }
+ else {
+ tensorConfig.height = height;
+ tensorConfig.width = width;
  }
+ return Tensor.bufferToTensor(data, tensorConfig);
+ }
+ else {
+ throw new Error('Can not access image data');
  }
- backendsSortedByPriority.push(name);
  }
-
+ else if (isURL) {
+ return new Promise((resolve, reject) => {
+ const canvas = document.createElement('canvas');
+ const context = canvas.getContext('2d');
+ if (!image || !context) {
+ return reject();
+ }
+ const newImage = new Image();
+ newImage.crossOrigin = 'Anonymous';
+ newImage.src = image;
+ newImage.onload = () => {
+ canvas.width = newImage.width;
+ canvas.height = newImage.height;
+ context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
+ const img = context.getImageData(0, 0, canvas.width, canvas.height);
+ if (options !== undefined) {
+ // using square brackets to avoid TS error - type 'never'
+ if (options.height !== undefined && options.height !== canvas.height) {
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
+ }
+ else {
+ tensorConfig.height = canvas.height;
+ }
+ // using square brackets to avoid TS error - type 'never'
+ if (options.width !== undefined && options.width !== canvas.width) {
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
+ }
+ else {
+ tensorConfig.width = canvas.width;
+ }
+ }
+ else {
+ tensorConfig.height = canvas.height;
+ tensorConfig.width = canvas.width;
+ }
+ resolve(Tensor.bufferToTensor(img.data, tensorConfig));
+ };
+ });
+ }
+ else {
+ throw new Error('Input data provided is not supported - aborted tensor creation');
+ }
+ if (data !== undefined) {
+ return Tensor.bufferToTensor(data, tensorConfig);
+ }
+ else {
+ throw new Error('Input data provided is not supported - aborted tensor creation');
+ }
  }
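Note: the added fromImage above accepts an HTMLImageElement, ImageData, ImageBitmap, or URL string and funnels every branch through bufferToTensor. A hedged usage sketch; it assumes access to the Tensor class defined in this bundle (exported as ort.Tensor in upstream onnxruntime-web), and the URL is hypothetical:

// Sketch only: exercising two of the fromImage branches added above.
const ctx = document.createElement('canvas').getContext('2d');
const imageData = ctx.createImageData(224, 224);      // ImageData branch, RGBA enforced
const fromPixels = await Tensor.fromImage(imageData); // -> float32 tensor [1, 3, 224, 224]

// URL branch: resolves once the Image has loaded and been drawn to a canvas.
const fromUrl = await Tensor.fromImage('https://example.com/cat.png'); // hypothetical URL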
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ toImageData(options) {
+ var _a, _b;
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
+ let image;
+ if (pixels2DContext != null) {
+ // Default values for height and width & format
+ const width = this.dims[3];
+ const height = this.dims[2];
+ const channels = this.dims[1];
+ const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
+ const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
+ const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
+ const offset = height * width;
+ if (options !== undefined) {
+ if (options.height !== undefined && options.height !== height) {
+ throw new Error('Image output config height doesn\'t match tensor height');
+ }
+ if (options.width !== undefined && options.width !== width) {
+ throw new Error('Image output config width doesn\'t match tensor width');
+ }
+ if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
+ (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
+ throw new Error('Tensor format doesn\'t match input tensor dims');
+ }
  }
-
-
+ // Default pointer assignments
+ const step = 4;
+ let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
+ let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
+ // Updating the pointer assignments based on the input image format
+ if (inputformat === 'RGBA') {
+ rTensorPointer = 0;
+ gTensorPointer = offset;
+ bTensorPointer = offset * 2;
+ aTensorPointer = offset * 3;
  }
-
-
-
-
- }
- await backendInfo.initPromise;
- backendInfo.initialized = true;
- return backendInfo.backend;
+ else if (inputformat === 'RGB') {
+ rTensorPointer = 0;
+ gTensorPointer = offset;
+ bTensorPointer = offset * 2;
  }
-
-
-
-
- backendInfo.aborted = true;
+ else if (inputformat === 'RBG') {
+ rTensorPointer = 0;
+ bTensorPointer = offset;
+ gTensorPointer = offset * 2;
  }
-
-
+ image = pixels2DContext.createImageData(width, height);
+ for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
+ image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
+ image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
+ image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
+ image.data[aImagePointer] =
+ aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
  }
  }
+ else {
+ throw new Error('Can not access image data');
+ }
+ return image;
  }
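Note: toImageData above inverts bufferToTensor — planar CHW floats back to interleaved RGBA bytes via (value - bias) * mean, with alpha filled as 255 when the tensor carries no A plane. A round-trip sketch under the same assumptions as the earlier examples:

// Sketch only: round trip through the two added helpers (names as defined in this bundle).
const canvas = document.createElement('canvas');
canvas.width = canvas.height = 224;
const ctx2 = canvas.getContext('2d');
const pixels = ctx2.getImageData(0, 0, 224, 224);
const tensor = await Tensor.fromImage(pixels); // RGBA bytes -> [1, 3, 224, 224] floats
const restored = tensor.toImageData();         // floats -> ImageData, alpha forced to 255
ctx2.putImageData(restored, 0, 0);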
-
-
-
-
- // Licensed under the MIT License.
- class EnvImpl {
- constructor() {
- this.wasm = {};
- this.webgl = {};
- this.logLevelInternal = 'warning';
- }
- // TODO standadize the getter and setter convention in env for other fields.
- set logLevel(value) {
- if (value === undefined) {
- return;
- }
- if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
- throw new Error(`Unsupported logging level: ${value}`);
- }
- this.logLevelInternal = value;
- }
- get logLevel() {
- return this.logLevelInternal;
+ // #endregion
+ // #region tensor utilities
+ reshape(dims) {
+ return new Tensor(this.type, this.data, dims);
  }
- }
+ };

  // Copyright (c) Microsoft Corporation. All rights reserved.
  // Licensed under the MIT License.
-
-
- */
- const env = new EnvImpl();
+ // eslint-disable-next-line @typescript-eslint/naming-convention
+ const Tensor = Tensor$1;

  // Copyright (c) Microsoft Corporation. All rights reserved.
  // Licensed under the MIT License.
-
-
-
- const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
- ['float32', Float32Array],
- ['uint8', Uint8Array],
- ['int8', Int8Array],
- ['uint16', Uint16Array],
- ['int16', Int16Array],
- ['int32', Int32Array],
- ['bool', Uint8Array],
- ['float64', Float64Array],
- ['uint32', Uint32Array],
- ]);
- // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
- const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
- [Float32Array, 'float32'],
- [Uint8Array, 'uint8'],
- [Int8Array, 'int8'],
- [Uint16Array, 'uint16'],
- [Int16Array, 'int16'],
- [Int32Array, 'int32'],
- [Float64Array, 'float64'],
- [Uint32Array, 'uint32'],
- ]);
- if (isBigInt64ArrayAvailable) {
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
- }
- if (isBigUint64ArrayAvailable) {
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
- }
- /**
- * calculate size from dims.
- *
- * @param dims the dims array. May be an illegal input.
- */
- const calculateSize = (dims) => {
- let size = 1;
- for (let i = 0; i < dims.length; i++) {
- const dim = dims[i];
- if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
- throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
- }
- if (dim < 0) {
- throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
- }
- size *= dim;
+ let InferenceSession$1 = class InferenceSession {
+ constructor(handler) {
+ this.handler = handler;
  }
-
-
-
-
-
-
-
-
-
-
-
-
- type = arg0;
- dims = arg2;
- if (arg0 === 'string') {
- // string tensor
- if (!Array.isArray(arg1)) {
- throw new TypeError('A string tensor\'s data must be a string array.');
- }
- // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
- // error will be populated at inference
- data = arg1;
+ async run(feeds, arg1, arg2) {
+ const fetches = {};
+ let options = {};
+ // check inputs
+ if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
+ throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
+ }
+ let isFetchesEmpty = true;
+ // determine which override is being used
+ if (typeof arg1 === 'object') {
+ if (arg1 === null) {
+ throw new TypeError('Unexpected argument[1]: cannot be null.');
  }
-
-
-
-
-
+ if (arg1 instanceof Tensor) {
+ throw new TypeError('\'fetches\' cannot be a Tensor');
+ }
+ if (Array.isArray(arg1)) {
+ if (arg1.length === 0) {
+ throw new TypeError('\'fetches\' cannot be an empty array.');
  }
-
-
-
-
-
-
+ isFetchesEmpty = false;
+ // output names
+ for (const name of arg1) {
+ if (typeof name !== 'string') {
+ throw new TypeError('\'fetches\' must be a string array or an object.');
+ }
+ if (this.outputNames.indexOf(name) === -1) {
+ throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
+ }
+ fetches[name] = null;
  }
-
-
+ if (typeof arg2 === 'object' && arg2 !== null) {
+ options = arg2;
  }
- else {
- throw new TypeError(
+ else if (typeof arg2 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
  }
  }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- data = arg0;
+ else {
+ // decide whether arg1 is fetches or options
+ // if any output name is present and its value is valid OnnxValue, we consider it fetches
+ let isFetches = false;
+ const arg1Keys = Object.getOwnPropertyNames(arg1);
+ for (const name of this.outputNames) {
+ if (arg1Keys.indexOf(name) !== -1) {
+ const v = arg1[name];
+ if (v === null || v instanceof Tensor) {
+ isFetches = true;
+ isFetchesEmpty = false;
+ fetches[name] = v;
+ }
+ }
  }
-
-
-
-
-
-
+ if (isFetches) {
+ if (typeof arg2 === 'object' && arg2 !== null) {
+ options = arg2;
+ }
+ else if (typeof arg2 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
+ }
  }
  else {
-
- }
- }
- else {
- // get tensor type from TypedArray
- const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
- if (mappedType === undefined) {
- throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
+ options = arg1;
  }
- type = mappedType;
- data = arg0;
  }
  }
-
-
- // assume 1-D tensor if dims omitted
- dims = [data.length];
+ else if (typeof arg1 !== 'undefined') {
+ throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
  }
-
-
+ // check if all inputs are in feed
+ for (const name of this.inputNames) {
+ if (typeof feeds[name] === 'undefined') {
+ throw new Error(`input '${name}' is missing in 'feeds'.`);
+ }
  }
- //
-
-
-
+ // if no fetches is specified, we use the full output names list
+ if (isFetchesEmpty) {
+ for (const name of this.outputNames) {
+ fetches[name] = null;
+ }
  }
-
- this.
-
-
-
-
-
- * Create a new tensor object from image object
- *
- * @param buffer - Extracted image buffer data - assuming RGBA format
- * @param imageFormat - input image configuration - required configurations height, width, format
- * @param tensorFormat - output tensor configuration - Default is RGB format
- */
- static bufferToTensor(buffer, options) {
- if (buffer === undefined) {
- throw new Error('Image buffer must be defined');
+ // feeds, fetches and options are prepared
+ const results = await this.handler.run(feeds, fetches, options);
+ const returnValue = {};
+ for (const key in results) {
+ if (Object.hasOwnProperty.call(results, key)) {
+ returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
+ }
  }
-
- throw new Error('Image height and width must be defined');
- }
- const { height, width } = options;
- const norm = options.norm;
- let normMean;
- let normBias;
- if (norm === undefined || norm.mean === undefined) {
- normMean = 255;
- }
- else {
- normMean = norm.mean;
- }
- if (norm === undefined || norm.bias === undefined) {
- normBias = 0;
- }
- else {
- normBias = norm.bias;
- }
- const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
- // default value is RGBA since imagedata and HTMLImageElement uses it
- const outputformat = options.tensorFormat !== undefined ?
- (options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
- 'RGB';
- const offset = height * width;
- const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
- // Default pointer assignments
- let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
- // Updating the pointer assignments based on the input image format
- if (inputformat === 'RGB') {
- step = 3;
- rImagePointer = 0;
- gImagePointer = 1;
- bImagePointer = 2;
- aImagePointer = -1;
- }
- // Updating the pointer assignments based on the output tensor format
- if (outputformat === 'RGBA') {
- aTensorPointer = offset * 3;
- }
- else if (outputformat === 'RBG') {
- rTensorPointer = 0;
- bTensorPointer = offset;
- gTensorPointer = offset * 2;
- }
- else if (outputformat === 'BGR') {
- bTensorPointer = 0;
- gTensorPointer = offset;
- rTensorPointer = offset * 2;
- }
- for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
- float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
- float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
- float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
- if (aTensorPointer !== -1 && aImagePointer !== -1) {
- float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
- }
- }
- // Float32Array -> ort.Tensor
- const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
- new Tensor('float32', float32Data, [1, 3, height, width]);
- return outputTensor;
+ return returnValue;
  }
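Note: the added run above accepts (feeds), (feeds, options), (feeds, outputNames[, options]), or (feeds, fetchesObject[, options]); argument[1] is treated as fetches only when it is an array or mentions a known output name with a null/Tensor value. A hedged sketch of the three call shapes — 'model.onnx', 'input', and 'logits' are hypothetical names:

// Sketch only: the call shapes the added overload parsing accepts.
const session = await InferenceSession.create('model.onnx');
const feeds = { input: new Tensor('float32', new Float32Array(1 * 3 * 224 * 224), [1, 3, 224, 224]) };

const everything = await session.run(feeds);                     // no fetches: all outputNames
const byName = await session.run(feeds, ['logits']);             // fetches as output-name array
const byObject = await session.run(feeds, { logits: null }, {}); // fetches object + options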
- static async
- //
-
-
-
-
-
-
- // filling and checking image configuration options
- if (isHTMLImageEle) {
- // HTMLImageElement - image object - format is RGBA by default
- const canvas = document.createElement('canvas');
- const pixels2DContext = canvas.getContext('2d');
- if (pixels2DContext != null) {
- let height = image.naturalHeight;
- let width = image.naturalWidth;
- if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
- height = options.resizedHeight;
- width = options.resizedWidth;
- }
- if (options !== undefined) {
- tensorConfig = options;
- if (options.tensorFormat !== undefined) {
- throw new Error('Image input config format must be RGBA for HTMLImageElement');
- }
- else {
- tensorConfig.tensorFormat = 'RGBA';
- }
- if (options.height !== undefined && options.height !== height) {
- throw new Error('Image input config height doesn\'t match HTMLImageElement height');
- }
- else {
- tensorConfig.height = height;
- }
- if (options.width !== undefined && options.width !== width) {
- throw new Error('Image input config width doesn\'t match HTMLImageElement width');
- }
- else {
- tensorConfig.width = width;
- }
- }
- else {
- tensorConfig.tensorFormat = 'RGBA';
- tensorConfig.height = height;
- tensorConfig.width = width;
- }
- canvas.width = width;
- canvas.height = height;
- pixels2DContext.drawImage(image, 0, 0, width, height);
- data = pixels2DContext.getImageData(0, 0, width, height).data;
+ static async create(arg0, arg1, arg2, arg3) {
+ // either load from a file or buffer
+ let filePathOrUint8Array;
+ let options = {};
+ if (typeof arg0 === 'string') {
+ filePathOrUint8Array = arg0;
+ if (typeof arg1 === 'object' && arg1 !== null) {
+ options = arg1;
  }
- else {
- throw new
+ else if (typeof arg1 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
  }
  }
- else if (
-
-
-
- let width;
- if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
- height = options.resizedHeight;
- width = options.resizedWidth;
- }
- else {
- height = image.height;
- width = image.width;
+ else if (arg0 instanceof Uint8Array) {
+ filePathOrUint8Array = arg0;
+ if (typeof arg1 === 'object' && arg1 !== null) {
+ options = arg1;
  }
- if (
-
- if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
- throw new Error('Image input config format must be RGBA for ImageData');
- }
- else {
- tensorConfig.bitmapFormat = 'RGBA';
- }
+ else if (typeof arg1 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
  }
-
-
+ }
+ else if (arg0 instanceof ArrayBuffer ||
+ (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
+ const buffer = arg0;
+ let byteOffset = 0;
+ let byteLength = arg0.byteLength;
+ if (typeof arg1 === 'object' && arg1 !== null) {
+ options = arg1;
  }
-
-
-
-
- tempCanvas.width = width;
- tempCanvas.height = height;
- const pixels2DContext = tempCanvas.getContext('2d');
- if (pixels2DContext != null) {
- pixels2DContext.putImageData(image, 0, 0);
- data = pixels2DContext.getImageData(0, 0, width, height).data;
+ else if (typeof arg1 === 'number') {
+ byteOffset = arg1;
+ if (!Number.isSafeInteger(byteOffset)) {
+ throw new RangeError('\'byteOffset\' must be an integer.');
  }
-
- throw new
+ if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
+ throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
  }
-
-
-
-
-
- else if (isImageBitmap) {
- // ImageBitmap - image object - format must be provided by user
- if (options === undefined) {
- throw new Error('Please provide image config with format for Imagebitmap');
- }
- if (options.bitmapFormat !== undefined) {
- throw new Error('Image input config format must be defined for ImageBitmap');
- }
- const pixels2DContext = document.createElement('canvas').getContext('2d');
- if (pixels2DContext != null) {
- const height = image.height;
- const width = image.width;
- pixels2DContext.drawImage(image, 0, 0, width, height);
- data = pixels2DContext.getImageData(0, 0, width, height).data;
- if (options !== undefined) {
- // using square brackets to avoid TS error - type 'never'
- if (options.height !== undefined && options.height !== height) {
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
+ byteLength = arg0.byteLength - byteOffset;
+ if (typeof arg2 === 'number') {
+ byteLength = arg2;
+ if (!Number.isSafeInteger(byteLength)) {
+ throw new RangeError('\'byteLength\' must be an integer.');
  }
-
-
+ if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
+ throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
  }
-
-
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
+ if (typeof arg3 === 'object' && arg3 !== null) {
+ options = arg3;
  }
- else {
-
+ else if (typeof arg3 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
  }
  }
- else {
-
- tensorConfig.width = width;
+ else if (typeof arg2 !== 'undefined') {
+ throw new TypeError('\'byteLength\' must be a number.');
  }
- return Tensor.bufferToTensor(data, tensorConfig);
  }
- else {
- throw new
+ else if (typeof arg1 !== 'undefined') {
+ throw new TypeError('\'options\' must be an object.');
  }
-
- else if (isURL) {
- return new Promise((resolve, reject) => {
- const canvas = document.createElement('canvas');
- const context = canvas.getContext('2d');
- if (!image || !context) {
- return reject();
- }
- const newImage = new Image();
- newImage.crossOrigin = 'Anonymous';
- newImage.src = image;
- newImage.onload = () => {
- canvas.width = newImage.width;
- canvas.height = newImage.height;
- context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
- const img = context.getImageData(0, 0, canvas.width, canvas.height);
- if (options !== undefined) {
- // using square brackets to avoid TS error - type 'never'
- if (options.height !== undefined && options.height !== canvas.height) {
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
- }
- else {
- tensorConfig.height = canvas.height;
- }
- // using square brackets to avoid TS error - type 'never'
- if (options.width !== undefined && options.width !== canvas.width) {
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
- }
- else {
- tensorConfig.width = canvas.width;
- }
- }
- else {
- tensorConfig.height = canvas.height;
- tensorConfig.width = canvas.width;
- }
- resolve(Tensor.bufferToTensor(img.data, tensorConfig));
- };
- });
- }
- else {
- throw new Error('Input data provided is not supported - aborted tensor creation');
- }
- if (data !== undefined) {
- return Tensor.bufferToTensor(data, tensorConfig);
- }
- else {
- throw new Error('Input data provided is not supported - aborted tensor creation');
- }
- }
- toImageData(options) {
- var _a, _b;
- const pixels2DContext = document.createElement('canvas').getContext('2d');
- let image;
- if (pixels2DContext != null) {
- // Default values for height and width & format
- const width = this.dims[3];
- const height = this.dims[2];
- const channels = this.dims[1];
- const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
- const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
- const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
- const offset = height * width;
- if (options !== undefined) {
- if (options.height !== undefined && options.height !== height) {
- throw new Error('Image output config height doesn\'t match tensor height');
- }
- if (options.width !== undefined && options.width !== width) {
- throw new Error('Image output config width doesn\'t match tensor width');
- }
- if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
- (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
- throw new Error('Tensor format doesn\'t match input tensor dims');
- }
- }
- // Default pointer assignments
- const step = 4;
- let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
- // Updating the pointer assignments based on the input image format
- if (inputformat === 'RGBA') {
- rTensorPointer = 0;
- gTensorPointer = offset;
- bTensorPointer = offset * 2;
- aTensorPointer = offset * 3;
- }
- else if (inputformat === 'RGB') {
- rTensorPointer = 0;
- gTensorPointer = offset;
- bTensorPointer = offset * 2;
- }
- else if (inputformat === 'RBG') {
- rTensorPointer = 0;
- bTensorPointer = offset;
- gTensorPointer = offset * 2;
- }
- image = pixels2DContext.createImageData(width, height);
- for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
- image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
- image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
- image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
- image.data[aImagePointer] =
- aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
- }
- }
- else {
- throw new Error('Can not access image data');
- }
- return image;
- }
- // #endregion
- // #region tensor utilities
- reshape(dims) {
- return new Tensor(this.type, this.data, dims);
- }
- };
-
- // Copyright (c) Microsoft Corporation. All rights reserved.
- // Licensed under the MIT License.
- // eslint-disable-next-line @typescript-eslint/naming-convention
- const Tensor = Tensor$1;
-
- // Copyright (c) Microsoft Corporation. All rights reserved.
- // Licensed under the MIT License.
- let InferenceSession$1 = class InferenceSession {
- constructor(handler) {
- this.handler = handler;
- }
- async run(feeds, arg1, arg2) {
- const fetches = {};
- let options = {};
- // check inputs
- if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
- throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
- }
- let isFetchesEmpty = true;
- // determine which override is being used
- if (typeof arg1 === 'object') {
- if (arg1 === null) {
- throw new TypeError('Unexpected argument[1]: cannot be null.');
- }
- if (arg1 instanceof Tensor) {
- throw new TypeError('\'fetches\' cannot be a Tensor');
- }
- if (Array.isArray(arg1)) {
- if (arg1.length === 0) {
- throw new TypeError('\'fetches\' cannot be an empty array.');
- }
- isFetchesEmpty = false;
- // output names
- for (const name of arg1) {
- if (typeof name !== 'string') {
- throw new TypeError('\'fetches\' must be a string array or an object.');
- }
- if (this.outputNames.indexOf(name) === -1) {
- throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
- }
- fetches[name] = null;
- }
- if (typeof arg2 === 'object' && arg2 !== null) {
- options = arg2;
- }
- else if (typeof arg2 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- }
- else {
- // decide whether arg1 is fetches or options
- // if any output name is present and its value is valid OnnxValue, we consider it fetches
- let isFetches = false;
- const arg1Keys = Object.getOwnPropertyNames(arg1);
- for (const name of this.outputNames) {
- if (arg1Keys.indexOf(name) !== -1) {
- const v = arg1[name];
- if (v === null || v instanceof Tensor) {
- isFetches = true;
- isFetchesEmpty = false;
- fetches[name] = v;
- }
- }
- }
- if (isFetches) {
- if (typeof arg2 === 'object' && arg2 !== null) {
- options = arg2;
- }
- else if (typeof arg2 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- }
- else {
- options = arg1;
- }
- }
- }
- else if (typeof arg1 !== 'undefined') {
- throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
- }
- // check if all inputs are in feed
- for (const name of this.inputNames) {
- if (typeof feeds[name] === 'undefined') {
- throw new Error(`input '${name}' is missing in 'feeds'.`);
- }
- }
- // if no fetches is specified, we use the full output names list
- if (isFetchesEmpty) {
- for (const name of this.outputNames) {
- fetches[name] = null;
- }
- }
- // feeds, fetches and options are prepared
- const results = await this.handler.run(feeds, fetches, options);
- const returnValue = {};
- for (const key in results) {
- if (Object.hasOwnProperty.call(results, key)) {
- returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
- }
- }
- return returnValue;
- }
- static async create(arg0, arg1, arg2, arg3) {
- // either load from a file or buffer
- let filePathOrUint8Array;
- let options = {};
- if (typeof arg0 === 'string') {
- filePathOrUint8Array = arg0;
- if (typeof arg1 === 'object' && arg1 !== null) {
- options = arg1;
- }
- else if (typeof arg1 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- }
- else if (arg0 instanceof Uint8Array) {
- filePathOrUint8Array = arg0;
- if (typeof arg1 === 'object' && arg1 !== null) {
- options = arg1;
- }
- else if (typeof arg1 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- }
- else if (arg0 instanceof ArrayBuffer ||
- (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
- const buffer = arg0;
- let byteOffset = 0;
- let byteLength = arg0.byteLength;
- if (typeof arg1 === 'object' && arg1 !== null) {
- options = arg1;
- }
- else if (typeof arg1 === 'number') {
- byteOffset = arg1;
- if (!Number.isSafeInteger(byteOffset)) {
- throw new RangeError('\'byteOffset\' must be an integer.');
- }
- if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
- throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
- }
- byteLength = arg0.byteLength - byteOffset;
- if (typeof arg2 === 'number') {
- byteLength = arg2;
- if (!Number.isSafeInteger(byteLength)) {
- throw new RangeError('\'byteLength\' must be an integer.');
- }
- if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
- throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
- }
- if (typeof arg3 === 'object' && arg3 !== null) {
- options = arg3;
- }
- else if (typeof arg3 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- }
- else if (typeof arg2 !== 'undefined') {
- throw new TypeError('\'byteLength\' must be a number.');
- }
- }
- else if (typeof arg1 !== 'undefined') {
- throw new TypeError('\'options\' must be an object.');
- }
- filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
+ filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
  }
  else {
  throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
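Note: the create overloads added above load a model from a path string, a Uint8Array, or an ArrayBuffer/SharedArrayBuffer with optional byteOffset/byteLength, validating each range before slicing. A hedged sketch; the model path is hypothetical:

// Sketch only: the three source types the added create() accepts.
const a = await InferenceSession.create('model.onnx'); // path string (hypothetical)
const bytes = new Uint8Array(await (await fetch('model.onnx')).arrayBuffer());
const b = await InferenceSession.create(bytes);        // Uint8Array + default options
const c = await InferenceSession.create(bytes.buffer, 0, bytes.byteLength); // buffer slice
// Out-of-range byteOffset/byteLength values throw a RangeError before any load is attempted.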
@@ -3947,7 +3147,7 @@ registerProcessor('audio_processor', AudioProcessor);
  return Math.ceil((targetDuration * sr) / 1000 / frameSamples);
  }
  utils.minFramesForTargetMS = minFramesForTargetMS;
- function arrayBufferToBase64(buffer) {
+ function arrayBufferToBase64$1(buffer) {
  const bytes = new Uint8Array(buffer);
  const len = bytes.byteLength;
  const binary = new Array(len);
@@ -3960,7 +3160,7 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  return btoa(binary.join(""));
  }
- utils.arrayBufferToBase64 = arrayBufferToBase64;
+ utils.arrayBufferToBase64 = arrayBufferToBase64$1;
  /*
  This rest of this was mostly copied from https://github.com/linto-ai/WebVoiceSDK
  */
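Note: the two hunks above free up the name arrayBufferToBase64 — the vendored VAD util becomes arrayBufferToBase64$1 — for the SDK-level base64ToArrayBuffer/arrayBufferToBase64 pair added in the next hunk, which encodes in 32 KB chunks to avoid the argument limit of String.fromCharCode.apply. (The Float32Array branch there calls this.floatTo16BitPCM, so it only behaves with an appropriate `this` receiver.) A hedged round-trip sketch of that pair:

// Sketch only: round-tripping PCM samples through the helpers added below.
const pcm = new Int16Array([0, 16384, -16384, 32767]); // hypothetical audio frame
const b64 = arrayBufferToBase64(pcm);                  // Int16Array branch uses .buffer
const back = new Int16Array(base64ToArrayBuffer(b64)); // decode and reinterpret
// back now holds the same samples as pcm (same-endianness round trip).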
@@ -4379,12 +3579,736 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
4379
3579
|
|
|
4380
3580
|
} (dist));
|
|
4381
3581
|
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4386
|
-
|
|
4387
|
-
|
|
3582
|
+
/**
|
|
3583
|
+
* Converts a base64 string to an ArrayBuffer.
|
|
3584
|
+
* @param {string} base64 - The base64 string to convert.
|
|
3585
|
+
* @returns {ArrayBuffer} The resulting ArrayBuffer.
|
|
3586
|
+
*/
|
|
3587
|
+
function base64ToArrayBuffer(base64) {
|
|
3588
|
+
const binaryString = atob(base64);
|
|
3589
|
+
const len = binaryString.length;
|
|
3590
|
+
const bytes = new Uint8Array(len);
|
|
3591
|
+
for (let i = 0; i < len; i++) {
|
|
3592
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
3593
|
+
}
|
|
3594
|
+
return bytes.buffer;
|
|
3595
|
+
}
|
|
3596
|
+
|
|
3597
|
+
/**
|
|
3598
|
+
* Converts an ArrayBuffer to a base64 string.
|
|
3599
|
+
* @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
|
|
3600
|
+
* @returns {string} The resulting base64 string.
|
|
3601
|
+
*/
|
|
3602
|
+
function arrayBufferToBase64(arrayBuffer) {
|
|
3603
|
+
if (arrayBuffer instanceof Float32Array) {
|
|
3604
|
+
arrayBuffer = this.floatTo16BitPCM(arrayBuffer);
|
|
3605
|
+
} else if (arrayBuffer instanceof Int16Array) {
|
|
3606
|
+
arrayBuffer = arrayBuffer.buffer;
|
|
3607
|
+
}
|
|
3608
|
+
let binary = '';
|
|
3609
|
+
let bytes = new Uint8Array(arrayBuffer);
|
|
3610
|
+
const chunkSize = 0x8000; // 32KB chunk size
|
|
3611
|
+
for (let i = 0; i < bytes.length; i += chunkSize) {
|
|
3612
|
+
let chunk = bytes.subarray(i, i + chunkSize);
|
|
3613
|
+
binary += String.fromCharCode.apply(null, chunk);
|
|
3614
|
+
}
|
|
3615
|
+
return btoa(binary);
|
|
3616
|
+
}
|
|
3617
|
+
|
|
3618
|
+
/* eslint-env browser */
|
|
3619
|
+
// import { env as ortEnv } from 'onnxruntime-web';
|
|
3620
|
+
const NOOP = () => { };
|
|
3621
|
+
const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
|
|
3622
|
+
// SDK version - updated when publishing
|
|
3623
|
+
const SDK_VERSION = '2.1.3';
|
|
3624
|
+
/**
|
|
3625
|
+
* @class LayercodeClient
|
|
3626
|
+
* @classdesc Core client for the Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
|
|
3627
|
+
*/
|
|
3628
|
+
class LayercodeClient {
|
|
3629
|
+
/**
|
|
3630
|
+
* Creates an instance of LayercodeClient.
|
|
3631
|
+
* @param {Object} options - Configuration options
|
|
3632
|
+
*/
|
|
3633
|
+
constructor(options) {
|
|
3634
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q;
|
|
3635
|
+
this.deviceId = null;
|
|
3636
|
+
this.options = {
|
|
3637
|
+
agentId: options.agentId,
|
|
3638
|
+
conversationId: (_a = options.conversationId) !== null && _a !== void 0 ? _a : null,
|
|
3639
|
+
authorizeSessionEndpoint: options.authorizeSessionEndpoint,
|
|
3640
|
+
metadata: (_b = options.metadata) !== null && _b !== void 0 ? _b : {},
|
|
3641
|
+
vadResumeDelay: (_c = options.vadResumeDelay) !== null && _c !== void 0 ? _c : 500,
|
|
3642
|
+
onConnect: (_d = options.onConnect) !== null && _d !== void 0 ? _d : NOOP,
|
|
3643
|
+
onDisconnect: (_e = options.onDisconnect) !== null && _e !== void 0 ? _e : NOOP,
|
|
3644
|
+
onError: (_f = options.onError) !== null && _f !== void 0 ? _f : NOOP,
|
|
3645
|
+
onDeviceSwitched: (_g = options.onDeviceSwitched) !== null && _g !== void 0 ? _g : NOOP,
|
|
3646
|
+
onDevicesChanged: (_h = options.onDevicesChanged) !== null && _h !== void 0 ? _h : NOOP,
|
|
3647
|
+
onDataMessage: (_j = options.onDataMessage) !== null && _j !== void 0 ? _j : NOOP,
|
|
3648
|
+
onMessage: (_k = options.onMessage) !== null && _k !== void 0 ? _k : NOOP,
|
|
3649
|
+
onUserAmplitudeChange: (_l = options.onUserAmplitudeChange) !== null && _l !== void 0 ? _l : NOOP,
|
|
3650
|
+
onAgentAmplitudeChange: (_m = options.onAgentAmplitudeChange) !== null && _m !== void 0 ? _m : NOOP,
|
|
3651
|
+
onStatusChange: (_o = options.onStatusChange) !== null && _o !== void 0 ? _o : NOOP,
|
|
3652
|
+
onUserIsSpeakingChange: (_p = options.onUserIsSpeakingChange) !== null && _p !== void 0 ? _p : NOOP,
|
|
3653
|
+
onMuteStateChange: (_q = options.onMuteStateChange) !== null && _q !== void 0 ? _q : NOOP,
|
|
3654
|
+
};
|
|
3655
|
+
this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
|
|
3656
|
+
this._websocketUrl = DEFAULT_WS_URL;
|
|
3657
|
+
this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
|
|
3658
|
+
this.wavPlayer = new WavStreamPlayer({
|
|
3659
|
+
finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
|
|
3660
|
+
sampleRate: 16000, // TODO should be set my fetched agent config
|
|
3661
|
+
});
|
|
3662
|
+
this.vad = null;
|
|
3663
|
+
this.ws = null;
|
|
3664
|
+
this.status = 'disconnected';
|
|
3665
|
+
this.userAudioAmplitude = 0;
|
|
3666
|
+
this.agentAudioAmplitude = 0;
|
|
3667
|
+
this.conversationId = this.options.conversationId;
|
|
3668
|
+
this.pushToTalkActive = false;
|
|
3669
|
+
this.pushToTalkEnabled = false;
|
|
3670
|
+
this.canInterrupt = false;
|
|
3671
|
+
this.userIsSpeaking = false;
|
|
3672
|
+
this.recorderStarted = false;
|
|
3673
|
+
this.readySent = false;
|
|
3674
|
+
this.currentTurnId = null;
|
|
3675
|
+
this.audioBuffer = [];
|
|
3676
|
+
this.vadConfig = null;
|
|
3677
|
+
this.activeDeviceId = null;
|
|
3678
|
+
this.useSystemDefaultDevice = false;
|
|
3679
|
+
this.lastReportedDeviceId = null;
|
|
3680
|
+
this.lastKnownSystemDefaultDeviceKey = null;
|
|
3681
|
+
this.isMuted = false;
|
|
3682
|
+
this.stopPlayerAmplitude = undefined;
|
|
3683
|
+
this.stopRecorderAmplitude = undefined;
|
|
3684
|
+
this.deviceChangeListener = null;
|
|
3685
|
+
// this.audioPauseTime = null;
|
|
3686
|
+
// Bind event handlers
|
|
3687
|
+
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3688
|
+
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3689
|
+
}
|
|
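A minimal instantiation sketch using only options the constructor above reads; the agentId and endpoint values are placeholders, and every omitted callback defaults to a no-op:

// Placeholder agentId and endpoint; supply your own Layercode values.
const client = new LayercodeClient({
  agentId: 'ag_123',
  authorizeSessionEndpoint: '/api/authorize-session',
  metadata: { userId: 'user-42' },
  onConnect: ({ conversationId }) => console.log('connected', conversationId),
  onStatusChange: (status) => console.log('status:', status),
  onError: (err) => console.error(err),
});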
3690
|
+
_initializeVAD() {
|
|
3691
|
+
var _a;
|
|
3692
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
|
|
3693
|
+
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3694
|
+
if (this.pushToTalkEnabled) {
|
|
3695
|
+
return;
|
|
3696
|
+
}
|
|
3697
|
+
// Check if VAD is disabled
|
|
3698
|
+
if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
|
|
3699
|
+
console.log('VAD is disabled by backend configuration');
|
|
3700
|
+
return;
|
|
3701
|
+
}
|
|
3702
|
+
// Build VAD configuration object, only including keys that are defined
|
|
3703
|
+
const vadOptions = {
|
|
3704
|
+
stream: this.wavRecorder.getStream() || undefined,
|
|
3705
|
+
onSpeechStart: () => {
|
|
3706
|
+
console.debug('onSpeechStart: sending vad_start');
|
|
3707
|
+
this.userIsSpeaking = true;
|
|
3708
|
+
this.options.onUserIsSpeakingChange(true);
|
|
3709
|
+
this._wsSend({
|
|
3710
|
+
type: 'vad_events',
|
|
3711
|
+
event: 'vad_start',
|
|
3712
|
+
});
|
|
3713
|
+
this.options.onMessage({
|
|
3714
|
+
type: 'vad_events',
|
|
3715
|
+
event: 'vad_start',
|
|
3716
|
+
});
|
|
3717
|
+
},
|
|
3718
|
+
onSpeechEnd: () => {
|
|
3719
|
+
console.debug('onSpeechEnd: sending vad_end');
|
|
3720
|
+
this.userIsSpeaking = false;
|
|
3721
|
+
this.options.onUserIsSpeakingChange(false);
|
|
3722
|
+
this.audioBuffer = []; // Clear buffer on speech end
|
|
3723
|
+
this._wsSend({
|
|
3724
|
+
type: 'vad_events',
|
|
3725
|
+
event: 'vad_end',
|
|
3726
|
+
});
|
|
3727
|
+
this.options.onMessage({
|
|
3728
|
+
type: 'vad_events',
|
|
3729
|
+
event: 'vad_end',
|
|
3730
|
+
});
|
|
3731
|
+
},
|
|
3732
|
+
};
|
|
3733
|
+
// Apply VAD configuration from backend if available
|
|
3734
|
+
if (this.vadConfig) {
|
|
3735
|
+
// Only add keys that are explicitly defined (not undefined)
|
|
3736
|
+
if (this.vadConfig.model !== undefined)
|
|
3737
|
+
vadOptions.model = this.vadConfig.model;
|
|
3738
|
+
if (this.vadConfig.positive_speech_threshold !== undefined)
|
|
3739
|
+
vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
|
|
3740
|
+
if (this.vadConfig.negative_speech_threshold !== undefined)
|
|
3741
|
+
vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
|
|
3742
|
+
if (this.vadConfig.redemption_frames !== undefined)
|
|
3743
|
+
vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
|
|
3744
|
+
if (this.vadConfig.min_speech_frames !== undefined)
|
|
3745
|
+
vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
|
|
3746
|
+
if (this.vadConfig.pre_speech_pad_frames !== undefined)
|
|
3747
|
+
vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
|
|
3748
|
+
if (this.vadConfig.frame_samples !== undefined)
|
|
3749
|
+
vadOptions.frameSamples = this.vadConfig.frame_samples;
|
|
3750
|
+
}
|
|
3751
|
+
else {
|
|
3752
|
+
// Default values if no config from backend
|
|
3753
|
+
vadOptions.model = 'v5';
|
|
3754
|
+
vadOptions.positiveSpeechThreshold = 0.15;
|
|
3755
|
+
vadOptions.negativeSpeechThreshold = 0.05;
|
|
3756
|
+
vadOptions.redemptionFrames = 4;
|
|
3757
|
+
vadOptions.minSpeechFrames = 2;
|
|
3758
|
+
vadOptions.preSpeechPadFrames = 0;
|
|
3759
|
+
vadOptions.frameSamples = 512; // Required for v5
|
|
3760
|
+
}
|
|
3761
|
+
console.log('Creating VAD with options:', vadOptions);
|
|
3762
|
+
dist.MicVAD.new(vadOptions)
|
|
3763
|
+
.then((vad) => {
|
|
3764
|
+
this.vad = vad;
|
|
3765
|
+
this.vad.start();
|
|
3766
|
+
console.log('VAD started successfully');
|
|
3767
|
+
})
|
|
3768
|
+
.catch((error) => {
|
|
3769
|
+
console.warn('Error initializing VAD:', error);
|
|
3770
|
+
// Send a message to server indicating VAD failure
|
|
3771
|
+
this._wsSend({
|
|
3772
|
+
type: 'vad_events',
|
|
3773
|
+
event: 'vad_model_failed',
|
|
3774
|
+
});
|
|
3775
|
+
});
|
|
3776
|
+
}
|
|
3777
|
+
/**
|
|
3778
|
+
* Updates the connection status and triggers the callback
|
|
3779
|
+
* @param {string} status - New status value
|
|
3780
|
+
*/
|
|
3781
|
+
_setStatus(status) {
|
|
3782
|
+
this.status = status;
|
|
3783
|
+
this.options.onStatusChange(status);
|
|
3784
|
+
}
|
|
3785
|
+
/**
|
|
3786
|
+
* Handles when agent audio finishes playing
|
|
3787
|
+
*/
|
|
3788
|
+
_clientResponseAudioReplayFinished() {
|
|
3789
|
+
console.debug('clientResponseAudioReplayFinished');
|
|
3790
|
+
this._wsSend({
|
|
3791
|
+
type: 'trigger.response.audio.replay_finished',
|
|
3792
|
+
reason: 'completed',
|
|
3793
|
+
});
|
|
3794
|
+
}
|
|
3795
|
+
async _clientInterruptAssistantReplay() {
|
|
3796
|
+
await this.wavPlayer.interrupt();
|
|
3797
|
+
}
|
|
3798
|
+
async triggerUserTurnStarted() {
|
|
3799
|
+
if (!this.pushToTalkActive) {
|
|
3800
|
+
this.pushToTalkActive = true;
|
|
3801
|
+
this._wsSend({ type: 'trigger.turn.start', role: 'user' });
|
|
3802
|
+
await this._clientInterruptAssistantReplay();
|
|
3803
|
+
}
|
|
3804
|
+
}
|
|
3805
|
+
async triggerUserTurnFinished() {
|
|
3806
|
+
if (this.pushToTalkActive) {
|
|
3807
|
+
this.pushToTalkActive = false;
|
|
3808
|
+
this._wsSend({ type: 'trigger.turn.end', role: 'user' });
|
|
3809
|
+
}
|
|
3810
|
+
}
|
|
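triggerUserTurnStarted and triggerUserTurnFinished are the public push-to-talk surface: the pushToTalkActive guard makes both idempotent, and starting a turn also interrupts any assistant playback. A typical hold-to-talk binding, assuming a client instance and a hypothetical button element:

// Hold-to-talk wiring; #talk is a hypothetical button element.
const talkButton = document.querySelector('#talk');
talkButton.addEventListener('pointerdown', () => client.triggerUserTurnStarted());
talkButton.addEventListener('pointerup', () => client.triggerUserTurnFinished());
talkButton.addEventListener('pointerleave', () => client.triggerUserTurnFinished());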
3811
|
+
/**
|
|
3812
|
+
* Handles incoming WebSocket messages
|
|
3813
|
+
* @param {MessageEvent} event - The WebSocket message event
|
|
3814
|
+
*/
|
|
3815
|
+
async _handleWebSocketMessage(event) {
|
|
3816
|
+
try {
|
|
3817
|
+
const message = JSON.parse(event.data);
|
|
3818
|
+
if (message.type !== 'response.audio') {
|
|
3819
|
+
console.debug('msg:', message);
|
|
3820
|
+
}
|
|
3821
|
+
switch (message.type) {
|
|
3822
|
+
case 'turn.start':
|
|
3823
|
+
// Sent from the server to this client when a new turn starts (role may be user or assistant)
|
|
3824
|
+
if (message.role === 'assistant') {
|
|
3825
|
+
// Start tracking new assistant turn
|
|
3826
|
+
console.debug('Assistant turn started, will track new turn ID from audio/text');
|
|
3827
|
+
}
|
|
3828
|
+
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
3829
|
+
// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
3830
|
+
console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
|
|
3831
|
+
await this._clientInterruptAssistantReplay();
|
|
3832
|
+
}
|
|
3833
|
+
this.options.onMessage(message);
|
|
3834
|
+
break;
|
|
3835
|
+
case 'response.audio':
|
|
3836
|
+
const audioBuffer = base64ToArrayBuffer(message.content);
|
|
3837
|
+
this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
|
|
3838
|
+
// TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
|
|
3839
|
+
// Set current turn ID from first audio message, or update if different turn
|
|
3840
|
+
if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
|
|
3841
|
+
console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
|
|
3842
|
+
this.currentTurnId = message.turn_id;
|
|
3843
|
+
// Clean up interrupted tracks, keeping only the current turn
|
|
3844
|
+
this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
|
|
3845
|
+
}
|
|
3846
|
+
break;
|
|
3847
|
+
case 'response.text':
|
|
3848
|
+
// Set turn ID from first text message if not set
|
|
3849
|
+
if (!this.currentTurnId) {
|
|
3850
|
+
this.currentTurnId = message.turn_id;
|
|
3851
|
+
console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
|
|
3852
|
+
}
|
|
3853
|
+
this.options.onMessage(message);
|
|
3854
|
+
break;
|
|
3855
|
+
case 'response.data':
|
|
3856
|
+
this.options.onDataMessage(message);
|
|
3857
|
+
break;
|
|
3858
|
+
case 'user.transcript':
|
|
3859
|
+
case 'user.transcript.delta':
|
|
3860
|
+
case 'user.transcript.interim_delta':
|
|
3861
|
+
this.options.onMessage(message);
|
|
3862
|
+
break;
|
|
3863
|
+
default:
|
|
3864
|
+
console.warn('Unknown message type received:', message);
|
|
3865
|
+
}
|
|
3866
|
+
}
|
|
3867
|
+
catch (error) {
|
|
3868
|
+
console.error('Error processing WebSocket message:', error);
|
|
3869
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
3870
|
+
}
|
|
3871
|
+
}
|
|
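Of the message types above, response.audio is decoded and queued for playback internally, while turn, text, and transcript events are forwarded to onMessage and response.data to onDataMessage. A consumer-side fan-out sketch, passed as onMessage in the constructor options (the UI helpers are hypothetical):

// The type strings mirror the switch above; appendAssistantText and
// renderTranscript are hypothetical UI helpers.
function handleMessage(message) {
  switch (message.type) {
    case 'response.text':
      appendAssistantText(message);
      break;
    case 'user.transcript':
    case 'user.transcript.delta':
    case 'user.transcript.interim_delta':
      renderTranscript(message);
      break;
    case 'turn.start':
      console.debug(`${message.role} turn started`);
      break;
  }
}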
3872
|
+
/**
|
|
3873
|
+
* Handles available client browser microphone audio data and sends it over the WebSocket
|
|
3874
|
+
* @param {ArrayBuffer} data - The audio data buffer
|
|
3875
|
+
*/
|
|
3876
|
+
_handleDataAvailable(data) {
|
|
3877
|
+
var _a, _b, _c;
|
|
3878
|
+
try {
|
|
3879
|
+
const base64 = arrayBufferToBase64(data.mono);
|
|
3880
|
+
// Don't send audio if muted
|
|
3881
|
+
if (this.isMuted) {
|
|
3882
|
+
return;
|
|
3883
|
+
}
|
|
3884
|
+
// Determine if we should gate audio based on VAD configuration
|
|
3885
|
+
const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
|
|
3886
|
+
const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
|
|
3887
|
+
let sendAudio;
|
|
3888
|
+
if (this.pushToTalkEnabled) {
|
|
3889
|
+
sendAudio = this.pushToTalkActive;
|
|
3890
|
+
}
|
|
3891
|
+
else if (shouldGateAudio) {
|
|
3892
|
+
sendAudio = this.userIsSpeaking;
|
|
3893
|
+
}
|
|
3894
|
+
else {
|
|
3895
|
+
// If gate_audio is false, always send audio
|
|
3896
|
+
sendAudio = true;
|
|
3897
|
+
}
|
|
3898
|
+
if (sendAudio) {
|
|
3899
|
+
// If we have buffered audio and we're gating, send it first
|
|
3900
|
+
if (shouldGateAudio && this.audioBuffer.length > 0) {
|
|
3901
|
+
console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
|
|
3902
|
+
for (const bufferedAudio of this.audioBuffer) {
|
|
3903
|
+
this._wsSend({
|
|
3904
|
+
type: 'client.audio',
|
|
3905
|
+
content: bufferedAudio,
|
|
3906
|
+
});
|
|
3907
|
+
}
|
|
3908
|
+
this.audioBuffer = []; // Clear the buffer after sending
|
|
3909
|
+
}
|
|
3910
|
+
// Send the current audio
|
|
3911
|
+
this._wsSend({
|
|
3912
|
+
type: 'client.audio',
|
|
3913
|
+
content: base64,
|
|
3914
|
+
});
|
|
3915
|
+
}
|
|
3916
|
+
else {
|
|
3917
|
+
// Buffer audio when not sending (to catch audio just before VAD triggers)
|
|
3918
|
+
this.audioBuffer.push(base64);
|
|
3919
|
+
// Keep buffer size based on configuration
|
|
3920
|
+
if (this.audioBuffer.length > bufferFrames) {
|
|
3921
|
+
this.audioBuffer.shift(); // Remove oldest chunk
|
|
3922
|
+
}
|
|
3923
|
+
}
|
|
3924
|
+
}
|
|
3925
|
+
catch (error) {
|
|
3926
|
+
console.error('Error processing audio:', error);
|
|
3927
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
3928
|
+
}
|
|
3929
|
+
}
|
|
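The gating above addresses the usual VAD clipping problem: speech onset is detected a few frames late, so while audio is gated the newest chunks are kept in a bounded buffer and flushed ahead of the first live chunk once speech starts. The same mechanism in isolation:

// Standalone sketch of the pre-speech buffer used above.
const bufferFrames = 10; // the vadConfig.buffer_frames fallback
const pending = [];

function onChunk(base64Chunk, speaking, send) {
  if (speaking) {
    // Flush buffered pre-speech audio first, oldest chunk first.
    while (pending.length > 0) send(pending.shift());
    send(base64Chunk);
  } else {
    pending.push(base64Chunk);
    if (pending.length > bufferFrames) pending.shift(); // drop the oldest chunk
  }
}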
3930
|
+
_wsSend(message) {
|
|
3931
|
+
var _a;
|
|
3932
|
+
if (message.type !== 'client.audio') {
|
|
3933
|
+
console.debug('sent_msg:', message);
|
|
3934
|
+
}
|
|
3935
|
+
const messageString = JSON.stringify(message);
|
|
3936
|
+
if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
|
|
3937
|
+
this.ws.send(messageString);
|
|
3938
|
+
}
|
|
3939
|
+
}
|
|
3940
|
+
_sendReadyIfNeeded() {
|
|
3941
|
+
var _a;
|
|
3942
|
+
if (this.recorderStarted && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
|
|
3943
|
+
this._wsSend({ type: 'client.ready' });
|
|
3944
|
+
this.readySent = true;
|
|
3945
|
+
}
|
|
3946
|
+
}
|
|
3947
|
+
/**
|
|
3948
|
+
* Sets up amplitude monitoring for a given audio source.
|
|
3949
|
+
* @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
|
|
3950
|
+
* @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
|
|
3951
|
+
* @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
|
|
3952
|
+
*/
|
|
3953
|
+
_setupAmplitudeMonitoring(source, callback, updateInternalState) {
|
|
3954
|
+
let updateCounter = 0;
|
|
3955
|
+
source.startAmplitudeMonitoring((amplitude) => {
|
|
3956
|
+
// Only update and call callback at the specified sample rate
|
|
3957
|
+
if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
|
|
3958
|
+
updateInternalState(amplitude);
|
|
3959
|
+
if (callback !== NOOP) {
|
|
3960
|
+
callback(amplitude);
|
|
3961
|
+
}
|
|
3962
|
+
updateCounter = 0; // Reset counter after sampling
|
|
3963
|
+
}
|
|
3964
|
+
updateCounter++;
|
|
3965
|
+
});
|
|
3966
|
+
const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
|
|
3967
|
+
if (source === this.wavPlayer) {
|
|
3968
|
+
this.stopPlayerAmplitude = stop;
|
|
3969
|
+
}
|
|
3970
|
+
if (source === this.wavRecorder) {
|
|
3971
|
+
this.stopRecorderAmplitude = stop;
|
|
3972
|
+
}
|
|
3973
|
+
}
|
|
3974
|
+
_stopAmplitudeMonitoring() {
|
|
3975
|
+
var _a, _b;
|
|
3976
|
+
(_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
|
|
3977
|
+
(_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
|
|
3978
|
+
this.stopPlayerAmplitude = undefined;
|
|
3979
|
+
this.stopRecorderAmplitude = undefined;
|
|
3980
|
+
}
|
|
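Amplitude callbacks are thinned by AMPLITUDE_MONITORING_SAMPLE_RATE (set to 2 in the constructor) so consumers are not invoked on every raw reading. Driving a level meter from the user-side callback, assuming a hypothetical meter element:

// #mic-meter is a hypothetical element whose width tracks mic level.
const meter = document.querySelector('#mic-meter');
const onUserAmplitudeChange = (amplitude) => {
  meter.style.width = `${Math.min(1, amplitude) * 100}%`;
};
// Passed as onUserAmplitudeChange in the constructor options.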
3981
|
+
/**
|
|
3982
|
+
* Connects to the Layercode agent using the stored conversation ID and starts the audio conversation
|
|
3983
|
+
* @async
|
|
3984
|
+
* @returns {Promise<void>}
|
|
3985
|
+
*/
|
|
3986
|
+
async connect() {
|
|
3987
|
+
if (this.status === 'connecting') {
|
|
3988
|
+
return;
|
|
3989
|
+
}
|
|
3990
|
+
try {
|
|
3991
|
+
this._setStatus('connecting');
|
|
3992
|
+
// Reset turn tracking for clean start
|
|
3993
|
+
this._resetTurnTracking();
|
|
3994
|
+
this._stopAmplitudeMonitoring();
|
|
3995
|
+
// Get client session key from the authorize endpoint
|
|
3996
|
+
let authorizeSessionRequestBody = {
|
|
3997
|
+
agent_id: this.options.agentId,
|
|
3998
|
+
metadata: this.options.metadata,
|
|
3999
|
+
sdk_version: SDK_VERSION,
|
|
4000
|
+
};
|
|
4001
|
+
// If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
|
|
4002
|
+
if (this.options.conversationId) {
|
|
4003
|
+
authorizeSessionRequestBody.conversation_id = this.options.conversationId;
|
|
4004
|
+
}
|
|
4005
|
+
const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
|
|
4006
|
+
method: 'POST',
|
|
4007
|
+
headers: {
|
|
4008
|
+
'Content-Type': 'application/json',
|
|
4009
|
+
},
|
|
4010
|
+
body: JSON.stringify(authorizeSessionRequestBody),
|
|
4011
|
+
});
|
|
4012
|
+
if (!authorizeSessionResponse.ok) {
|
|
4013
|
+
throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
|
|
4014
|
+
}
|
|
4015
|
+
const authorizeSessionResponseBody = await authorizeSessionResponse.json();
|
|
4016
|
+
this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
|
|
4017
|
+
this.options.conversationId = this.conversationId;
|
|
4018
|
+
await this.wavRecorder.requestPermission();
|
|
4019
|
+
this._setupDeviceChangeListener();
|
|
4020
|
+
// Connect WebSocket
|
|
4021
|
+
this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
|
|
4022
|
+
client_session_key: authorizeSessionResponseBody.client_session_key,
|
|
4023
|
+
})}`);
|
|
4024
|
+
const config = authorizeSessionResponseBody.config;
|
|
4025
|
+
console.log('AgentConfig', config);
|
|
4026
|
+
// Store VAD configuration
|
|
4027
|
+
this.vadConfig = config.vad || null;
|
|
4028
|
+
if (config.transcription.trigger === 'push_to_talk') {
|
|
4029
|
+
this.pushToTalkEnabled = true;
|
|
4030
|
+
}
|
|
4031
|
+
else if (config.transcription.trigger === 'automatic') {
|
|
4032
|
+
this.pushToTalkEnabled = false;
|
|
4033
|
+
this.canInterrupt = config.transcription.can_interrupt;
|
|
4034
|
+
}
|
|
4035
|
+
else {
|
|
4036
|
+
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
4037
|
+
}
|
|
4038
|
+
// Bind the websocket message callbacks
|
|
4039
|
+
this.ws.onmessage = this._handleWebSocketMessage;
|
|
4040
|
+
this.ws.onopen = () => {
|
|
4041
|
+
console.log('WebSocket connection established');
|
|
4042
|
+
this._setStatus('connected');
|
|
4043
|
+
this.options.onConnect({ conversationId: this.conversationId });
|
|
4044
|
+
// Attempt to send ready message if recorder already started
|
|
4045
|
+
this._sendReadyIfNeeded();
|
|
4046
|
+
};
|
|
4047
|
+
this.ws.onclose = () => {
|
|
4048
|
+
console.log('WebSocket connection closed');
|
|
4049
|
+
this.ws = null;
|
|
4050
|
+
this._performDisconnectCleanup().catch((error) => {
|
|
4051
|
+
console.error('Error during disconnect cleanup:', error);
|
|
4052
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4053
|
+
});
|
|
4054
|
+
};
|
|
4055
|
+
this.ws.onerror = (error) => {
|
|
4056
|
+
console.error('WebSocket error:', error);
|
|
4057
|
+
this._setStatus('error');
|
|
4058
|
+
this.options.onError(new Error('WebSocket connection error'));
|
|
4059
|
+
};
|
|
4060
|
+
// Initialize audio player
|
|
4061
|
+
await this.wavPlayer.connect();
|
|
4062
|
+
// Set up audio player amplitude monitoring
|
|
4063
|
+
this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
|
|
4064
|
+
// wavRecorder will be started from the device change listener (via setInputDevice),
|
|
4065
|
+
// which is called when the device is first initialized and also when the device is switched
|
|
4066
|
+
// this is to ensure that the device is initialized before the recorder is started
|
|
4067
|
+
}
|
|
4068
|
+
catch (error) {
|
|
4069
|
+
console.error('Error connecting to Layercode agent:', error);
|
|
4070
|
+
this._setStatus('error');
|
|
4071
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4072
|
+
}
|
|
4073
|
+
}
|
|
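connect() keeps the Layercode API key out of the browser: it POSTs agent_id, metadata, sdk_version, and (on reconnect) conversation_id to the app's own authorizeSessionEndpoint, and expects client_session_key, conversation_id, and the agent config back. A server-side sketch of such an endpoint; the Express-style handler, upstream URL, and environment variable are assumptions, not taken from this diff:

// Express-style sketch. The upstream URL and LAYERCODE_API_KEY are assumptions;
// the response shape is what connect() reads above.
app.post('/api/authorize-session', async (req, res) => {
  const upstream = await fetch('https://api.layercode.com/v1/agents/web/authorize_session', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.LAYERCODE_API_KEY}`,
    },
    // Forwards agent_id, metadata, sdk_version, and optional conversation_id as-is.
    body: JSON.stringify(req.body),
  });
  res.status(upstream.status).json(await upstream.json());
});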
4074
|
+
_resetTurnTracking() {
|
|
4075
|
+
this.currentTurnId = null;
|
|
4076
|
+
console.debug('Reset turn tracking state');
|
|
4077
|
+
}
|
|
4078
|
+
async disconnect() {
|
|
4079
|
+
if (this.status === 'disconnected') {
|
|
4080
|
+
return;
|
|
4081
|
+
}
|
|
4082
|
+
if (this.ws) {
|
|
4083
|
+
this.ws.onopen = null;
|
|
4084
|
+
this.ws.onclose = null;
|
|
4085
|
+
this.ws.onerror = null;
|
|
4086
|
+
this.ws.onmessage = null;
|
|
4087
|
+
this.ws.close();
|
|
4088
|
+
this.ws = null;
|
|
4089
|
+
}
|
|
4090
|
+
await this._performDisconnectCleanup();
|
|
4091
|
+
}
|
|
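connect() and disconnect() are safe to call repeatedly: connect() returns early while already connecting, and disconnect() is a no-op once disconnected and detaches the WebSocket handlers before closing so cleanup does not run twice. Typical lifecycle wiring, inside an async context:

// Start the conversation, then tear it down when the page goes away.
await client.connect();

window.addEventListener('beforeunload', () => {
  client.disconnect().catch((err) => console.error('disconnect failed', err));
});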
4092
|
+
/**
|
|
4093
|
+
* Gets the microphone MediaStream used by this client
|
|
4094
|
+
* @returns {MediaStream|null} The microphone stream or null if not initialized
|
|
4095
|
+
*/
|
|
4096
|
+
getStream() {
|
|
4097
|
+
return this.wavRecorder.getStream();
|
|
4098
|
+
}
|
|
4099
|
+
/**
|
|
4100
|
+
* List all available audio input devices
|
|
4101
|
+
* @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
|
|
4102
|
+
*/
|
|
4103
|
+
async listDevices() {
|
|
4104
|
+
return this.wavRecorder.listDevices();
|
|
4105
|
+
}
|
|
4106
|
+
/**
|
|
4107
|
+
* Switches the input device for the microphone and restarts recording
|
|
4108
|
+
* @param {string} deviceId - The deviceId of the new microphone
|
|
4109
|
+
*/
|
|
4110
|
+
async setInputDevice(deviceId) {
|
|
4111
|
+
var _a, _b, _c;
|
|
4112
|
+
try {
|
|
4113
|
+
const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
|
|
4114
|
+
this.useSystemDefaultDevice = normalizedDeviceId === null;
|
|
4115
|
+
this.deviceId = normalizedDeviceId;
|
|
4116
|
+
// Restart recording with the new device
|
|
4117
|
+
await this._restartAudioRecording();
|
|
4118
|
+
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
4119
|
+
const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
|
|
4120
|
+
if (shouldUseVAD) {
|
|
4121
|
+
console.debug('Reinitializing VAD with new audio stream');
|
|
4122
|
+
const newStream = this.wavRecorder.getStream();
|
|
4123
|
+
await this._reinitializeVAD(newStream);
|
|
4124
|
+
}
|
|
4125
|
+
const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
|
|
4126
|
+
console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
|
|
4127
|
+
}
|
|
4128
|
+
catch (error) {
|
|
4129
|
+
console.error(`Failed to switch to input device ${deviceId}:`, error);
|
|
4130
|
+
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
4131
|
+
}
|
|
4132
|
+
}
|
|
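listDevices() and setInputDevice() pair up for a microphone picker; passing 'default' or an empty id re-follows the system default, since setInputDevice normalizes both to null. A sketch assuming a hypothetical select element, run inside an async context:

// Populate a hypothetical <select id="mic-select"> and switch on change.
const select = document.querySelector('#mic-select');
const devices = await client.listDevices();
for (const device of devices) {
  const option = new Option(device.label || device.deviceId, device.deviceId);
  if (device.default) option.selected = true;
  select.add(option);
}
select.addEventListener('change', () => {
  client.setInputDevice(select.value).catch((err) => console.error(err));
});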
4133
|
+
/**
|
|
4134
|
+
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
4135
|
+
*/
|
|
4136
|
+
async _restartAudioRecording() {
|
|
4137
|
+
var _a, _b;
|
|
4138
|
+
try {
|
|
4139
|
+
console.debug('Restarting audio recording after device switch...');
|
|
4140
|
+
try {
|
|
4141
|
+
await this.wavRecorder.end();
|
|
4142
|
+
}
|
|
4143
|
+
catch (_c) {
|
|
4144
|
+
// Ignore cleanup errors
|
|
4145
|
+
}
|
|
4146
|
+
// Start with new device
|
|
4147
|
+
const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
|
|
4148
|
+
await this.wavRecorder.begin(targetDeviceId);
|
|
4149
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
4150
|
+
// Re-setup amplitude monitoring with the new stream
|
|
4151
|
+
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
4152
|
+
const previousReportedDeviceId = this.lastReportedDeviceId;
|
|
4153
|
+
const stream = this.wavRecorder.getStream();
|
|
4154
|
+
const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
|
|
4155
|
+
const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
|
|
4156
|
+
const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
|
|
4157
|
+
this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
|
|
4158
|
+
if (!this.recorderStarted) {
|
|
4159
|
+
this.recorderStarted = true;
|
|
4160
|
+
this._sendReadyIfNeeded();
|
|
4161
|
+
}
|
|
4162
|
+
const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
|
|
4163
|
+
if (reportedDeviceId !== previousReportedDeviceId) {
|
|
4164
|
+
this.lastReportedDeviceId = reportedDeviceId;
|
|
4165
|
+
if (this.options.onDeviceSwitched) {
|
|
4166
|
+
this.options.onDeviceSwitched(reportedDeviceId);
|
|
4167
|
+
}
|
|
4168
|
+
}
|
|
4169
|
+
console.debug('Audio recording restart completed successfully');
|
|
4170
|
+
}
|
|
4171
|
+
catch (error) {
|
|
4172
|
+
console.error('Error restarting audio recording after device switch:', error);
|
|
4173
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4174
|
+
}
|
|
4175
|
+
}
|
|
4176
|
+
/**
|
|
4177
|
+
* Reinitializes VAD with a new stream (used after device switching)
|
|
4178
|
+
*/
|
|
4179
|
+
async _reinitializeVAD(stream) {
|
|
4180
|
+
// Clean up existing VAD
|
|
4181
|
+
if (this.vad) {
|
|
4182
|
+
this.vad.pause();
|
|
4183
|
+
this.vad.destroy();
|
|
4184
|
+
this.vad = null;
|
|
4185
|
+
}
|
|
4186
|
+
// Reinitialize with new stream
|
|
4187
|
+
if (stream) {
|
|
4188
|
+
this._initializeVAD();
|
|
4189
|
+
}
|
|
4190
|
+
}
|
|
4191
|
+
/**
|
|
4192
|
+
* Sets up the device change event listener
|
|
4193
|
+
*/
|
|
4194
|
+
_setupDeviceChangeListener() {
|
|
4195
|
+
if (!this.deviceChangeListener) {
|
|
4196
|
+
this.deviceChangeListener = async (devices) => {
|
|
4197
|
+
try {
|
|
4198
|
+
// Notify user that devices have changed
|
|
4199
|
+
this.options.onDevicesChanged(devices);
|
|
4200
|
+
const defaultDevice = devices.find((device) => device.default);
|
|
4201
|
+
const usingDefaultDevice = this.useSystemDefaultDevice;
|
|
4202
|
+
const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
|
|
4203
|
+
const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
|
|
4204
|
+
let shouldSwitch = !this.recorderStarted;
|
|
4205
|
+
if (!shouldSwitch) {
|
|
4206
|
+
if (usingDefaultDevice) {
|
|
4207
|
+
if (!defaultDevice) {
|
|
4208
|
+
shouldSwitch = true;
|
|
4209
|
+
}
|
|
4210
|
+
else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
|
|
4211
|
+
shouldSwitch = true;
|
|
4212
|
+
}
|
|
4213
|
+
else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
|
|
4214
|
+
(!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
|
|
4215
|
+
shouldSwitch = true;
|
|
4216
|
+
}
|
|
4217
|
+
}
|
|
4218
|
+
else {
|
|
4219
|
+
const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
|
|
4220
|
+
shouldSwitch = !matchesRequestedDevice;
|
|
4221
|
+
}
|
|
4222
|
+
}
|
|
4223
|
+
this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
|
|
4224
|
+
if (shouldSwitch) {
|
|
4225
|
+
console.debug('Selecting fallback audio input device');
|
|
4226
|
+
const fallbackDevice = defaultDevice || devices[0];
|
|
4227
|
+
if (fallbackDevice) {
|
|
4228
|
+
const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
|
|
4229
|
+
await this.setInputDevice(fallbackId);
|
|
4230
|
+
}
|
|
4231
|
+
else {
|
|
4232
|
+
console.warn('No alternative audio device found');
|
|
4233
|
+
}
|
|
4234
|
+
}
|
|
4235
|
+
}
|
|
4236
|
+
catch (error) {
|
|
4237
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4238
|
+
}
|
|
4239
|
+
};
|
|
4240
|
+
}
|
|
4241
|
+
this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
|
|
4242
|
+
}
|
|
4243
|
+
_teardownDeviceListeners() {
|
|
4244
|
+
this.wavRecorder.listenForDeviceChange(null);
|
|
4245
|
+
}
|
|
4246
|
+
async _performDisconnectCleanup() {
|
|
4247
|
+
var _a, _b;
|
|
4248
|
+
this.deviceId = null;
|
|
4249
|
+
this.activeDeviceId = null;
|
|
4250
|
+
this.useSystemDefaultDevice = false;
|
|
4251
|
+
this.lastReportedDeviceId = null;
|
|
4252
|
+
this.lastKnownSystemDefaultDeviceKey = null;
|
|
4253
|
+
this.recorderStarted = false;
|
|
4254
|
+
this.readySent = false;
|
|
4255
|
+
this._stopAmplitudeMonitoring();
|
|
4256
|
+
this._teardownDeviceListeners();
|
|
4257
|
+
if (this.vad) {
|
|
4258
|
+
this.vad.pause();
|
|
4259
|
+
this.vad.destroy();
|
|
4260
|
+
this.vad = null;
|
|
4261
|
+
}
|
|
4262
|
+
await this.wavRecorder.quit();
|
|
4263
|
+
(_b = (_a = this.wavPlayer).stop) === null || _b === void 0 ? void 0 : _b.call(_a);
|
|
4264
|
+
this.wavPlayer.disconnect();
|
|
4265
|
+
this._resetTurnTracking();
|
|
4266
|
+
this.options.conversationId = this.conversationId;
|
|
4267
|
+
this.userAudioAmplitude = 0;
|
|
4268
|
+
this.agentAudioAmplitude = 0;
|
|
4269
|
+
this._setStatus('disconnected');
|
|
4270
|
+
this.options.onDisconnect();
|
|
4271
|
+
}
|
|
4272
|
+
_getDeviceComparisonKey(device) {
|
|
4273
|
+
if (!device || typeof device !== 'object') {
|
|
4274
|
+
return null;
|
|
4275
|
+
}
|
|
4276
|
+
const deviceId = typeof device.deviceId === 'string' ? device.deviceId : '';
|
|
4277
|
+
if (deviceId && deviceId !== 'default') {
|
|
4278
|
+
return deviceId;
|
|
4279
|
+
}
|
|
4280
|
+
const groupId = typeof device.groupId === 'string' ? device.groupId : '';
|
|
4281
|
+
if (groupId) {
|
|
4282
|
+
return groupId;
|
|
4283
|
+
}
|
|
4284
|
+
const label = typeof device.label === 'string' ? device.label : '';
|
|
4285
|
+
if (label) {
|
|
4286
|
+
return label;
|
|
4287
|
+
}
|
|
4288
|
+
return null;
|
|
4289
|
+
}
|
|
4290
|
+
/**
|
|
4291
|
+
* Mutes the microphone to stop sending audio to the server
|
|
4292
|
+
* The connection and recording remain active for quick unmute
|
|
4293
|
+
*/
|
|
4294
|
+
mute() {
|
|
4295
|
+
if (!this.isMuted) {
|
|
4296
|
+
this.isMuted = true;
|
|
4297
|
+
console.log('Microphone muted');
|
|
4298
|
+
this.options.onMuteStateChange(true);
|
|
4299
|
+
}
|
|
4300
|
+
}
|
|
4301
|
+
/**
|
|
4302
|
+
* Unmutes the microphone to resume sending audio to the server
|
|
4303
|
+
*/
|
|
4304
|
+
unmute() {
|
|
4305
|
+
if (this.isMuted) {
|
|
4306
|
+
this.isMuted = false;
|
|
4307
|
+
console.log('Microphone unmuted');
|
|
4308
|
+
this.options.onMuteStateChange(false);
|
|
4309
|
+
}
|
|
4310
|
+
}
|
|
4311
|
+
}
|
|
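mute() and unmute() only gate what _handleDataAvailable sends; the recorder and WebSocket stay live, so unmuting is instant. A toggle sketch, assuming a client instance and a hypothetical button element:

// onMuteStateChange keeps the hypothetical button's label in sync.
const muteButton = document.querySelector('#mute');
muteButton.addEventListener('click', () => {
  if (client.isMuted) client.unmute();
  else client.mute();
});
// Passed as onMuteStateChange in the constructor options:
const onMuteStateChange = (muted) => {
  muteButton.textContent = muted ? 'Unmute' : 'Mute';
};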
4388
4312
|
|
|
4389
4313
|
return LayercodeClient;
|
|
4390
4314
|
|