@layercode/js-sdk 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,21 +4,6 @@
 (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.LayercodeClient = factory());
 })(this, (function () { 'use strict';
 
-function _mergeNamespaces(n, m) {
-  m.forEach(function (e) {
-    e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) {
-      if (k !== 'default' && !(k in n)) {
-        var d = Object.getOwnPropertyDescriptor(e, k);
-        Object.defineProperty(n, k, d.get ? d : {
-          enumerable: true,
-          get: function () { return e[k]; }
-        });
-      }
-    });
-  });
-  return Object.freeze(n);
-}
-
 /**
  * Raw wav audio file contents
  * @typedef {Object} WavPackerAudioType
@@ -1324,54 +1309,14 @@ registerProcessor('audio_processor', AudioProcessor);
    * @returns {Promise<true>}
    */
   async requestPermission() {
-    const ensureUserMediaAccess = async () => {
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: true,
-      });
-      const tracks = stream.getTracks();
-      tracks.forEach((track) => track.stop());
-    };
-
-    const permissionsUnsupported =
-      !navigator.permissions ||
-      typeof navigator.permissions.query !== 'function';
-
-    if (permissionsUnsupported) {
-      try {
-        await ensureUserMediaAccess();
-      } catch (error) {
-        window.alert('You must grant microphone access to use this feature.');
-        throw error;
-      }
-      return true;
-    }
-
     try {
-      const permissionStatus = await navigator.permissions.query({
-        name: 'microphone',
+      console.log('ensureUserMediaAccess');
+      await navigator.mediaDevices.getUserMedia({
+        audio: true,
       });
-
-      if (permissionStatus.state === 'denied') {
-        window.alert('You must grant microphone access to use this feature.');
-        return true;
-      }
-
-      if (permissionStatus.state === 'prompt') {
-        try {
-          await ensureUserMediaAccess();
-        } catch (error) {
-          window.alert('You must grant microphone access to use this feature.');
-          throw error;
-        }
-      }
-    } catch (error) {
-      // Firefox rejects permissions.query with NotSupportedError – fall back to getUserMedia directly
-      try {
-        await ensureUserMediaAccess();
-      } catch (fallbackError) {
-        window.alert('You must grant microphone access to use this feature.');
-        throw fallbackError;
-      }
+    } catch (fallbackError) {
+      window.alert('You must grant microphone access to use this feature.');
+      throw fallbackError;
     }
     return true;
   }
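
The rewritten requestPermission above drops the Permissions API entirely: 2.1.5 queried navigator.permissions (with a getUserMedia fallback for Firefox, whose permissions.query rejects for 'microphone'), while 2.1.7 always calls getUserMedia directly, which prompts for and verifies access in one step. Note that the console.log('ensureUserMediaAccess') debug line survives even though the helper of that name was removed, and that the new code no longer stops the tracks of the stream it opens, as the deleted ensureUserMediaAccess helper did. A minimal sketch of the new behaviour, assuming the globalThis.WavRecorder export visible later in this diff (not an official usage example):

(async () => {
  // Sketch only: in 2.1.7, requestPermission() resolves true once getUserMedia
  // succeeds; on denial it shows window.alert(...) and rethrows the error.
  const recorder = new WavRecorder({ sampleRate: 8000 });
  try {
    await recorder.requestPermission(); // single getUserMedia prompt, no permissions.query
  } catch (err) {
    console.error('Microphone access was denied:', err); // alert already shown
  }
})();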
@@ -1388,10 +1333,9 @@ registerProcessor('audio_processor', AudioProcessor);
       throw new Error('Could not request user devices');
     }
     await this.requestPermission();
+
     const devices = await navigator.mediaDevices.enumerateDevices();
-    const audioDevices = devices.filter(
-      (device) => device.kind === 'audioinput',
-    );
+    const audioDevices = devices.filter((device) => device.kind === 'audioinput');
     const defaultDeviceIndex = audioDevices.findIndex(
       (device) => device.deviceId === 'default',
     );
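
The large hunk below is mostly a relocation plus an upgrade of the inlined VAD stack: what appears to be the bundled @ricky0123/vad-web and onnxruntime-web sources (getAugmentedNamespace, assetPath, defaultModelFetcher, logging, messages, FrameProcessor, the ONNX backend registry, and the Tensor class) move up into the position previously occupied by the LayercodeClient definition, and the trailing lines of the hunk remove the same bundled sources from their old location further down the file. The FrameProcessor added here implements silero-vad style speech gating; the sketch below shows how its options drive segmentation (assumed usage — the class is internal to the bundle and not public API; the option values are the defaultV5FrameProcessorOptions visible in this hunk):

(async () => {
  // Sketch only: drive the bundled FrameProcessor with a stub model.
  const fp = new FrameProcessor(
    async (frame) => ({ isSpeech: 0.9, notSpeech: 0.1 }), // stub modelProcessFunc
    () => {},                                             // stub modelResetFunc
    {
      positiveSpeechThreshold: 0.5,  // a frame counts as speech at >= 0.5
      negativeSpeechThreshold: 0.35, // redemption counting runs below 0.35
      preSpeechPadFrames: 3,
      redemptionFrames: 24,          // speech ends after 24 sub-threshold frames
      frameSamples: 512,
      minSpeechFrames: 9,            // shorter segments emit VAD_MISFIRE
      submitUserSpeechOnPause: false,
    },
  );
  fp.resume(); // process() is a no-op until active is set
  await fp.process(new Float32Array(512), (ev) => console.log(ev.msg));
  // logs FRAME_PROCESSED, then SPEECH_START for the first speech frame
})();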
@@ -1779,1777 +1723,1033 @@ registerProcessor('audio_processor', AudioProcessor);
 
 globalThis.WavRecorder = WavRecorder;
 
-/**
- * Converts a base64 string to an ArrayBuffer.
- * @param {string} base64 - The base64 string to convert.
- * @returns {ArrayBuffer} The resulting ArrayBuffer.
- */
-function base64ToArrayBuffer(base64) {
-  const binaryString = atob(base64);
-  const len = binaryString.length;
-  const bytes = new Uint8Array(len);
-  for (let i = 0; i < len; i++) {
-    bytes[i] = binaryString.charCodeAt(i);
-  }
-  return bytes.buffer;
+var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
+
+function getAugmentedNamespace(n) {
+  if (n.__esModule) return n;
+  var f = n.default;
+  if (typeof f == "function") {
+    var a = function a () {
+      if (this instanceof a) {
+        return Reflect.construct(f, arguments, this.constructor);
+      }
+      return f.apply(this, arguments);
+    };
+    a.prototype = f.prototype;
+  } else a = {};
+  Object.defineProperty(a, '__esModule', {value: true});
+  Object.keys(n).forEach(function (k) {
+    var d = Object.getOwnPropertyDescriptor(n, k);
+    Object.defineProperty(a, k, d.get ? d : {
+      enumerable: true,
+      get: function () {
+        return n[k];
+      }
+    });
+  });
+  return a;
 }
 
-/**
- * Converts an ArrayBuffer to a base64 string.
- * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
- * @returns {string} The resulting base64 string.
- */
-function arrayBufferToBase64$1(arrayBuffer) {
-  if (arrayBuffer instanceof Float32Array) {
-    arrayBuffer = this.floatTo16BitPCM(arrayBuffer);
-  } else if (arrayBuffer instanceof Int16Array) {
-    arrayBuffer = arrayBuffer.buffer;
-  }
-  let binary = '';
-  let bytes = new Uint8Array(arrayBuffer);
-  const chunkSize = 0x8000; // 32KB chunk size
-  for (let i = 0; i < bytes.length; i += chunkSize) {
-    let chunk = bytes.subarray(i, i + chunkSize);
-    binary += String.fromCharCode.apply(null, chunk);
-  }
-  return btoa(binary);
+var dist = {};
+
+var assetPath = {};
+
+Object.defineProperty(assetPath, "__esModule", { value: true });
+assetPath.baseAssetPath = void 0;
+// nextjs@14 bundler may attempt to execute this during SSR and crash
+const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
+const currentScript = isWeb
+  ? window.document.currentScript
+  : null;
+let basePath = "/";
+if (currentScript) {
+  basePath = currentScript.src
+    .replace(/#.*$/, "")
+    .replace(/\?.*$/, "")
+    .replace(/\/[^\/]+$/, "/");
 }
+assetPath.baseAssetPath = basePath;
 
-/* eslint-env browser */
-// import { env as ortEnv } from 'onnxruntime-web';
-const NOOP = () => { };
-const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
-// SDK version - updated when publishing
-const SDK_VERSION = '2.1.3';
-// Lazily load the browser-only VAD module to avoid accessing `self` on the server
-let micVADModulePromise = null;
-const loadMicVADModule = () => {
-  if (typeof window === 'undefined') {
-    return Promise.resolve(null);
+var defaultModelFetcher$1 = {};
+
+Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
+defaultModelFetcher$1.defaultModelFetcher = void 0;
+const defaultModelFetcher = (path) => {
+  return fetch(path).then((model) => model.arrayBuffer());
+};
+defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
+
+var frameProcessor = {};
+
+var logging = {};
+
+(function (exports) {
+  Object.defineProperty(exports, "__esModule", { value: true });
+  exports.log = exports.LOG_PREFIX = void 0;
+  exports.LOG_PREFIX = "[VAD]";
+  const levels = ["error", "debug", "warn"];
+  function getLog(level) {
+    return (...args) => {
+      console[level](exports.LOG_PREFIX, ...args);
+    };
+  }
+  const _log = levels.reduce((acc, level) => {
+    acc[level] = getLog(level);
+    return acc;
+  }, {});
+  exports.log = _log;
+
+} (logging));
+
+var messages = {};
+
+Object.defineProperty(messages, "__esModule", { value: true });
+messages.Message = void 0;
+var Message;
+(function (Message) {
+  Message["AudioFrame"] = "AUDIO_FRAME";
+  Message["SpeechStart"] = "SPEECH_START";
+  Message["VADMisfire"] = "VAD_MISFIRE";
+  Message["SpeechEnd"] = "SPEECH_END";
+  Message["SpeechStop"] = "SPEECH_STOP";
+  Message["SpeechRealStart"] = "SPEECH_REAL_START";
+  Message["FrameProcessed"] = "FRAME_PROCESSED";
+})(Message || (messages.Message = Message = {}));
+
+/*
+Some of this code, together with the default options found in index.ts,
+were taken (or took inspiration) from https://github.com/snakers4/silero-vad
+*/
+Object.defineProperty(frameProcessor, "__esModule", { value: true });
+frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
+const logging_1$3 = logging;
+const messages_1 = messages;
+const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
+frameProcessor.defaultLegacyFrameProcessorOptions = {
+  positiveSpeechThreshold: 0.5,
+  negativeSpeechThreshold: 0.5 - 0.15,
+  preSpeechPadFrames: 1,
+  redemptionFrames: 8,
+  frameSamples: 1536,
+  minSpeechFrames: 3,
+  submitUserSpeechOnPause: false,
+};
+frameProcessor.defaultV5FrameProcessorOptions = {
+  positiveSpeechThreshold: 0.5,
+  negativeSpeechThreshold: 0.5 - 0.15,
+  preSpeechPadFrames: 3,
+  redemptionFrames: 24,
+  frameSamples: 512,
+  minSpeechFrames: 9,
+  submitUserSpeechOnPause: false,
+};
+function validateOptions(options) {
+  if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
+    logging_1$3.log.warn("You are using an unusual frame size");
+  }
+  if (options.positiveSpeechThreshold < 0 ||
+    options.positiveSpeechThreshold > 1) {
+    logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
+  }
+  if (options.negativeSpeechThreshold < 0 ||
+    options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
+    logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
+  }
+  if (options.preSpeechPadFrames < 0) {
+    logging_1$3.log.error("preSpeechPadFrames should be positive");
   }
-  if (!micVADModulePromise) {
-    // @ts-ignore - VAD package does not provide TypeScript types
-    micVADModulePromise = Promise.resolve().then(function () { return index$1; });
+  if (options.redemptionFrames < 0) {
+    logging_1$3.log.error("redemptionFrames should be positive");
   }
-  return micVADModulePromise;
+}
+frameProcessor.validateOptions = validateOptions;
+const concatArrays = (arrays) => {
+  const sizes = arrays.reduce((out, next) => {
+    out.push(out.at(-1) + next.length);
+    return out;
+  }, [0]);
+  const outArray = new Float32Array(sizes.at(-1));
+  arrays.forEach((arr, index) => {
+    const place = sizes[index];
+    outArray.set(arr, place);
+  });
+  return outArray;
 };
-/**
- * @class LayercodeClient
- * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
- */
-class LayercodeClient {
-  /**
-   * Creates an instance of LayercodeClient.
-   * @param {Object} options - Configuration options
-   */
-  constructor(options) {
-    var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
-    this.deviceId = null;
-    this.options = {
-      agentId: options.agentId,
-      conversationId: (_a = options.conversationId) !== null && _a !== void 0 ? _a : null,
-      authorizeSessionEndpoint: options.authorizeSessionEndpoint,
-      metadata: (_b = options.metadata) !== null && _b !== void 0 ? _b : {},
-      vadResumeDelay: (_c = options.vadResumeDelay) !== null && _c !== void 0 ? _c : 500,
-      onConnect: (_d = options.onConnect) !== null && _d !== void 0 ? _d : NOOP,
-      onDisconnect: (_e = options.onDisconnect) !== null && _e !== void 0 ? _e : NOOP,
-      onError: (_f = options.onError) !== null && _f !== void 0 ? _f : NOOP,
-      onDeviceSwitched: (_g = options.onDeviceSwitched) !== null && _g !== void 0 ? _g : NOOP,
-      onDataMessage: (_h = options.onDataMessage) !== null && _h !== void 0 ? _h : NOOP,
-      onMessage: (_j = options.onMessage) !== null && _j !== void 0 ? _j : NOOP,
-      onUserAmplitudeChange: (_k = options.onUserAmplitudeChange) !== null && _k !== void 0 ? _k : NOOP,
-      onAgentAmplitudeChange: (_l = options.onAgentAmplitudeChange) !== null && _l !== void 0 ? _l : NOOP,
-      onStatusChange: (_m = options.onStatusChange) !== null && _m !== void 0 ? _m : NOOP,
-      onUserIsSpeakingChange: (_o = options.onUserIsSpeakingChange) !== null && _o !== void 0 ? _o : NOOP,
-      onMuteStateChange: (_p = options.onMuteStateChange) !== null && _p !== void 0 ? _p : NOOP,
+class FrameProcessor {
+  constructor(modelProcessFunc, modelResetFunc, options) {
+    this.modelProcessFunc = modelProcessFunc;
+    this.modelResetFunc = modelResetFunc;
+    this.options = options;
+    this.speaking = false;
+    this.redemptionCounter = 0;
+    this.speechFrameCount = 0;
+    this.active = false;
+    this.speechRealStartFired = false;
+    this.reset = () => {
+      this.speaking = false;
+      this.speechRealStartFired = false;
+      this.audioBuffer = [];
+      this.modelResetFunc();
+      this.redemptionCounter = 0;
+      this.speechFrameCount = 0;
+    };
+    this.pause = (handleEvent) => {
+      this.active = false;
+      if (this.options.submitUserSpeechOnPause) {
+        this.endSegment(handleEvent);
+      }
+      else {
+        this.reset();
+      }
+    };
+    this.resume = () => {
+      this.active = true;
+    };
+    this.endSegment = (handleEvent) => {
+      const audioBuffer = this.audioBuffer;
+      this.audioBuffer = [];
+      const speaking = this.speaking;
+      this.reset();
+      if (speaking) {
+        const speechFrameCount = audioBuffer.reduce((acc, item) => {
+          return item.isSpeech ? (acc + 1) : acc;
+        }, 0);
+        if (speechFrameCount >= this.options.minSpeechFrames) {
+          const audio = concatArrays(audioBuffer.map((item) => item.frame));
+          handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+        }
+        else {
+          handleEvent({ msg: messages_1.Message.VADMisfire });
+        }
+      }
+      return {};
+    };
+    this.process = async (frame, handleEvent) => {
+      if (!this.active) {
+        return;
+      }
+      const probs = await this.modelProcessFunc(frame);
+      const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;
+      handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });
+      this.audioBuffer.push({
+        frame,
+        isSpeech,
+      });
+      if (isSpeech) {
+        this.speechFrameCount++;
+        this.redemptionCounter = 0;
+      }
+      if (isSpeech && !this.speaking) {
+        this.speaking = true;
+        handleEvent({ msg: messages_1.Message.SpeechStart });
+      }
+      if (this.speaking &&
+        this.speechFrameCount === this.options.minSpeechFrames &&
+        !this.speechRealStartFired) {
+        this.speechRealStartFired = true;
+        handleEvent({ msg: messages_1.Message.SpeechRealStart });
+      }
+      if (probs.isSpeech < this.options.negativeSpeechThreshold &&
+        this.speaking &&
+        ++this.redemptionCounter >= this.options.redemptionFrames) {
+        this.redemptionCounter = 0;
+        this.speechFrameCount = 0;
+        this.speaking = false;
+        this.speechRealStartFired = false;
+        const audioBuffer = this.audioBuffer;
+        this.audioBuffer = [];
+        const speechFrameCount = audioBuffer.reduce((acc, item) => {
+          return item.isSpeech ? (acc + 1) : acc;
+        }, 0);
+        if (speechFrameCount >= this.options.minSpeechFrames) {
+          const audio = concatArrays(audioBuffer.map((item) => item.frame));
+          handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
+        }
+        else {
+          handleEvent({ msg: messages_1.Message.VADMisfire });
+        }
+      }
+      if (!this.speaking) {
+        while (this.audioBuffer.length > this.options.preSpeechPadFrames) {
+          this.audioBuffer.shift();
+        }
+        this.speechFrameCount = 0;
+      }
     };
-    this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
-    this._websocketUrl = DEFAULT_WS_URL;
-    this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
-    this.wavPlayer = new WavStreamPlayer({
-      finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
-      sampleRate: 16000, // TODO should be set my fetched agent config
-    });
-    this.vad = null;
-    this.ws = null;
-    this.status = 'disconnected';
-    this.userAudioAmplitude = 0;
-    this.agentAudioAmplitude = 0;
-    this.conversationId = this.options.conversationId;
-    this.pushToTalkActive = false;
-    this.pushToTalkEnabled = false;
-    this.canInterrupt = false;
-    this.userIsSpeaking = false;
-    this.recorderStarted = false;
-    this.readySent = false;
-    this.currentTurnId = null;
     this.audioBuffer = [];
-    this.vadConfig = null;
-    this.activeDeviceId = null;
-    this.useSystemDefaultDevice = false;
-    this.lastReportedDeviceId = null;
-    this.lastKnownSystemDefaultDeviceKey = null;
-    this.isMuted = false;
-    this.stopPlayerAmplitude = undefined;
-    this.stopRecorderAmplitude = undefined;
-    this.deviceChangeListener = null;
-    // this.audioPauseTime = null;
-    // Bind event handlers
-    this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
-    this._handleDataAvailable = this._handleDataAvailable.bind(this);
+    this.reset();
   }
-  _initializeVAD() {
-    var _a;
-    if (typeof window === 'undefined') {
-      return;
-    }
-    console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
-    // If we're in push to talk mode, we don't need to use the VAD model
-    if (this.pushToTalkEnabled) {
-      return;
+}
+frameProcessor.FrameProcessor = FrameProcessor;
+
+var nonRealTimeVad = {};
+
+var ortWeb_min = {exports: {}};
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+const backends = {};
+const backendsSortedByPriority = [];
+/**
+ * Register a backend.
+ *
+ * @param name - the name as a key to lookup as an execution provider.
+ * @param backend - the backend object.
+ * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
+ * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
+ *
+ * @internal
+ */
+const registerBackend = (name, backend, priority) => {
+  if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
+    const currentBackend = backends[name];
+    if (currentBackend === undefined) {
+      backends[name] = { backend, priority };
     }
-    // Check if VAD is disabled
-    if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
-      console.log('VAD is disabled by backend configuration');
+    else if (currentBackend.priority > priority) {
+      // same name is already registered with a higher priority. skip registeration.
       return;
     }
-    // Build VAD configuration object, only including keys that are defined
-    const vadOptions = {
-      stream: this.wavRecorder.getStream() || undefined,
-      onSpeechStart: () => {
-        console.debug('onSpeechStart: sending vad_start');
-        this.userIsSpeaking = true;
-        this.options.onUserIsSpeakingChange(true);
-        this._wsSend({
-          type: 'vad_events',
-          event: 'vad_start',
-        });
-        this.options.onMessage({
-          type: 'vad_events',
-          event: 'vad_start',
-        });
-      },
-      onSpeechEnd: () => {
-        console.debug('onSpeechEnd: sending vad_end');
-        this.userIsSpeaking = false;
-        this.options.onUserIsSpeakingChange(false);
-        this.audioBuffer = []; // Clear buffer on speech end
-        this._wsSend({
-          type: 'vad_events',
-          event: 'vad_end',
-        });
-        this.options.onMessage({
-          type: 'vad_events',
-          event: 'vad_end',
-        });
-      },
-    };
-    // Apply VAD configuration from backend if available
-    if (this.vadConfig) {
-      // Only add keys that are explicitly defined (not undefined)
-      if (this.vadConfig.model !== undefined)
-        vadOptions.model = this.vadConfig.model;
-      if (this.vadConfig.positive_speech_threshold !== undefined)
-        vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
-      if (this.vadConfig.negative_speech_threshold !== undefined)
-        vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
-      if (this.vadConfig.redemption_frames !== undefined)
-        vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
-      if (this.vadConfig.min_speech_frames !== undefined)
-        vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
-      if (this.vadConfig.pre_speech_pad_frames !== undefined)
-        vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
-      if (this.vadConfig.frame_samples !== undefined)
-        vadOptions.frameSamples = this.vadConfig.frame_samples;
-    }
-    else {
-      // Default values if no config from backend
-      vadOptions.model = 'v5';
-      vadOptions.positiveSpeechThreshold = 0.15;
-      vadOptions.negativeSpeechThreshold = 0.05;
-      vadOptions.redemptionFrames = 4;
-      vadOptions.minSpeechFrames = 2;
-      vadOptions.preSpeechPadFrames = 0;
-      vadOptions.frameSamples = 512; // Required for v5
+    else if (currentBackend.priority === priority) {
+      if (currentBackend.backend !== backend) {
+        throw new Error(`cannot register backend "${name}" using priority ${priority}`);
+      }
     }
-    console.log('Creating VAD with options:', vadOptions);
-    loadMicVADModule()
-      .then((module) => { var _a, _b, _c; return (_c = (_b = (_a = module === null || module === void 0 ? void 0 : module.MicVAD) === null || _a === void 0 ? void 0 : _a.new) === null || _b === void 0 ? void 0 : _b.call(_a, vadOptions)) !== null && _c !== void 0 ? _c : null; })
-      .then((vad) => {
-        if (!vad) {
-          throw new Error('MicVAD module not available');
+    if (priority >= 0) {
+      const i = backendsSortedByPriority.indexOf(name);
+      if (i !== -1) {
+        backendsSortedByPriority.splice(i, 1);
       }
-        this.vad = vad;
-        this.vad.start();
-        console.log('VAD started successfully');
-      })
-      .catch((error) => {
-        console.warn('Error initializing VAD:', error);
-        // Send a message to server indicating VAD failure
-        this._wsSend({
-          type: 'vad_events',
-          event: 'vad_model_failed',
-        });
-      });
-  }
-  /**
-   * Updates the connection status and triggers the callback
-   * @param {string} status - New status value
-   */
-  _setStatus(status) {
-    this.status = status;
-    this.options.onStatusChange(status);
+      for (let i = 0; i < backendsSortedByPriority.length; i++) {
+        if (backends[backendsSortedByPriority[i]].priority <= priority) {
+          backendsSortedByPriority.splice(i, 0, name);
+          return;
+        }
+      }
+      backendsSortedByPriority.push(name);
+    }
+    return;
   }
-  /**
-   * Handles when agent audio finishes playing
-   */
-  _clientResponseAudioReplayFinished() {
-    console.debug('clientResponseAudioReplayFinished');
-    this._wsSend({
-      type: 'trigger.response.audio.replay_finished',
-      reason: 'completed',
-    });
-  }
-  async _clientInterruptAssistantReplay() {
-    await this.wavPlayer.interrupt();
-  }
-  async triggerUserTurnStarted() {
-    if (!this.pushToTalkActive) {
-      this.pushToTalkActive = true;
-      this._wsSend({ type: 'trigger.turn.start', role: 'user' });
-      await this._clientInterruptAssistantReplay();
-    }
-  }
-  async triggerUserTurnFinished() {
-    if (this.pushToTalkActive) {
-      this.pushToTalkActive = false;
-      this._wsSend({ type: 'trigger.turn.end', role: 'user' });
-    }
-  }
-  /**
-   * Handles incoming WebSocket messages
-   * @param {MessageEvent} event - The WebSocket message event
-   */
-  async _handleWebSocketMessage(event) {
-    try {
-      const message = JSON.parse(event.data);
-      if (message.type !== 'response.audio') {
-        console.debug('msg:', message);
-      }
-      switch (message.type) {
-        case 'turn.start':
-          // Sent from the server to this client when a new user turn is detected
-          if (message.role === 'assistant') {
-            // Start tracking new assistant turn
-            console.debug('Assistant turn started, will track new turn ID from audio/text');
-          }
-          else if (message.role === 'user' && !this.pushToTalkEnabled) {
-            // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
-            console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
-            await this._clientInterruptAssistantReplay();
-          }
-          this.options.onMessage(message);
-          break;
-        case 'response.audio':
-          const audioBuffer = base64ToArrayBuffer(message.content);
-          this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
-          // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
-          // Set current turn ID from first audio message, or update if different turn
-          if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
-            console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
-            this.currentTurnId = message.turn_id;
-            // Clean up interrupted tracks, keeping only the current turn
-            this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
-          }
-          break;
-        case 'response.text':
-          // Set turn ID from first text message if not set
-          if (!this.currentTurnId) {
-            this.currentTurnId = message.turn_id;
-            console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
-          }
-          this.options.onMessage(message);
-          break;
-        case 'response.data':
-          this.options.onDataMessage(message);
-          break;
-        case 'user.transcript':
-        case 'user.transcript.delta':
-        case 'user.transcript.interim_delta':
-          this.options.onMessage(message);
-          break;
-        default:
-          console.warn('Unknown message type received:', message);
-      }
-    }
-    catch (error) {
-      console.error('Error processing WebSocket message:', error);
-      this.options.onError(error instanceof Error ? error : new Error(String(error)));
-    }
-  }
-  /**
-   * Handles available client browser microphone audio data and sends it over the WebSocket
-   * @param {ArrayBuffer} data - The audio data buffer
-   */
-  _handleDataAvailable(data) {
-    var _a, _b, _c;
-    try {
-      const base64 = arrayBufferToBase64$1(data.mono);
-      // Don't send audio if muted
-      if (this.isMuted) {
-        return;
-      }
-      // Determine if we should gate audio based on VAD configuration
-      const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
-      const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
-      let sendAudio;
-      if (this.pushToTalkEnabled) {
-        sendAudio = this.pushToTalkActive;
-      }
-      else if (shouldGateAudio) {
-        sendAudio = this.userIsSpeaking;
+  throw new TypeError('not a valid backend');
+};
+/**
+ * Resolve backend by specified hints.
+ *
+ * @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
+ * @returns a promise that resolves to the backend.
+ *
+ * @internal
+ */
+const resolveBackend = async (backendHints) => {
+  const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
+  const errors = [];
+  for (const backendName of backendNames) {
+    const backendInfo = backends[backendName];
+    if (backendInfo) {
+      if (backendInfo.initialized) {
+        return backendInfo.backend;
       }
-      else {
-        // If gate_audio is false, always send audio
-        sendAudio = true;
+      else if (backendInfo.aborted) {
+        continue; // current backend is unavailable; try next
       }
-      if (sendAudio) {
-        // If we have buffered audio and we're gating, send it first
-        if (shouldGateAudio && this.audioBuffer.length > 0) {
-          console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
-          for (const bufferedAudio of this.audioBuffer) {
-            this._wsSend({
-              type: 'client.audio',
-              content: bufferedAudio,
-            });
-          }
-          this.audioBuffer = []; // Clear the buffer after sending
+      const isInitializing = !!backendInfo.initPromise;
+      try {
+        if (!isInitializing) {
+          backendInfo.initPromise = backendInfo.backend.init();
         }
-        // Send the current audio
-        this._wsSend({
-          type: 'client.audio',
-          content: base64,
-        });
+        await backendInfo.initPromise;
+        backendInfo.initialized = true;
+        return backendInfo.backend;
       }
-      else {
-        // Buffer audio when not sending (to catch audio just before VAD triggers)
-        this.audioBuffer.push(base64);
-        // Keep buffer size based on configuration
-        if (this.audioBuffer.length > bufferFrames) {
-          this.audioBuffer.shift(); // Remove oldest chunk
+      catch (e) {
+        if (!isInitializing) {
+          errors.push({ name: backendName, err: e });
         }
+        backendInfo.aborted = true;
+      }
+      finally {
+        delete backendInfo.initPromise;
       }
-    }
-    catch (error) {
-      console.error('Error processing audio:', error);
-      this.options.onError(error instanceof Error ? error : new Error(String(error)));
-    }
-  }
-  _wsSend(message) {
-    var _a;
-    if (message.type !== 'client.audio') {
-      console.debug('sent_msg:', message);
-    }
-    const messageString = JSON.stringify(message);
-    if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
-      this.ws.send(messageString);
     }
   }
-  _sendReadyIfNeeded() {
-    var _a;
-    if (this.recorderStarted && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
-      this._wsSend({ type: 'client.ready' });
-      this.readySent = true;
-    }
+  throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
+};
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+class EnvImpl {
+  constructor() {
+    this.wasm = {};
+    this.webgl = {};
+    this.logLevelInternal = 'warning';
   }
-  /**
-   * Sets up amplitude monitoring for a given audio source.
-   * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
-   * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
-   * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
-   */
-  _setupAmplitudeMonitoring(source, callback, updateInternalState) {
-    let updateCounter = 0;
-    source.startAmplitudeMonitoring((amplitude) => {
-      // Only update and call callback at the specified sample rate
-      if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
-        updateInternalState(amplitude);
-        if (callback !== NOOP) {
-          callback(amplitude);
-        }
-        updateCounter = 0; // Reset counter after sampling
-      }
-      updateCounter++;
-    });
-    const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
-    if (source === this.wavPlayer) {
-      this.stopPlayerAmplitude = stop;
+  // TODO standadize the getter and setter convention in env for other fields.
+  set logLevel(value) {
+    if (value === undefined) {
+      return;
    }
-    if (source === this.wavRecorder) {
-      this.stopRecorderAmplitude = stop;
+    if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
+      throw new Error(`Unsupported logging level: ${value}`);
    }
+    this.logLevelInternal = value;
   }
-  _stopAmplitudeMonitoring() {
-    var _a, _b;
-    (_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
-    (_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
-    this.stopPlayerAmplitude = undefined;
-    this.stopRecorderAmplitude = undefined;
+  get logLevel() {
+    return this.logLevelInternal;
   }
-  /**
-   * Connects to the Layercode agent using the stored conversation ID and starts the audio conversation
-   * @async
-   * @returns {Promise<void>}
-   */
-  async connect() {
-    if (this.status === 'connecting') {
-      return;
-    }
-    try {
-      this._setStatus('connecting');
-      // Reset turn tracking for clean start
-      this._resetTurnTracking();
-      this._stopAmplitudeMonitoring();
-      this._setupDeviceChangeListener();
-      // Get conversation key from server
-      let authorizeSessionRequestBody = {
-        agent_id: this.options.agentId,
-        metadata: this.options.metadata,
-        sdk_version: SDK_VERSION,
-      };
-      // If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
-      if (this.options.conversationId) {
-        authorizeSessionRequestBody.conversation_id = this.options.conversationId;
-      }
-      const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        body: JSON.stringify(authorizeSessionRequestBody),
-      });
-      if (!authorizeSessionResponse.ok) {
-        throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
+}
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+/**
+ * Represent a set of flags as a global singleton.
+ */
+const env = new EnvImpl();
+
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && typeof BigInt64Array.from === 'function';
+const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && typeof BigUint64Array.from === 'function';
+// a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
+const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
+  ['float32', Float32Array],
+  ['uint8', Uint8Array],
+  ['int8', Int8Array],
+  ['uint16', Uint16Array],
+  ['int16', Int16Array],
+  ['int32', Int32Array],
+  ['bool', Uint8Array],
+  ['float64', Float64Array],
+  ['uint32', Uint32Array],
+]);
+// a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
+const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
+  [Float32Array, 'float32'],
+  [Uint8Array, 'uint8'],
+  [Int8Array, 'int8'],
+  [Uint16Array, 'uint16'],
+  [Int16Array, 'int16'],
+  [Int32Array, 'int32'],
+  [Float64Array, 'float64'],
+  [Uint32Array, 'uint32'],
+]);
+if (isBigInt64ArrayAvailable) {
+  NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
+  NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
+}
+if (isBigUint64ArrayAvailable) {
+  NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
+  NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
+}
+/**
+ * calculate size from dims.
+ *
+ * @param dims the dims array. May be an illegal input.
+ */
+const calculateSize = (dims) => {
+  let size = 1;
+  for (let i = 0; i < dims.length; i++) {
+    const dim = dims[i];
+    if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
+      throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
+    }
+    if (dim < 0) {
+      throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
+    }
+    size *= dim;
+  }
+  return size;
+};
+let Tensor$1 = class Tensor {
+  constructor(arg0, arg1, arg2) {
+    let type;
+    let data;
+    let dims;
+    // check whether arg0 is type or data
+    if (typeof arg0 === 'string') {
+      //
+      // Override: constructor(type, data, ...)
+      //
+      type = arg0;
+      dims = arg2;
+      if (arg0 === 'string') {
+        // string tensor
+        if (!Array.isArray(arg1)) {
+          throw new TypeError('A string tensor\'s data must be a string array.');
+        }
+        // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
+        // error will be populated at inference
+        data = arg1;
       }
-      const authorizeSessionResponseBody = await authorizeSessionResponse.json();
-      this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
-      this.options.conversationId = this.conversationId;
-      // Connect WebSocket
-      this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
-        client_session_key: authorizeSessionResponseBody.client_session_key,
-      })}`);
-      const config = authorizeSessionResponseBody.config;
-      console.log('AgentConfig', config);
-      // Store VAD configuration
-      this.vadConfig = config.vad || null;
-      if (config.transcription.trigger === 'push_to_talk') {
-        this.pushToTalkEnabled = true;
+      else {
+        // numeric tensor
+        const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(arg0);
+        if (typedArrayConstructor === undefined) {
+          throw new TypeError(`Unsupported tensor type: ${arg0}.`);
+        }
+        if (Array.isArray(arg1)) {
+          // use 'as any' here because TypeScript's check on type of 'SupportedTypedArrayConstructors.from()' produces
+          // incorrect results.
+          // 'typedArrayConstructor' should be one of the typed array prototype objects.
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          data = typedArrayConstructor.from(arg1);
+        }
+        else if (arg1 instanceof typedArrayConstructor) {
+          data = arg1;
+        }
+        else {
+          throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
+        }
       }
-      else if (config.transcription.trigger === 'automatic') {
-        this.pushToTalkEnabled = false;
-        this.canInterrupt = config.transcription.can_interrupt;
+    }
+    else {
+      //
+      // Override: constructor(data, ...)
+      //
+      dims = arg1;
+      if (Array.isArray(arg0)) {
+        // only boolean[] and string[] is supported
+        if (arg0.length === 0) {
+          throw new TypeError('Tensor type cannot be inferred from an empty array.');
+        }
+        const firstElementType = typeof arg0[0];
+        if (firstElementType === 'string') {
+          type = 'string';
+          data = arg0;
+        }
+        else if (firstElementType === 'boolean') {
+          type = 'bool';
+          // 'arg0' is of type 'boolean[]'. Uint8Array.from(boolean[]) actually works, but typescript thinks this is
+          // wrong type. We use 'as any' to make it happy.
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          data = Uint8Array.from(arg0);
+        }
+        else {
+          throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
+        }
       }
       else {
-        throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
+        // get tensor type from TypedArray
+        const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
+        if (mappedType === undefined) {
+          throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
+        }
+        type = mappedType;
+        data = arg0;
       }
-      // Bind the websocket message callbacks
-      this.ws.onmessage = this._handleWebSocketMessage;
-      this.ws.onopen = () => {
-        console.log('WebSocket connection established');
-        this._setStatus('connected');
-        this.options.onConnect({ conversationId: this.conversationId });
-        // Attempt to send ready message if recorder already started
-        this._sendReadyIfNeeded();
-      };
-      this.ws.onclose = () => {
-        console.log('WebSocket connection closed');
-        this.ws = null;
-        this._performDisconnectCleanup().catch((error) => {
-          console.error('Error during disconnect cleanup:', error);
-          this.options.onError(error instanceof Error ? error : new Error(String(error)));
-        });
-      };
-      this.ws.onerror = (error) => {
-        console.error('WebSocket error:', error);
-        this._setStatus('error');
-        this.options.onError(new Error('WebSocket connection error'));
-      };
-      // Initialize audio player
-      await this.wavPlayer.connect();
-      // Set up audio player amplitude monitoring
-      this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
-      // wavRecorder will be started from the onDeviceSwitched callback,
-      // which is called when the device is first initialized and also when the device is switched
-      // this is to ensure that the device is initialized before the recorder is started
     }
-    catch (error) {
-      console.error('Error connecting to Layercode agent:', error);
-      this._setStatus('error');
-      this.options.onError(error instanceof Error ? error : new Error(String(error)));
-      throw error;
+    // type and data is processed, now processing dims
+    if (dims === undefined) {
+      // assume 1-D tensor if dims omitted
+      dims = [data.length];
     }
-  }
-  _resetTurnTracking() {
-    this.currentTurnId = null;
-    console.debug('Reset turn tracking state');
-  }
-  async disconnect() {
-    if (this.status === 'disconnected') {
-      return;
+    else if (!Array.isArray(dims)) {
+      throw new TypeError('A tensor\'s dims must be a number array');
     }
-    if (this.ws) {
-      this.ws.onopen = null;
-      this.ws.onclose = null;
-      this.ws.onerror = null;
-      this.ws.onmessage = null;
-      this.ws.close();
-      this.ws = null;
+    // perform check
+    const size = calculateSize(dims);
+    if (size !== data.length) {
+      throw new Error(`Tensor's size(${size}) does not match data length(${data.length}).`);
     }
-    await this._performDisconnectCleanup();
-  }
-  /**
-   * Gets the microphone MediaStream used by this client
-   * @returns {MediaStream|null} The microphone stream or null if not initialized
-   */
-  getStream() {
-    return this.wavRecorder.getStream();
+    this.dims = dims;
+    this.type = type;
+    this.data = data;
+    this.size = size;
   }
2258
+ // #endregion
2317
2259
  /**
2318
- * Switches the input device for the microphone and restarts recording
2319
- * @param {string} deviceId - The deviceId of the new microphone
2260
+ * Create a new tensor object from image object
2261
+ *
2262
+ * @param buffer - Extracted image buffer data - assuming RGBA format
2263
+ * @param imageFormat - input image configuration - required configurations height, width, format
2264
+ * @param tensorFormat - output tensor configuration - Default is RGB format
2320
2265
  */
2321
- async setInputDevice(deviceId) {
2322
- var _a, _b, _c;
2323
- try {
2324
- const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
2325
- this.useSystemDefaultDevice = normalizedDeviceId === null;
2326
- this.deviceId = normalizedDeviceId;
2327
- // Restart recording with the new device
2328
- await this._restartAudioRecording();
2329
- // Reinitialize VAD with the new audio stream if VAD is enabled
2330
- const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
2331
- if (shouldUseVAD) {
2332
- console.debug('Reinitializing VAD with new audio stream');
2333
- const newStream = this.wavRecorder.getStream();
2334
- await this._reinitializeVAD(newStream);
2335
- }
2336
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
2337
- console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
2266
+ static bufferToTensor(buffer, options) {
2267
+ if (buffer === undefined) {
2268
+ throw new Error('Image buffer must be defined');
2338
2269
  }
2339
- catch (error) {
2340
- console.error(`Failed to switch to input device ${deviceId}:`, error);
2341
- throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
2342
- }
2343
- }
2344
- /**
2345
- * Restarts audio recording after a device switch to ensure audio is captured from the new device
2346
- */
2347
- async _restartAudioRecording() {
2348
- var _a, _b;
2349
- try {
2350
- console.debug('Restarting audio recording after device switch...');
2351
- try {
2352
- await this.wavRecorder.end();
2353
- }
2354
- catch (_c) {
2355
- // Ignore cleanup errors
2356
- }
2357
- // Start with new device
2358
- const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
2359
- await this.wavRecorder.begin(targetDeviceId);
2360
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
2361
- // Re-setup amplitude monitoring with the new stream
2362
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
2363
- const previousReportedDeviceId = this.lastReportedDeviceId;
2364
- const stream = this.wavRecorder.getStream();
2365
- const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
2366
- const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
2367
- const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
2368
- this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
2369
- if (!this.recorderStarted) {
2370
- this.recorderStarted = true;
2371
- this._sendReadyIfNeeded();
2372
- }
2373
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
2374
- if (reportedDeviceId !== previousReportedDeviceId) {
2375
- this.lastReportedDeviceId = reportedDeviceId;
2376
- if (this.options.onDeviceSwitched) {
2377
- this.options.onDeviceSwitched(reportedDeviceId);
2378
- }
2379
- }
2380
- console.debug('Audio recording restart completed successfully');
2381
- }
2382
- catch (error) {
2383
- console.error('Error restarting audio recording after device switch:', error);
2384
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2385
- }
2386
- }
2387
- /**
2388
- * Reinitializes VAD with a new stream (used after device switching)
2389
- */
2390
- async _reinitializeVAD(stream) {
2391
- // Clean up existing VAD
2392
- if (this.vad) {
2393
- this.vad.pause();
2394
- this.vad.destroy();
2395
- this.vad = null;
2270
+ if (options.height === undefined || options.width === undefined) {
2271
+ throw new Error('Image height and width must be defined');
2396
2272
  }
2397
- // Reinitialize with new stream
2398
- if (stream) {
2399
- this._initializeVAD();
2273
+ const { height, width } = options;
2274
+ const norm = options.norm;
2275
+ let normMean;
2276
+ let normBias;
2277
+ if (norm === undefined || norm.mean === undefined) {
2278
+ normMean = 255;
2400
2279
  }
2401
- }
2402
- /**
2403
- * Sets up the device change event listener
2404
- */
2405
- _setupDeviceChangeListener() {
2406
- if (!this.deviceChangeListener) {
2407
- this.deviceChangeListener = async (devices) => {
2408
- try {
2409
- const defaultDevice = devices.find((device) => device.default);
2410
- const usingDefaultDevice = this.useSystemDefaultDevice;
2411
- const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
2412
- const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
2413
- let shouldSwitch = !this.recorderStarted;
2414
- if (!shouldSwitch) {
2415
- if (usingDefaultDevice) {
2416
- if (!defaultDevice) {
2417
- shouldSwitch = true;
2418
- }
2419
- else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
2420
- shouldSwitch = true;
2421
- }
2422
- else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
2423
- (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
2424
- shouldSwitch = true;
2425
- }
2426
- }
2427
- else {
2428
- const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
2429
- shouldSwitch = !matchesRequestedDevice;
2430
- }
2431
- }
2432
- this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
2433
- if (shouldSwitch) {
2434
- console.debug('Selecting fallback audio input device');
2435
- const fallbackDevice = defaultDevice || devices[0];
2436
- if (fallbackDevice) {
2437
- const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
2438
- await this.setInputDevice(fallbackId);
2439
- }
2440
- else {
2441
- console.warn('No alternative audio device found');
2442
- }
2443
- }
2444
- }
2445
- catch (error) {
2446
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2447
- }
2448
- };
2280
+ else {
2281
+ normMean = norm.mean;
2449
2282
  }
2450
- this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
2451
- }
2452
- _teardownDeviceListeners() {
2453
- this.wavRecorder.listenForDeviceChange(null);
2454
- }
2455
- async _performDisconnectCleanup() {
2456
- var _a, _b;
2457
- this.deviceId = null;
2458
- this.activeDeviceId = null;
2459
- this.useSystemDefaultDevice = false;
2460
- this.lastReportedDeviceId = null;
2461
- this.lastKnownSystemDefaultDeviceKey = null;
2462
- this.recorderStarted = false;
2463
- this.readySent = false;
2464
- this._stopAmplitudeMonitoring();
2465
- this._teardownDeviceListeners();
2466
- if (this.vad) {
2467
- this.vad.pause();
2468
- this.vad.destroy();
2469
- this.vad = null;
2283
+ if (norm === undefined || norm.bias === undefined) {
2284
+ normBias = 0;
2470
2285
  }
2471
- await this.wavRecorder.quit();
2472
- (_b = (_a = this.wavPlayer).stop) === null || _b === void 0 ? void 0 : _b.call(_a);
2473
- this.wavPlayer.disconnect();
2474
- this._resetTurnTracking();
2475
- this.options.conversationId = this.conversationId;
2476
- this.userAudioAmplitude = 0;
2477
- this.agentAudioAmplitude = 0;
2478
- this._setStatus('disconnected');
2479
- this.options.onDisconnect();
2480
- }
2481
- _getDeviceComparisonKey(device) {
2482
- if (!device || typeof device !== 'object') {
2483
- return null;
2286
+ else {
2287
+ normBias = norm.bias;
2484
2288
  }
2485
- const deviceId = typeof device.deviceId === 'string' ? device.deviceId : '';
2486
- if (deviceId && deviceId !== 'default') {
2487
- return deviceId;
2289
+ const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
2290
+ // default value is RGBA since imagedata and HTMLImageElement uses it
2291
+ const outputformat = options.tensorFormat !== undefined ?
2292
+ (options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
2293
+ 'RGB';
2294
+ const offset = height * width;
2295
+ const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
2296
+ // Default pointer assignments
2297
+ let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
2298
+ let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
2299
+ // Updating the pointer assignments based on the input image format
2300
+ if (inputformat === 'RGB') {
2301
+ step = 3;
2302
+ rImagePointer = 0;
2303
+ gImagePointer = 1;
2304
+ bImagePointer = 2;
2305
+ aImagePointer = -1;
2488
2306
  }
2489
- const groupId = typeof device.groupId === 'string' ? device.groupId : '';
2490
- if (groupId) {
2491
- return groupId;
2307
+ // Updating the pointer assignments based on the output tensor format
2308
+ if (outputformat === 'RGBA') {
2309
+ aTensorPointer = offset * 3;
2492
2310
  }
2493
- const label = typeof device.label === 'string' ? device.label : '';
2494
- if (label) {
2495
- return label;
2311
+ else if (outputformat === 'RBG') {
2312
+ rTensorPointer = 0;
2313
+ bTensorPointer = offset;
2314
+ gTensorPointer = offset * 2;
2496
2315
  }
2497
- return null;
2498
- }
2499
- /**
2500
- * Mutes the microphone to stop sending audio to the server
2501
- * The connection and recording remain active for quick unmute
2502
- */
2503
- mute() {
2504
- if (!this.isMuted) {
2505
- this.isMuted = true;
2506
- console.log('Microphone muted');
2507
- this.options.onMuteStateChange(true);
2316
+ else if (outputformat === 'BGR') {
2317
+ bTensorPointer = 0;
2318
+ gTensorPointer = offset;
2319
+ rTensorPointer = offset * 2;
2508
2320
  }
2509
- }
2510
- /**
2511
- * Unmutes the microphone to resume sending audio to the server
2512
- */
2513
- unmute() {
2514
- if (this.isMuted) {
2515
- this.isMuted = false;
2516
- console.log('Microphone unmuted');
2517
- this.options.onMuteStateChange(false);
2321
+ for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
2322
+ float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
2323
+ float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
2324
+ float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
2325
+ if (aTensorPointer !== -1 && aImagePointer !== -1) {
2326
+ float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
2327
+ }
2518
2328
  }
2329
+ // Float32Array -> ort.Tensor
2330
+ const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
2331
+ new Tensor('float32', float32Data, [1, 3, height, width]);
2332
+ return outputTensor;
2519
2333
  }
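For orientation, the loop above fills one contiguous plane per channel, normalizing each sample as (value + bias) / mean. A minimal sketch of the same HWC-to-CHW step for the common RGBA-in, RGB-out case (toPlanarRGB is an illustrative name, not part of this bundle):

    function toPlanarRGB(buffer, width, height, bias = 0, mean = 255) {
      const offset = width * height;
      const out = new Float32Array(offset * 3);
      for (let i = 0; i < offset; i++) {
        out[i] = (buffer[i * 4] + bias) / mean;                  // R plane
        out[offset + i] = (buffer[i * 4 + 1] + bias) / mean;     // G plane
        out[offset * 2 + i] = (buffer[i * 4 + 2] + bias) / mean; // B plane
      }
      return out; // wrap as a [1, 3, height, width] tensor
    }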
2520
- }
2521
-
2522
- var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
2523
-
2524
- function getDefaultExportFromCjs (x) {
2525
- return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
2526
- }
2527
-
2528
- function getAugmentedNamespace(n) {
2529
- if (n.__esModule) return n;
2530
- var f = n.default;
2531
- if (typeof f == "function") {
2532
- var a = function a () {
2533
- if (this instanceof a) {
2534
- return Reflect.construct(f, arguments, this.constructor);
2535
- }
2536
- return f.apply(this, arguments);
2537
- };
2538
- a.prototype = f.prototype;
2539
- } else a = {};
2540
- Object.defineProperty(a, '__esModule', {value: true});
2541
- Object.keys(n).forEach(function (k) {
2542
- var d = Object.getOwnPropertyDescriptor(n, k);
2543
- Object.defineProperty(a, k, d.get ? d : {
2544
- enumerable: true,
2545
- get: function () {
2546
- return n[k];
2547
- }
2548
- });
2549
- });
2550
- return a;
2551
- }
2552
-
2553
- var dist = {};
2554
-
2555
- var assetPath = {};
2556
-
2557
- Object.defineProperty(assetPath, "__esModule", { value: true });
2558
- assetPath.baseAssetPath = void 0;
2559
- // nextjs@14 bundler may attempt to execute this during SSR and crash
2560
- const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
2561
- const currentScript = isWeb
2562
- ? window.document.currentScript
2563
- : null;
2564
- let basePath = "/";
2565
- if (currentScript) {
2566
- basePath = currentScript.src
2567
- .replace(/#.*$/, "")
2568
- .replace(/\?.*$/, "")
2569
- .replace(/\/[^\/]+$/, "/");
2570
- }
2571
- assetPath.baseAssetPath = basePath;
2572
-
2573
- var defaultModelFetcher$1 = {};
2574
-
2575
- Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
2576
- defaultModelFetcher$1.defaultModelFetcher = void 0;
2577
- const defaultModelFetcher = (path) => {
2578
- return fetch(path).then((model) => model.arrayBuffer());
2579
- };
2580
- defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
2581
-
2582
- var frameProcessor = {};
2583
-
2584
- var logging = {};
2585
-
2586
- (function (exports) {
2587
- Object.defineProperty(exports, "__esModule", { value: true });
2588
- exports.log = exports.LOG_PREFIX = void 0;
2589
- exports.LOG_PREFIX = "[VAD]";
2590
- const levels = ["error", "debug", "warn"];
2591
- function getLog(level) {
2592
- return (...args) => {
2593
- console[level](exports.LOG_PREFIX, ...args);
2594
- };
2595
- }
2596
- const _log = levels.reduce((acc, level) => {
2597
- acc[level] = getLog(level);
2598
- return acc;
2599
- }, {});
2600
- exports.log = _log;
2601
-
2602
- } (logging));
2603
-
2604
- var messages = {};
2605
-
2606
- Object.defineProperty(messages, "__esModule", { value: true });
2607
- messages.Message = void 0;
2608
- var Message;
2609
- (function (Message) {
2610
- Message["AudioFrame"] = "AUDIO_FRAME";
2611
- Message["SpeechStart"] = "SPEECH_START";
2612
- Message["VADMisfire"] = "VAD_MISFIRE";
2613
- Message["SpeechEnd"] = "SPEECH_END";
2614
- Message["SpeechStop"] = "SPEECH_STOP";
2615
- Message["SpeechRealStart"] = "SPEECH_REAL_START";
2616
- Message["FrameProcessed"] = "FRAME_PROCESSED";
2617
- })(Message || (messages.Message = Message = {}));
2618
-
2619
- /*
2620
- Some of this code, together with the default options found in index.ts,
2621
- were taken (or took inspiration) from https://github.com/snakers4/silero-vad
2622
- */
2623
- Object.defineProperty(frameProcessor, "__esModule", { value: true });
2624
- frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
2625
- const logging_1$3 = logging;
2626
- const messages_1 = messages;
2627
- const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
2628
- frameProcessor.defaultLegacyFrameProcessorOptions = {
2629
- positiveSpeechThreshold: 0.5,
2630
- negativeSpeechThreshold: 0.5 - 0.15,
2631
- preSpeechPadFrames: 1,
2632
- redemptionFrames: 8,
2633
- frameSamples: 1536,
2634
- minSpeechFrames: 3,
2635
- submitUserSpeechOnPause: false,
2636
- };
2637
- frameProcessor.defaultV5FrameProcessorOptions = {
2638
- positiveSpeechThreshold: 0.5,
2639
- negativeSpeechThreshold: 0.5 - 0.15,
2640
- preSpeechPadFrames: 3,
2641
- redemptionFrames: 24,
2642
- frameSamples: 512,
2643
- minSpeechFrames: 9,
2644
- submitUserSpeechOnPause: false,
2645
- };
2646
- function validateOptions(options) {
2647
- if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
2648
- logging_1$3.log.warn("You are using an unusual frame size");
2649
- }
2650
- if (options.positiveSpeechThreshold < 0 ||
2651
- options.positiveSpeechThreshold > 1) {
2652
- logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
2653
- }
2654
- if (options.negativeSpeechThreshold < 0 ||
2655
- options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
2656
- logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
2657
- }
2658
- if (options.preSpeechPadFrames < 0) {
2659
- logging_1$3.log.error("preSpeechPadFrames should be positive");
2660
- }
2661
- if (options.redemptionFrames < 0) {
2662
- logging_1$3.log.error("redemptionFrames should be positive");
2663
- }
2664
- }
2665
- frameProcessor.validateOptions = validateOptions;
2666
- const concatArrays = (arrays) => {
2667
- const sizes = arrays.reduce((out, next) => {
2668
- out.push(out.at(-1) + next.length);
2669
- return out;
2670
- }, [0]);
2671
- const outArray = new Float32Array(sizes.at(-1));
2672
- arrays.forEach((arr, index) => {
2673
- const place = sizes[index];
2674
- outArray.set(arr, place);
2675
- });
2676
- return outArray;
2677
- };
2678
- class FrameProcessor {
2679
- constructor(modelProcessFunc, modelResetFunc, options) {
2680
- this.modelProcessFunc = modelProcessFunc;
2681
- this.modelResetFunc = modelResetFunc;
2682
- this.options = options;
2683
- this.speaking = false;
2684
- this.redemptionCounter = 0;
2685
- this.speechFrameCount = 0;
2686
- this.active = false;
2687
- this.speechRealStartFired = false;
2688
- this.reset = () => {
2689
- this.speaking = false;
2690
- this.speechRealStartFired = false;
2691
- this.audioBuffer = [];
2692
- this.modelResetFunc();
2693
- this.redemptionCounter = 0;
2694
- this.speechFrameCount = 0;
2695
- };
2696
- this.pause = (handleEvent) => {
2697
- this.active = false;
2698
- if (this.options.submitUserSpeechOnPause) {
2699
- this.endSegment(handleEvent);
2700
- }
2701
- else {
2702
- this.reset();
2703
- }
2704
- };
2705
- this.resume = () => {
2706
- this.active = true;
2707
- };
2708
- this.endSegment = (handleEvent) => {
2709
- const audioBuffer = this.audioBuffer;
2710
- this.audioBuffer = [];
2711
- const speaking = this.speaking;
2712
- this.reset();
2713
- if (speaking) {
2714
- const speechFrameCount = audioBuffer.reduce((acc, item) => {
2715
- return item.isSpeech ? (acc + 1) : acc;
2716
- }, 0);
2717
- if (speechFrameCount >= this.options.minSpeechFrames) {
2718
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
2719
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
2334
+ static async fromImage(image, options) {
2335
+ // checking the type of image object
2336
+ const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
2337
+ const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
2338
+ const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
2339
+ const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
2340
+ let data;
2341
+ let tensorConfig = {};
2342
+ // filling and checking image configuration options
2343
+ if (isHTMLImageEle) {
2344
+ // HTMLImageElement - image object - format is RGBA by default
2345
+ const canvas = document.createElement('canvas');
2346
+ const pixels2DContext = canvas.getContext('2d');
2347
+ if (pixels2DContext != null) {
2348
+ let height = image.naturalHeight;
2349
+ let width = image.naturalWidth;
2350
+ if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
2351
+ height = options.resizedHeight;
2352
+ width = options.resizedWidth;
2353
+ }
2354
+ if (options !== undefined) {
2355
+ tensorConfig = options;
2356
+ if (options.tensorFormat !== undefined) {
2357
+ throw new Error('Image input config format must be RGBA for HTMLImageElement');
2358
+ }
2359
+ else {
2360
+ tensorConfig.tensorFormat = 'RGBA';
2361
+ }
2362
+ if (options.height !== undefined && options.height !== height) {
2363
+ throw new Error('Image input config height doesn\'t match HTMLImageElement height');
2364
+ }
2365
+ else {
2366
+ tensorConfig.height = height;
2367
+ }
2368
+ if (options.width !== undefined && options.width !== width) {
2369
+ throw new Error('Image input config width doesn\'t match HTMLImageElement width');
2370
+ }
2371
+ else {
2372
+ tensorConfig.width = width;
2373
+ }
2720
2374
  }
2721
2375
  else {
2722
- handleEvent({ msg: messages_1.Message.VADMisfire });
2376
+ tensorConfig.tensorFormat = 'RGBA';
2377
+ tensorConfig.height = height;
2378
+ tensorConfig.width = width;
2723
2379
  }
2380
+ canvas.width = width;
2381
+ canvas.height = height;
2382
+ pixels2DContext.drawImage(image, 0, 0, width, height);
2383
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2724
2384
  }
2725
- return {};
2726
- };
2727
- this.process = async (frame, handleEvent) => {
2728
- if (!this.active) {
2729
- return;
2385
+ else {
2386
+ throw new Error('Can not access image data');
2730
2387
  }
2731
- const probs = await this.modelProcessFunc(frame);
2732
- const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;
2733
- handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });
2734
- this.audioBuffer.push({
2735
- frame,
2736
- isSpeech,
2737
- });
2738
- if (isSpeech) {
2739
- this.speechFrameCount++;
2740
- this.redemptionCounter = 0;
2388
+ }
2389
+ else if (isImageDataEle) {
2390
+ // ImageData - image object - format is RGBA by default
2391
+ const format = 'RGBA';
2392
+ let height;
2393
+ let width;
2394
+ if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
2395
+ height = options.resizedHeight;
2396
+ width = options.resizedWidth;
2741
2397
  }
2742
- if (isSpeech && !this.speaking) {
2743
- this.speaking = true;
2744
- handleEvent({ msg: messages_1.Message.SpeechStart });
2398
+ else {
2399
+ height = image.height;
2400
+ width = image.width;
2745
2401
  }
2746
- if (this.speaking &&
2747
- this.speechFrameCount === this.options.minSpeechFrames &&
2748
- !this.speechRealStartFired) {
2749
- this.speechRealStartFired = true;
2750
- handleEvent({ msg: messages_1.Message.SpeechRealStart });
2402
+ if (options !== undefined) {
2403
+ tensorConfig = options;
2404
+ if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
2405
+ throw new Error('Image input config format must be RGBA for ImageData');
2406
+ }
2407
+ else {
2408
+ tensorConfig.bitmapFormat = 'RGBA';
2409
+ }
2751
2410
  }
2752
- if (probs.isSpeech < this.options.negativeSpeechThreshold &&
2753
- this.speaking &&
2754
- ++this.redemptionCounter >= this.options.redemptionFrames) {
2755
- this.redemptionCounter = 0;
2756
- this.speechFrameCount = 0;
2757
- this.speaking = false;
2758
- this.speechRealStartFired = false;
2759
- const audioBuffer = this.audioBuffer;
2760
- this.audioBuffer = [];
2761
- const speechFrameCount = audioBuffer.reduce((acc, item) => {
2762
- return item.isSpeech ? (acc + 1) : acc;
2763
- }, 0);
2764
- if (speechFrameCount >= this.options.minSpeechFrames) {
2765
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
2766
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
2411
+ else {
2412
+ tensorConfig.bitmapFormat = 'RGBA';
2413
+ }
2414
+ tensorConfig.height = height;
2415
+ tensorConfig.width = width;
2416
+ if (options !== undefined) {
2417
+ const tempCanvas = document.createElement('canvas');
2418
+ tempCanvas.width = width;
2419
+ tempCanvas.height = height;
2420
+ const pixels2DContext = tempCanvas.getContext('2d');
2421
+ if (pixels2DContext != null) {
2422
+ pixels2DContext.putImageData(image, 0, 0);
2423
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2767
2424
  }
2768
2425
  else {
2769
- handleEvent({ msg: messages_1.Message.VADMisfire });
2426
+ throw new Error('Can not access image data');
2770
2427
  }
2771
2428
  }
2772
- if (!this.speaking) {
2773
- while (this.audioBuffer.length > this.options.preSpeechPadFrames) {
2774
- this.audioBuffer.shift();
2775
- }
2776
- this.speechFrameCount = 0;
2429
+ else {
2430
+ data = image.data;
2777
2431
  }
2778
- };
2779
- this.audioBuffer = [];
2780
- this.reset();
2781
- }
2782
- }
2783
- frameProcessor.FrameProcessor = FrameProcessor;
2784
-
2785
- var nonRealTimeVad = {};
2786
-
2787
- var ortWeb_min = {exports: {}};
2788
-
2789
- // Copyright (c) Microsoft Corporation. All rights reserved.
2790
- // Licensed under the MIT License.
2791
- const backends = {};
2792
- const backendsSortedByPriority = [];
2793
- /**
2794
- * Register a backend.
2795
- *
2796
- * @param name - the name as a key to lookup as an execution provider.
2797
- * @param backend - the backend object.
2798
- * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
2799
- * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
2800
- *
2801
- * @internal
2802
- */
2803
- const registerBackend = (name, backend, priority) => {
2804
- if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
2805
- const currentBackend = backends[name];
2806
- if (currentBackend === undefined) {
2807
- backends[name] = { backend, priority };
2808
- }
2809
- else if (currentBackend.priority > priority) {
2810
- // same name is already registered with a higher priority. skip registration.
2811
- return;
2812
2432
  }
2813
- else if (currentBackend.priority === priority) {
2814
- if (currentBackend.backend !== backend) {
2815
- throw new Error(`cannot register backend "${name}" using priority ${priority}`);
2433
+ else if (isImageBitmap) {
2434
+ // ImageBitmap - image object - format must be provided by user
2435
+ if (options === undefined) {
2436
+ throw new Error('Please provide image config with format for ImageBitmap');
2816
2437
  }
2817
- }
2818
- if (priority >= 0) {
2819
- const i = backendsSortedByPriority.indexOf(name);
2820
- if (i !== -1) {
2821
- backendsSortedByPriority.splice(i, 1);
2438
+ if (options.bitmapFormat !== undefined) {
2439
+ throw new Error('Image input config format must be defined for ImageBitmap');
2822
2440
  }
2823
- for (let i = 0; i < backendsSortedByPriority.length; i++) {
2824
- if (backends[backendsSortedByPriority[i]].priority <= priority) {
2825
- backendsSortedByPriority.splice(i, 0, name);
2826
- return;
2441
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
2442
+ if (pixels2DContext != null) {
2443
+ const height = image.height;
2444
+ const width = image.width;
2445
+ pixels2DContext.drawImage(image, 0, 0, width, height);
2446
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2447
+ if (options !== undefined) {
2448
+ // using square brackets to avoid TS error - type 'never'
2449
+ if (options.height !== undefined && options.height !== height) {
2450
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
2451
+ }
2452
+ else {
2453
+ tensorConfig.height = height;
2454
+ }
2455
+ // using square brackets to avoid TS error - type 'never'
2456
+ if (options.width !== undefined && options.width !== width) {
2457
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
2458
+ }
2459
+ else {
2460
+ tensorConfig.width = width;
2461
+ }
2462
+ }
2463
+ else {
2464
+ tensorConfig.height = height;
2465
+ tensorConfig.width = width;
2827
2466
  }
2467
+ return Tensor.bufferToTensor(data, tensorConfig);
2468
+ }
2469
+ else {
2470
+ throw new Error('Can not access image data');
2828
2471
  }
2829
- backendsSortedByPriority.push(name);
2830
2472
  }
2831
- return;
2473
+ else if (isURL) {
2474
+ return new Promise((resolve, reject) => {
2475
+ const canvas = document.createElement('canvas');
2476
+ const context = canvas.getContext('2d');
2477
+ if (!image || !context) {
2478
+ return reject();
2479
+ }
2480
+ const newImage = new Image();
2481
+ newImage.crossOrigin = 'Anonymous';
2482
+ newImage.src = image;
2483
+ newImage.onload = () => {
2484
+ canvas.width = newImage.width;
2485
+ canvas.height = newImage.height;
2486
+ context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
2487
+ const img = context.getImageData(0, 0, canvas.width, canvas.height);
2488
+ if (options !== undefined) {
2489
+ // using square brackets to avoid TS error - type 'never'
2490
+ if (options.height !== undefined && options.height !== canvas.height) {
2491
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
2492
+ }
2493
+ else {
2494
+ tensorConfig.height = canvas.height;
2495
+ }
2496
+ // using square brackets to avoid TS error - type 'never'
2497
+ if (options.width !== undefined && options.width !== canvas.width) {
2498
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
2499
+ }
2500
+ else {
2501
+ tensorConfig.width = canvas.width;
2502
+ }
2503
+ }
2504
+ else {
2505
+ tensorConfig.height = canvas.height;
2506
+ tensorConfig.width = canvas.width;
2507
+ }
2508
+ resolve(Tensor.bufferToTensor(img.data, tensorConfig));
2509
+ };
2510
+ });
2511
+ }
2512
+ else {
2513
+ throw new Error('Input data provided is not supported - aborted tensor creation');
2514
+ }
2515
+ if (data !== undefined) {
2516
+ return Tensor.bufferToTensor(data, tensorConfig);
2517
+ }
2518
+ else {
2519
+ throw new Error('Input data provided is not supported - aborted tensor creation');
2520
+ }
2832
2521
  }
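A browser-side usage sketch for the method above, assuming access to the bundled Tensor class and an already-loaded <img> element (selector and sizes are illustrative):

    const img = document.querySelector('img#sample');
    const input = await Tensor.fromImage(img, { resizedWidth: 224, resizedHeight: 224 });
    // HTMLImageElement inputs are forced to RGBA, so input.dims is [1, 4, 224, 224]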
2833
- throw new TypeError('not a valid backend');
2834
- };
2835
- /**
2836
- * Resolve backend by specified hints.
2837
- *
2838
- * @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
2839
- * @returns a promise that resolves to the backend.
2840
- *
2841
- * @internal
2842
- */
2843
- const resolveBackend = async (backendHints) => {
2844
- const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
2845
- const errors = [];
2846
- for (const backendName of backendNames) {
2847
- const backendInfo = backends[backendName];
2848
- if (backendInfo) {
2849
- if (backendInfo.initialized) {
2850
- return backendInfo.backend;
2522
+ toImageData(options) {
2523
+ var _a, _b;
2524
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
2525
+ let image;
2526
+ if (pixels2DContext != null) {
2527
+ // Default values for height and width & format
2528
+ const width = this.dims[3];
2529
+ const height = this.dims[2];
2530
+ const channels = this.dims[1];
2531
+ const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
2532
+ const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
2533
+ const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
2534
+ const offset = height * width;
2535
+ if (options !== undefined) {
2536
+ if (options.height !== undefined && options.height !== height) {
2537
+ throw new Error('Image output config height doesn\'t match tensor height');
2538
+ }
2539
+ if (options.width !== undefined && options.width !== width) {
2540
+ throw new Error('Image output config width doesn\'t match tensor width');
2541
+ }
2542
+ if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
2543
+ (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
2544
+ throw new Error('Tensor format doesn\'t match input tensor dims');
2545
+ }
2851
2546
  }
2852
- else if (backendInfo.aborted) {
2853
- continue; // current backend is unavailable; try next
2547
+ // Default pointer assignments
2548
+ const step = 4;
2549
+ let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
2550
+ let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
2551
+ // Updating the pointer assignments based on the input image format
2552
+ if (inputformat === 'RGBA') {
2553
+ rTensorPointer = 0;
2554
+ gTensorPointer = offset;
2555
+ bTensorPointer = offset * 2;
2556
+ aTensorPointer = offset * 3;
2854
2557
  }
2855
- const isInitializing = !!backendInfo.initPromise;
2856
- try {
2857
- if (!isInitializing) {
2858
- backendInfo.initPromise = backendInfo.backend.init();
2859
- }
2860
- await backendInfo.initPromise;
2861
- backendInfo.initialized = true;
2862
- return backendInfo.backend;
2558
+ else if (inputformat === 'RGB') {
2559
+ rTensorPointer = 0;
2560
+ gTensorPointer = offset;
2561
+ bTensorPointer = offset * 2;
2863
2562
  }
2864
- catch (e) {
2865
- if (!isInitializing) {
2866
- errors.push({ name: backendName, err: e });
2867
- }
2868
- backendInfo.aborted = true;
2563
+ else if (inputformat === 'RBG') {
2564
+ rTensorPointer = 0;
2565
+ bTensorPointer = offset;
2566
+ gTensorPointer = offset * 2;
2869
2567
  }
2870
- finally {
2871
- delete backendInfo.initPromise;
2568
+ image = pixels2DContext.createImageData(width, height);
2569
+ for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
2570
+ image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
2571
+ image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
2572
+ image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
2573
+ image.data[aImagePointer] =
2574
+ aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
2872
2575
  }
2873
2576
  }
2577
+ else {
2578
+ throw new Error('Can not access image data');
2579
+ }
2580
+ return image;
2874
2581
  }
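A sketch of the reverse path, drawing a [1, 3, H, W] float32 tensor back to a canvas; each sample is denormalized as (value - bias) * mean, and the canvas element is illustrative:

    const imageData = tensor.toImageData({ format: 'RGB' });
    const ctx = document.querySelector('canvas#preview').getContext('2d');
    ctx.putImageData(imageData, 0, 0);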
2875
- throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
2876
- };
2877
-
2878
- // Copyright (c) Microsoft Corporation. All rights reserved.
2879
- // Licensed under the MIT License.
2880
- class EnvImpl {
2881
- constructor() {
2882
- this.wasm = {};
2883
- this.webgl = {};
2884
- this.logLevelInternal = 'warning';
2885
- }
2886
- // TODO standadize the getter and setter convention in env for other fields.
2887
- set logLevel(value) {
2888
- if (value === undefined) {
2889
- return;
2890
- }
2891
- if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
2892
- throw new Error(`Unsupported logging level: ${value}`);
2893
- }
2894
- this.logLevelInternal = value;
2895
- }
2896
- get logLevel() {
2897
- return this.logLevelInternal;
2582
+ // #endregion
2583
+ // #region tensor utilities
2584
+ reshape(dims) {
2585
+ return new Tensor(this.type, this.data, dims);
2898
2586
  }
2899
- }
2587
+ };
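reshape above re-wraps the same backing buffer under new dims without copying, so the element count must match; a quick sketch:

    const t = new Tensor('float32', new Float32Array([1, 2, 3, 4, 5, 6]), [2, 3]);
    const r = t.reshape([3, 2]); // ok: 2 * 3 === 3 * 2
    // r.data === t.data -> true; t.reshape([4, 2]) would throw a size-mismatch Error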
2900
2588
 
2901
2589
  // Copyright (c) Microsoft Corporation. All rights reserved.
2902
2590
  // Licensed under the MIT License.
2903
- /**
2904
- * Represent a set of flags as a global singleton.
2905
- */
2906
- const env = new EnvImpl();
2591
+ // eslint-disable-next-line @typescript-eslint/naming-convention
2592
+ const Tensor = Tensor$1;
2907
2593
 
2908
2594
  // Copyright (c) Microsoft Corporation. All rights reserved.
2909
2595
  // Licensed under the MIT License.
2910
- const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && typeof BigInt64Array.from === 'function';
2911
- const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && typeof BigUint64Array.from === 'function';
2912
- // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2913
- const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
2914
- ['float32', Float32Array],
2915
- ['uint8', Uint8Array],
2916
- ['int8', Int8Array],
2917
- ['uint16', Uint16Array],
2918
- ['int16', Int16Array],
2919
- ['int32', Int32Array],
2920
- ['bool', Uint8Array],
2921
- ['float64', Float64Array],
2922
- ['uint32', Uint32Array],
2923
- ]);
2924
- // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2925
- const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
2926
- [Float32Array, 'float32'],
2927
- [Uint8Array, 'uint8'],
2928
- [Int8Array, 'int8'],
2929
- [Uint16Array, 'uint16'],
2930
- [Int16Array, 'int16'],
2931
- [Int32Array, 'int32'],
2932
- [Float64Array, 'float64'],
2933
- [Uint32Array, 'uint32'],
2934
- ]);
2935
- if (isBigInt64ArrayAvailable) {
2936
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
2937
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
2938
- }
2939
- if (isBigUint64ArrayAvailable) {
2940
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
2941
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
2942
- }
2943
- /**
2944
- * calculate size from dims.
2945
- *
2946
- * @param dims the dims array. May be an illegal input.
2947
- */
2948
- const calculateSize = (dims) => {
2949
- let size = 1;
2950
- for (let i = 0; i < dims.length; i++) {
2951
- const dim = dims[i];
2952
- if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
2953
- throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
2954
- }
2955
- if (dim < 0) {
2956
- throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
2957
- }
2958
- size *= dim;
2596
+ let InferenceSession$1 = class InferenceSession {
2597
+ constructor(handler) {
2598
+ this.handler = handler;
2959
2599
  }
2960
- return size;
2961
- };
2962
- let Tensor$1 = class Tensor {
2963
- constructor(arg0, arg1, arg2) {
2964
- let type;
2965
- let data;
2966
- let dims;
2967
- // check whether arg0 is type or data
2968
- if (typeof arg0 === 'string') {
2969
- //
2970
- // Override: constructor(type, data, ...)
2971
- //
2972
- type = arg0;
2973
- dims = arg2;
2974
- if (arg0 === 'string') {
2975
- // string tensor
2976
- if (!Array.isArray(arg1)) {
2977
- throw new TypeError('A string tensor\'s data must be a string array.');
2978
- }
2979
- // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
2980
- // error will be populated at inference
2981
- data = arg1;
2600
+ async run(feeds, arg1, arg2) {
2601
+ const fetches = {};
2602
+ let options = {};
2603
+ // check inputs
2604
+ if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
2605
+ throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
2606
+ }
2607
+ let isFetchesEmpty = true;
2608
+ // determine which override is being used
2609
+ if (typeof arg1 === 'object') {
2610
+ if (arg1 === null) {
2611
+ throw new TypeError('Unexpected argument[1]: cannot be null.');
2982
2612
  }
2983
- else {
2984
- // numeric tensor
2985
- const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(arg0);
2986
- if (typedArrayConstructor === undefined) {
2987
- throw new TypeError(`Unsupported tensor type: ${arg0}.`);
2613
+ if (arg1 instanceof Tensor) {
2614
+ throw new TypeError('\'fetches\' cannot be a Tensor');
2615
+ }
2616
+ if (Array.isArray(arg1)) {
2617
+ if (arg1.length === 0) {
2618
+ throw new TypeError('\'fetches\' cannot be an empty array.');
2988
2619
  }
2989
- if (Array.isArray(arg1)) {
2990
- // use 'as any' here because TypeScript's check on type of 'SupportedTypedArrayConstructors.from()' produces
2991
- // incorrect results.
2992
- // 'typedArrayConstructor' should be one of the typed array prototype objects.
2993
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
2994
- data = typedArrayConstructor.from(arg1);
2620
+ isFetchesEmpty = false;
2621
+ // output names
2622
+ for (const name of arg1) {
2623
+ if (typeof name !== 'string') {
2624
+ throw new TypeError('\'fetches\' must be a string array or an object.');
2625
+ }
2626
+ if (this.outputNames.indexOf(name) === -1) {
2627
+ throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
2628
+ }
2629
+ fetches[name] = null;
2995
2630
  }
2996
- else if (arg1 instanceof typedArrayConstructor) {
2997
- data = arg1;
2631
+ if (typeof arg2 === 'object' && arg2 !== null) {
2632
+ options = arg2;
2998
2633
  }
2999
- else {
3000
- throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
2634
+ else if (typeof arg2 !== 'undefined') {
2635
+ throw new TypeError('\'options\' must be an object.');
3001
2636
  }
3002
2637
  }
3003
- }
3004
- else {
3005
- //
3006
- // Override: constructor(data, ...)
3007
- //
3008
- dims = arg1;
3009
- if (Array.isArray(arg0)) {
3010
- // only boolean[] and string[] is supported
3011
- if (arg0.length === 0) {
3012
- throw new TypeError('Tensor type cannot be inferred from an empty array.');
3013
- }
3014
- const firstElementType = typeof arg0[0];
3015
- if (firstElementType === 'string') {
3016
- type = 'string';
3017
- data = arg0;
2638
+ else {
2639
+ // decide whether arg1 is fetches or options
2640
+ // if any output name is present and its value is valid OnnxValue, we consider it fetches
2641
+ let isFetches = false;
2642
+ const arg1Keys = Object.getOwnPropertyNames(arg1);
2643
+ for (const name of this.outputNames) {
2644
+ if (arg1Keys.indexOf(name) !== -1) {
2645
+ const v = arg1[name];
2646
+ if (v === null || v instanceof Tensor) {
2647
+ isFetches = true;
2648
+ isFetchesEmpty = false;
2649
+ fetches[name] = v;
2650
+ }
2651
+ }
3018
2652
  }
3019
- else if (firstElementType === 'boolean') {
3020
- type = 'bool';
3021
- // 'arg0' is of type 'boolean[]'. Uint8Array.from(boolean[]) actually works, but typescript thinks this is
3022
- // wrong type. We use 'as any' to make it happy.
3023
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
3024
- data = Uint8Array.from(arg0);
2653
+ if (isFetches) {
2654
+ if (typeof arg2 === 'object' && arg2 !== null) {
2655
+ options = arg2;
2656
+ }
2657
+ else if (typeof arg2 !== 'undefined') {
2658
+ throw new TypeError('\'options\' must be an object.');
2659
+ }
3025
2660
  }
3026
2661
  else {
3027
- throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
3028
- }
3029
- }
3030
- else {
3031
- // get tensor type from TypedArray
3032
- const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
3033
- if (mappedType === undefined) {
3034
- throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
2662
+ options = arg1;
3035
2663
  }
3036
- type = mappedType;
3037
- data = arg0;
3038
2664
  }
3039
2665
  }
3040
- // type and data is processed, now processing dims
3041
- if (dims === undefined) {
3042
- // assume 1-D tensor if dims omitted
3043
- dims = [data.length];
2666
+ else if (typeof arg1 !== 'undefined') {
2667
+ throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
3044
2668
  }
3045
- else if (!Array.isArray(dims)) {
3046
- throw new TypeError('A tensor\'s dims must be a number array');
2669
+ // check if all inputs are in feed
2670
+ for (const name of this.inputNames) {
2671
+ if (typeof feeds[name] === 'undefined') {
2672
+ throw new Error(`input '${name}' is missing in 'feeds'.`);
2673
+ }
3047
2674
  }
3048
- // perform check
3049
- const size = calculateSize(dims);
3050
- if (size !== data.length) {
3051
- throw new Error(`Tensor's size(${size}) does not match data length(${data.length}).`);
2675
+ // if no fetches is specified, we use the full output names list
2676
+ if (isFetchesEmpty) {
2677
+ for (const name of this.outputNames) {
2678
+ fetches[name] = null;
2679
+ }
3052
2680
  }
3053
- this.dims = dims;
3054
- this.type = type;
3055
- this.data = data;
3056
- this.size = size;
3057
- }
3058
- // #endregion
3059
- /**
3060
- * Create a new tensor object from image object
3061
- *
3062
- * @param buffer - Extracted image buffer data - assuming RGBA format
3063
- * @param imageFormat - input image configuration - required configurations height, width, format
3064
- * @param tensorFormat - output tensor configuration - Default is RGB format
3065
- */
3066
- static bufferToTensor(buffer, options) {
3067
- if (buffer === undefined) {
3068
- throw new Error('Image buffer must be defined');
2681
+ // feeds, fetches and options are prepared
2682
+ const results = await this.handler.run(feeds, fetches, options);
2683
+ const returnValue = {};
2684
+ for (const key in results) {
2685
+ if (Object.hasOwnProperty.call(results, key)) {
2686
+ returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
2687
+ }
3069
2688
  }
3070
- if (options.height === undefined || options.width === undefined) {
3071
- throw new Error('Image height and width must be defined');
3072
- }
3073
- const { height, width } = options;
3074
- const norm = options.norm;
3075
- let normMean;
3076
- let normBias;
3077
- if (norm === undefined || norm.mean === undefined) {
3078
- normMean = 255;
3079
- }
3080
- else {
3081
- normMean = norm.mean;
3082
- }
3083
- if (norm === undefined || norm.bias === undefined) {
3084
- normBias = 0;
3085
- }
3086
- else {
3087
- normBias = norm.bias;
3088
- }
3089
- const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
3090
- // default value is RGBA since imagedata and HTMLImageElement uses it
3091
- const outputformat = options.tensorFormat !== undefined ?
3092
- (options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
3093
- 'RGB';
3094
- const offset = height * width;
3095
- const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
3096
- // Default pointer assignments
3097
- let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
3098
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
3099
- // Updating the pointer assignments based on the input image format
3100
- if (inputformat === 'RGB') {
3101
- step = 3;
3102
- rImagePointer = 0;
3103
- gImagePointer = 1;
3104
- bImagePointer = 2;
3105
- aImagePointer = -1;
3106
- }
3107
- // Updating the pointer assignments based on the output tensor format
3108
- if (outputformat === 'RGBA') {
3109
- aTensorPointer = offset * 3;
3110
- }
3111
- else if (outputformat === 'RBG') {
3112
- rTensorPointer = 0;
3113
- bTensorPointer = offset;
3114
- gTensorPointer = offset * 2;
3115
- }
3116
- else if (outputformat === 'BGR') {
3117
- bTensorPointer = 0;
3118
- gTensorPointer = offset;
3119
- rTensorPointer = offset * 2;
3120
- }
3121
- for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
3122
- float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
3123
- float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
3124
- float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
3125
- if (aTensorPointer !== -1 && aImagePointer !== -1) {
3126
- float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
3127
- }
3128
- }
3129
- // Float32Array -> ort.Tensor
3130
- const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
3131
- new Tensor('float32', float32Data, [1, 3, height, width]);
3132
- return outputTensor;
2689
+ return returnValue;
3133
2690
  }
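The argument disambiguation above supports three call shapes; a sketch with illustrative session, tensor, and output names:

    const a = await session.run({ input: x });                          // feeds only: all outputs fetched
    const b = await session.run({ input: x }, ['output']);              // feeds plus a fetches name list
    const c = await session.run({ input: x }, { logSeverityLevel: 2 }); // feeds plus run options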
3134
- static async fromImage(image, options) {
3135
- // checking the type of image object
3136
- const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
3137
- const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
3138
- const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
3139
- const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
3140
- let data;
3141
- let tensorConfig = {};
3142
- // filling and checking image configuration options
3143
- if (isHTMLImageEle) {
3144
- // HTMLImageElement - image object - format is RGBA by default
3145
- const canvas = document.createElement('canvas');
3146
- const pixels2DContext = canvas.getContext('2d');
3147
- if (pixels2DContext != null) {
3148
- let height = image.naturalHeight;
3149
- let width = image.naturalWidth;
3150
- if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
3151
- height = options.resizedHeight;
3152
- width = options.resizedWidth;
3153
- }
3154
- if (options !== undefined) {
3155
- tensorConfig = options;
3156
- if (options.tensorFormat !== undefined) {
3157
- throw new Error('Image input config format must be RGBA for HTMLImageElement');
3158
- }
3159
- else {
3160
- tensorConfig.tensorFormat = 'RGBA';
3161
- }
3162
- if (options.height !== undefined && options.height !== height) {
3163
- throw new Error('Image input config height doesn\'t match HTMLImageElement height');
3164
- }
3165
- else {
3166
- tensorConfig.height = height;
3167
- }
3168
- if (options.width !== undefined && options.width !== width) {
3169
- throw new Error('Image input config width doesn\'t match HTMLImageElement width');
3170
- }
3171
- else {
3172
- tensorConfig.width = width;
3173
- }
3174
- }
3175
- else {
3176
- tensorConfig.tensorFormat = 'RGBA';
3177
- tensorConfig.height = height;
3178
- tensorConfig.width = width;
3179
- }
3180
- canvas.width = width;
3181
- canvas.height = height;
3182
- pixels2DContext.drawImage(image, 0, 0, width, height);
3183
- data = pixels2DContext.getImageData(0, 0, width, height).data;
2691
+ static async create(arg0, arg1, arg2, arg3) {
2692
+ // either load from a file or buffer
2693
+ let filePathOrUint8Array;
2694
+ let options = {};
2695
+ if (typeof arg0 === 'string') {
2696
+ filePathOrUint8Array = arg0;
2697
+ if (typeof arg1 === 'object' && arg1 !== null) {
2698
+ options = arg1;
3184
2699
  }
3185
- else {
3186
- throw new Error('Can not access image data');
2700
+ else if (typeof arg1 !== 'undefined') {
2701
+ throw new TypeError('\'options\' must be an object.');
3187
2702
  }
3188
2703
  }
3189
- else if (isImageDataEle) {
3190
- // ImageData - image object - format is RGBA by default
3191
- const format = 'RGBA';
3192
- let height;
3193
- let width;
3194
- if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
3195
- height = options.resizedHeight;
3196
- width = options.resizedWidth;
3197
- }
3198
- else {
3199
- height = image.height;
3200
- width = image.width;
2704
+ else if (arg0 instanceof Uint8Array) {
2705
+ filePathOrUint8Array = arg0;
2706
+ if (typeof arg1 === 'object' && arg1 !== null) {
2707
+ options = arg1;
3201
2708
  }
3202
- if (options !== undefined) {
3203
- tensorConfig = options;
3204
- if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
3205
- throw new Error('Image input config format must be RGBA for ImageData');
3206
- }
3207
- else {
3208
- tensorConfig.bitmapFormat = 'RGBA';
3209
- }
2709
+ else if (typeof arg1 !== 'undefined') {
2710
+ throw new TypeError('\'options\' must be an object.');
3210
2711
  }
3211
- else {
3212
- tensorConfig.bitmapFormat = 'RGBA';
2712
+ }
2713
+ else if (arg0 instanceof ArrayBuffer ||
2714
+ (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
2715
+ const buffer = arg0;
2716
+ let byteOffset = 0;
2717
+ let byteLength = arg0.byteLength;
2718
+ if (typeof arg1 === 'object' && arg1 !== null) {
2719
+ options = arg1;
3213
2720
  }
3214
- tensorConfig.height = height;
3215
- tensorConfig.width = width;
3216
- if (options !== undefined) {
3217
- const tempCanvas = document.createElement('canvas');
3218
- tempCanvas.width = width;
3219
- tempCanvas.height = height;
3220
- const pixels2DContext = tempCanvas.getContext('2d');
3221
- if (pixels2DContext != null) {
3222
- pixels2DContext.putImageData(image, 0, 0);
3223
- data = pixels2DContext.getImageData(0, 0, width, height).data;
2721
+ else if (typeof arg1 === 'number') {
2722
+ byteOffset = arg1;
2723
+ if (!Number.isSafeInteger(byteOffset)) {
2724
+ throw new RangeError('\'byteOffset\' must be an integer.');
3224
2725
  }
3225
- else {
3226
- throw new Error('Can not access image data');
2726
+ if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
2727
+ throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
3227
2728
  }
3228
- }
3229
- else {
3230
- data = image.data;
3231
- }
3232
- }
3233
- else if (isImageBitmap) {
3234
- // ImageBitmap - image object - format must be provided by user
3235
- if (options === undefined) {
3236
- throw new Error('Please provide image config with format for Imagebitmap');
3237
- }
3238
- if (options.bitmapFormat !== undefined) {
3239
- throw new Error('Image input config format must be defined for ImageBitmap');
3240
- }
3241
- const pixels2DContext = document.createElement('canvas').getContext('2d');
3242
- if (pixels2DContext != null) {
3243
- const height = image.height;
3244
- const width = image.width;
3245
- pixels2DContext.drawImage(image, 0, 0, width, height);
3246
- data = pixels2DContext.getImageData(0, 0, width, height).data;
3247
- if (options !== undefined) {
3248
- // using square brackets to avoid TS error - type 'never'
3249
- if (options.height !== undefined && options.height !== height) {
3250
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
2729
+ byteLength = arg0.byteLength - byteOffset;
2730
+ if (typeof arg2 === 'number') {
2731
+ byteLength = arg2;
2732
+ if (!Number.isSafeInteger(byteLength)) {
2733
+ throw new RangeError('\'byteLength\' must be an integer.');
3251
2734
  }
3252
- else {
3253
- tensorConfig.height = height;
2735
+ if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
2736
+ throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
3254
2737
  }
3255
- // using square brackets to avoid TS error - type 'never'
3256
- if (options.width !== undefined && options.width !== width) {
3257
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
2738
+ if (typeof arg3 === 'object' && arg3 !== null) {
2739
+ options = arg3;
3258
2740
  }
3259
- else {
3260
- tensorConfig.width = width;
2741
+ else if (typeof arg3 !== 'undefined') {
2742
+ throw new TypeError('\'options\' must be an object.');
3261
2743
  }
3262
2744
  }
3263
- else {
3264
- tensorConfig.height = height;
3265
- tensorConfig.width = width;
2745
+ else if (typeof arg2 !== 'undefined') {
2746
+ throw new TypeError('\'byteLength\' must be a number.');
3266
2747
  }
3267
- return Tensor.bufferToTensor(data, tensorConfig);
3268
2748
  }
3269
- else {
3270
- throw new Error('Can not access image data');
2749
+ else if (typeof arg1 !== 'undefined') {
2750
+ throw new TypeError('\'options\' must be an object.');
3271
2751
  }
3272
- }
3273
- else if (isURL) {
3274
- return new Promise((resolve, reject) => {
3275
- const canvas = document.createElement('canvas');
3276
- const context = canvas.getContext('2d');
3277
- if (!image || !context) {
3278
- return reject();
3279
- }
3280
- const newImage = new Image();
3281
- newImage.crossOrigin = 'Anonymous';
3282
- newImage.src = image;
3283
- newImage.onload = () => {
3284
- canvas.width = newImage.width;
3285
- canvas.height = newImage.height;
3286
- context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
3287
- const img = context.getImageData(0, 0, canvas.width, canvas.height);
3288
- if (options !== undefined) {
3289
- // using square brackets to avoid TS error - type 'never'
3290
- if (options.height !== undefined && options.height !== canvas.height) {
3291
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
3292
- }
3293
- else {
3294
- tensorConfig.height = canvas.height;
3295
- }
3296
- // using square brackets to avoid TS error - type 'never'
3297
- if (options.width !== undefined && options.width !== canvas.width) {
3298
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
3299
- }
3300
- else {
3301
- tensorConfig.width = canvas.width;
3302
- }
3303
- }
3304
- else {
3305
- tensorConfig.height = canvas.height;
3306
- tensorConfig.width = canvas.width;
3307
- }
3308
- resolve(Tensor.bufferToTensor(img.data, tensorConfig));
3309
- };
3310
- });
3311
- }
3312
- else {
3313
- throw new Error('Input data provided is not supported - aborted tensor creation');
3314
- }
3315
- if (data !== undefined) {
3316
- return Tensor.bufferToTensor(data, tensorConfig);
3317
- }
3318
- else {
3319
- throw new Error('Input data provided is not supported - aborted tensor creation');
3320
- }
3321
- }
3322
- toImageData(options) {
3323
- var _a, _b;
3324
- const pixels2DContext = document.createElement('canvas').getContext('2d');
3325
- let image;
3326
- if (pixels2DContext != null) {
3327
- // Default values for height and width & format
3328
- const width = this.dims[3];
3329
- const height = this.dims[2];
3330
- const channels = this.dims[1];
3331
- const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
3332
- const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
3333
- const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
3334
- const offset = height * width;
3335
- if (options !== undefined) {
3336
- if (options.height !== undefined && options.height !== height) {
3337
- throw new Error('Image output config height doesn\'t match tensor height');
3338
- }
3339
- if (options.width !== undefined && options.width !== width) {
3340
- throw new Error('Image output config width doesn\'t match tensor width');
3341
- }
3342
- if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
3343
- (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
3344
- throw new Error('Tensor format doesn\'t match input tensor dims');
3345
- }
3346
- }
3347
- // Default pointer assignments
3348
- const step = 4;
3349
- let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
3350
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
3351
- // Updating the pointer assignments based on the input image format
3352
- if (inputformat === 'RGBA') {
3353
- rTensorPointer = 0;
3354
- gTensorPointer = offset;
3355
- bTensorPointer = offset * 2;
3356
- aTensorPointer = offset * 3;
3357
- }
3358
- else if (inputformat === 'RGB') {
3359
- rTensorPointer = 0;
3360
- gTensorPointer = offset;
3361
- bTensorPointer = offset * 2;
3362
- }
3363
- else if (inputformat === 'RBG') {
3364
- rTensorPointer = 0;
3365
- bTensorPointer = offset;
3366
- gTensorPointer = offset * 2;
3367
- }
3368
- image = pixels2DContext.createImageData(width, height);
3369
- for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
3370
- image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
3371
- image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
3372
- image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
3373
- image.data[aImagePointer] =
3374
- aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
3375
- }
3376
- }
3377
- else {
3378
- throw new Error('Can not access image data');
3379
- }
3380
- return image;
3381
- }
3382
- // #endregion
3383
- // #region tensor utilities
3384
- reshape(dims) {
3385
- return new Tensor(this.type, this.data, dims);
3386
- }
3387
- };
3388
-
3389
- // Copyright (c) Microsoft Corporation. All rights reserved.
3390
- // Licensed under the MIT License.
3391
- // eslint-disable-next-line @typescript-eslint/naming-convention
3392
- const Tensor = Tensor$1;
3393
-
3394
- // Copyright (c) Microsoft Corporation. All rights reserved.
3395
- // Licensed under the MIT License.
3396
- let InferenceSession$1 = class InferenceSession {
3397
- constructor(handler) {
3398
- this.handler = handler;
3399
- }
3400
- async run(feeds, arg1, arg2) {
3401
- const fetches = {};
3402
- let options = {};
3403
- // check inputs
3404
- if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
3405
- throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
3406
- }
3407
- let isFetchesEmpty = true;
3408
- // determine which override is being used
3409
- if (typeof arg1 === 'object') {
3410
- if (arg1 === null) {
3411
- throw new TypeError('Unexpected argument[1]: cannot be null.');
3412
- }
3413
- if (arg1 instanceof Tensor) {
3414
- throw new TypeError('\'fetches\' cannot be a Tensor');
3415
- }
3416
- if (Array.isArray(arg1)) {
3417
- if (arg1.length === 0) {
3418
- throw new TypeError('\'fetches\' cannot be an empty array.');
3419
- }
3420
- isFetchesEmpty = false;
3421
- // output names
3422
- for (const name of arg1) {
3423
- if (typeof name !== 'string') {
3424
- throw new TypeError('\'fetches\' must be a string array or an object.');
3425
- }
3426
- if (this.outputNames.indexOf(name) === -1) {
3427
- throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
3428
- }
3429
- fetches[name] = null;
3430
- }
3431
- if (typeof arg2 === 'object' && arg2 !== null) {
3432
- options = arg2;
3433
- }
3434
- else if (typeof arg2 !== 'undefined') {
3435
- throw new TypeError('\'options\' must be an object.');
3436
- }
3437
- }
3438
- else {
3439
- // decide whether arg1 is fetches or options
3440
- // if any output name is present and its value is valid OnnxValue, we consider it fetches
3441
- let isFetches = false;
3442
- const arg1Keys = Object.getOwnPropertyNames(arg1);
3443
- for (const name of this.outputNames) {
3444
- if (arg1Keys.indexOf(name) !== -1) {
3445
- const v = arg1[name];
3446
- if (v === null || v instanceof Tensor) {
3447
- isFetches = true;
3448
- isFetchesEmpty = false;
3449
- fetches[name] = v;
3450
- }
3451
- }
3452
- }
3453
- if (isFetches) {
3454
- if (typeof arg2 === 'object' && arg2 !== null) {
3455
- options = arg2;
3456
- }
3457
- else if (typeof arg2 !== 'undefined') {
3458
- throw new TypeError('\'options\' must be an object.');
3459
- }
3460
- }
3461
- else {
3462
- options = arg1;
3463
- }
3464
- }
3465
- }
3466
- else if (typeof arg1 !== 'undefined') {
3467
- throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
3468
- }
3469
- // check if all inputs are in feed
3470
- for (const name of this.inputNames) {
3471
- if (typeof feeds[name] === 'undefined') {
3472
- throw new Error(`input '${name}' is missing in 'feeds'.`);
3473
- }
3474
- }
3475
- // if no fetches is specified, we use the full output names list
3476
- if (isFetchesEmpty) {
3477
- for (const name of this.outputNames) {
3478
- fetches[name] = null;
3479
- }
3480
- }
3481
- // feeds, fetches and options are prepared
3482
- const results = await this.handler.run(feeds, fetches, options);
3483
- const returnValue = {};
3484
- for (const key in results) {
3485
- if (Object.hasOwnProperty.call(results, key)) {
3486
- returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
3487
- }
3488
- }
3489
- return returnValue;
3490
- }
3491
- static async create(arg0, arg1, arg2, arg3) {
3492
- // either load from a file or buffer
3493
- let filePathOrUint8Array;
3494
- let options = {};
3495
- if (typeof arg0 === 'string') {
3496
- filePathOrUint8Array = arg0;
3497
- if (typeof arg1 === 'object' && arg1 !== null) {
3498
- options = arg1;
3499
- }
3500
- else if (typeof arg1 !== 'undefined') {
3501
- throw new TypeError('\'options\' must be an object.');
3502
- }
3503
- }
3504
- else if (arg0 instanceof Uint8Array) {
3505
- filePathOrUint8Array = arg0;
3506
- if (typeof arg1 === 'object' && arg1 !== null) {
3507
- options = arg1;
3508
- }
3509
- else if (typeof arg1 !== 'undefined') {
3510
- throw new TypeError('\'options\' must be an object.');
3511
- }
3512
- }
3513
- else if (arg0 instanceof ArrayBuffer ||
3514
- (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
3515
- const buffer = arg0;
3516
- let byteOffset = 0;
3517
- let byteLength = arg0.byteLength;
3518
- if (typeof arg1 === 'object' && arg1 !== null) {
3519
- options = arg1;
3520
- }
3521
- else if (typeof arg1 === 'number') {
3522
- byteOffset = arg1;
3523
- if (!Number.isSafeInteger(byteOffset)) {
3524
- throw new RangeError('\'byteOffset\' must be an integer.');
3525
- }
3526
- if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
3527
- throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
3528
- }
3529
- byteLength = arg0.byteLength - byteOffset;
3530
- if (typeof arg2 === 'number') {
3531
- byteLength = arg2;
3532
- if (!Number.isSafeInteger(byteLength)) {
3533
- throw new RangeError('\'byteLength\' must be an integer.');
3534
- }
3535
- if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
3536
- throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
3537
- }
3538
- if (typeof arg3 === 'object' && arg3 !== null) {
3539
- options = arg3;
3540
- }
3541
- else if (typeof arg3 !== 'undefined') {
3542
- throw new TypeError('\'options\' must be an object.');
3543
- }
3544
- }
3545
- else if (typeof arg2 !== 'undefined') {
3546
- throw new TypeError('\'byteLength\' must be a number.');
3547
- }
3548
- }
3549
- else if (typeof arg1 !== 'undefined') {
3550
- throw new TypeError('\'options\' must be an object.');
3551
- }
3552
- filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
2752
+ filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
3553
2753
  }
3554
2754
  else {
3555
2755
  throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
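The branches above accept a model path, a Uint8Array, or an ArrayBuffer with optional byteOffset/byteLength; a sketch (URL and session options are illustrative):

    const buf = await (await fetch('/models/model.onnx')).arrayBuffer();
    const session = await InferenceSession.create(buf, 0, buf.byteLength, { executionProviders: ['wasm'] });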
@@ -3947,7 +3147,7 @@ registerProcessor('audio_processor', AudioProcessor);
3947
3147
  return Math.ceil((targetDuration * sr) / 1000 / frameSamples);
3948
3148
  }
3949
3149
  utils.minFramesForTargetMS = minFramesForTargetMS;
3950
- function arrayBufferToBase64(buffer) {
3150
+ function arrayBufferToBase64$1(buffer) {
3951
3151
  const bytes = new Uint8Array(buffer);
3952
3152
  const len = bytes.byteLength;
3953
3153
  const binary = new Array(len);
@@ -3960,7 +3160,7 @@ registerProcessor('audio_processor', AudioProcessor);
3960
3160
  }
3961
3161
  return btoa(binary.join(""));
3962
3162
  }
3963
- utils.arrayBufferToBase64 = arrayBufferToBase64;
3163
+ utils.arrayBufferToBase64 = arrayBufferToBase64$1;
3964
3164
  /*
3965
3165
  The rest of this was mostly copied from https://github.com/linto-ai/WebVoiceSDK
3966
3166
  */
@@ -4379,12 +3579,736 @@ registerProcessor('audio_processor', AudioProcessor);
4379
3579
 
4380
3580
  } (dist));
4381
3581
 
4382
- var index = /*@__PURE__*/getDefaultExportFromCjs(dist);
4383
-
4384
- var index$1 = /*#__PURE__*/_mergeNamespaces({
4385
- __proto__: null,
4386
- default: index
4387
- }, [dist]);
3582
+ /**
3583
+ * Converts a base64 string to an ArrayBuffer.
3584
+ * @param {string} base64 - The base64 string to convert.
3585
+ * @returns {ArrayBuffer} The resulting ArrayBuffer.
3586
+ */
3587
+ function base64ToArrayBuffer(base64) {
3588
+ const binaryString = atob(base64);
3589
+ const len = binaryString.length;
3590
+ const bytes = new Uint8Array(len);
3591
+ for (let i = 0; i < len; i++) {
3592
+ bytes[i] = binaryString.charCodeAt(i);
3593
+ }
3594
+ return bytes.buffer;
3595
+ }
3596
+
3597
+ /**
3598
+ * Converts an ArrayBuffer to a base64 string.
3599
+ * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
3600
+ * @returns {string} The resulting base64 string.
3601
+ */
3602
+ function arrayBufferToBase64(arrayBuffer) {
3603
+ if (arrayBuffer instanceof Float32Array) {
3604
+ // Clamp float samples to [-1, 1] and convert to 16-bit PCM inline;
+ // `this` is undefined in this standalone strict-mode function, so no
+ // instance helper is available here.
+ const float32 = arrayBuffer;
+ const int16 = new Int16Array(float32.length);
+ for (let i = 0; i < float32.length; i++) {
+ const s = Math.max(-1, Math.min(1, float32[i]));
+ int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
+ }
+ arrayBuffer = int16.buffer;
3605
+ } else if (arrayBuffer instanceof Int16Array) {
3606
+ arrayBuffer = arrayBuffer.buffer;
3607
+ }
3608
+ let binary = '';
3609
+ let bytes = new Uint8Array(arrayBuffer);
3610
+ const chunkSize = 0x8000; // 32KB chunk size
3611
+ for (let i = 0; i < bytes.length; i += chunkSize) {
3612
+ let chunk = bytes.subarray(i, i + chunkSize);
3613
+ binary += String.fromCharCode.apply(null, chunk);
3614
+ }
3615
+ return btoa(binary);
3616
+ }
3617
+
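A quick round-trip sketch of these two internal helpers (browser-only, since they rely on atob/btoa; the sample values are illustrative):

  // Encode an Int16Array of PCM samples, then decode back to the same values.
  const samples = new Int16Array([0, 1024, -1024, 32767]);
  const encoded = arrayBufferToBase64(samples);                 // Int16Array branch encodes the underlying buffer
  const decoded = new Int16Array(base64ToArrayBuffer(encoded));
  console.assert(decoded[3] === 32767, 'round-trip should preserve samples');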
3618
+ /* eslint-env browser */
3619
+ // import { env as ortEnv } from 'onnxruntime-web';
3620
+ const NOOP = () => { };
3621
+ const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
3622
+ // SDK version - updated when publishing
3623
+ const SDK_VERSION = '2.1.3';
3624
+ /**
3625
+ * @class LayercodeClient
3626
+ * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
3627
+ */
3628
+ class LayercodeClient {
3629
+ /**
3630
+ * Creates an instance of LayercodeClient.
3631
+ * @param {Object} options - Configuration options
3632
+ */
3633
+ constructor(options) {
3634
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q;
3635
+ this.deviceId = null;
3636
+ this.options = {
3637
+ agentId: options.agentId,
3638
+ conversationId: (_a = options.conversationId) !== null && _a !== void 0 ? _a : null,
3639
+ authorizeSessionEndpoint: options.authorizeSessionEndpoint,
3640
+ metadata: (_b = options.metadata) !== null && _b !== void 0 ? _b : {},
3641
+ vadResumeDelay: (_c = options.vadResumeDelay) !== null && _c !== void 0 ? _c : 500,
3642
+ onConnect: (_d = options.onConnect) !== null && _d !== void 0 ? _d : NOOP,
3643
+ onDisconnect: (_e = options.onDisconnect) !== null && _e !== void 0 ? _e : NOOP,
3644
+ onError: (_f = options.onError) !== null && _f !== void 0 ? _f : NOOP,
3645
+ onDeviceSwitched: (_g = options.onDeviceSwitched) !== null && _g !== void 0 ? _g : NOOP,
3646
+ onDevicesChanged: (_h = options.onDevicesChanged) !== null && _h !== void 0 ? _h : NOOP,
3647
+ onDataMessage: (_j = options.onDataMessage) !== null && _j !== void 0 ? _j : NOOP,
3648
+ onMessage: (_k = options.onMessage) !== null && _k !== void 0 ? _k : NOOP,
3649
+ onUserAmplitudeChange: (_l = options.onUserAmplitudeChange) !== null && _l !== void 0 ? _l : NOOP,
3650
+ onAgentAmplitudeChange: (_m = options.onAgentAmplitudeChange) !== null && _m !== void 0 ? _m : NOOP,
3651
+ onStatusChange: (_o = options.onStatusChange) !== null && _o !== void 0 ? _o : NOOP,
3652
+ onUserIsSpeakingChange: (_p = options.onUserIsSpeakingChange) !== null && _p !== void 0 ? _p : NOOP,
3653
+ onMuteStateChange: (_q = options.onMuteStateChange) !== null && _q !== void 0 ? _q : NOOP,
3654
+ };
3655
+ this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
3656
+ this._websocketUrl = DEFAULT_WS_URL;
3657
+ this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO: should be set by the fetched agent config
3658
+ this.wavPlayer = new WavStreamPlayer({
3659
+ finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
3660
+ sampleRate: 16000, // TODO: should be set by the fetched agent config
3661
+ });
3662
+ this.vad = null;
3663
+ this.ws = null;
3664
+ this.status = 'disconnected';
3665
+ this.userAudioAmplitude = 0;
3666
+ this.agentAudioAmplitude = 0;
3667
+ this.conversationId = this.options.conversationId;
3668
+ this.pushToTalkActive = false;
3669
+ this.pushToTalkEnabled = false;
3670
+ this.canInterrupt = false;
3671
+ this.userIsSpeaking = false;
3672
+ this.recorderStarted = false;
3673
+ this.readySent = false;
3674
+ this.currentTurnId = null;
3675
+ this.audioBuffer = [];
3676
+ this.vadConfig = null;
3677
+ this.activeDeviceId = null;
3678
+ this.useSystemDefaultDevice = false;
3679
+ this.lastReportedDeviceId = null;
3680
+ this.lastKnownSystemDefaultDeviceKey = null;
3681
+ this.isMuted = false;
3682
+ this.stopPlayerAmplitude = undefined;
3683
+ this.stopRecorderAmplitude = undefined;
3684
+ this.deviceChangeListener = null;
3685
+ // this.audioPauseTime = null;
3686
+ // Bind event handlers
3687
+ this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3688
+ this._handleDataAvailable = this._handleDataAvailable.bind(this);
3689
+ }
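A minimal construction sketch using the options consumed above (the agent id and endpoint are placeholders; every callback is optional and defaults to a no-op):

  const client = new LayercodeClient({
    agentId: 'your-agent-id',                    // placeholder
    authorizeSessionEndpoint: '/api/authorize',  // placeholder backend route
    metadata: { userId: '123' },                 // optional; forwarded in the authorize request
    onStatusChange: (status) => console.log('status:', status),
    onUserIsSpeakingChange: (speaking) => console.log('speaking:', speaking),
    onError: (err) => console.error(err),
  });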
3690
+ _initializeVAD() {
3691
+ var _a;
3692
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
3693
+ // If we're in push to talk mode, we don't need to use the VAD model
3694
+ if (this.pushToTalkEnabled) {
3695
+ return;
3696
+ }
3697
+ // Check if VAD is disabled
3698
+ if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
3699
+ console.log('VAD is disabled by backend configuration');
3700
+ return;
3701
+ }
3702
+ // Build VAD configuration object, only including keys that are defined
3703
+ const vadOptions = {
3704
+ stream: this.wavRecorder.getStream() || undefined,
3705
+ onSpeechStart: () => {
3706
+ console.debug('onSpeechStart: sending vad_start');
3707
+ this.userIsSpeaking = true;
3708
+ this.options.onUserIsSpeakingChange(true);
3709
+ this._wsSend({
3710
+ type: 'vad_events',
3711
+ event: 'vad_start',
3712
+ });
3713
+ this.options.onMessage({
3714
+ type: 'vad_events',
3715
+ event: 'vad_start',
3716
+ });
3717
+ },
3718
+ onSpeechEnd: () => {
3719
+ console.debug('onSpeechEnd: sending vad_end');
3720
+ this.userIsSpeaking = false;
3721
+ this.options.onUserIsSpeakingChange(false);
3722
+ this.audioBuffer = []; // Clear buffer on speech end
3723
+ this._wsSend({
3724
+ type: 'vad_events',
3725
+ event: 'vad_end',
3726
+ });
3727
+ this.options.onMessage({
3728
+ type: 'vad_events',
3729
+ event: 'vad_end',
3730
+ });
3731
+ },
3732
+ };
3733
+ // Apply VAD configuration from backend if available
3734
+ if (this.vadConfig) {
3735
+ // Only add keys that are explicitly defined (not undefined)
3736
+ if (this.vadConfig.model !== undefined)
3737
+ vadOptions.model = this.vadConfig.model;
3738
+ if (this.vadConfig.positive_speech_threshold !== undefined)
3739
+ vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
3740
+ if (this.vadConfig.negative_speech_threshold !== undefined)
3741
+ vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
3742
+ if (this.vadConfig.redemption_frames !== undefined)
3743
+ vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
3744
+ if (this.vadConfig.min_speech_frames !== undefined)
3745
+ vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
3746
+ if (this.vadConfig.pre_speech_pad_frames !== undefined)
3747
+ vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
3748
+ if (this.vadConfig.frame_samples !== undefined)
3749
+ vadOptions.frameSamples = this.vadConfig.frame_samples;
3750
+ }
3751
+ else {
3752
+ // Default values if no config from backend
3753
+ vadOptions.model = 'v5';
3754
+ vadOptions.positiveSpeechThreshold = 0.15;
3755
+ vadOptions.negativeSpeechThreshold = 0.05;
3756
+ vadOptions.redemptionFrames = 4;
3757
+ vadOptions.minSpeechFrames = 2;
3758
+ vadOptions.preSpeechPadFrames = 0;
3759
+ vadOptions.frameSamples = 512; // Required for v5
3760
+ }
3761
+ console.log('Creating VAD with options:', vadOptions);
3762
+ dist.MicVAD.new(vadOptions)
3763
+ .then((vad) => {
3764
+ this.vad = vad;
3765
+ this.vad.start();
3766
+ console.log('VAD started successfully');
3767
+ })
3768
+ .catch((error) => {
3769
+ console.warn('Error initializing VAD:', error);
3770
+ // Send a message to server indicating VAD failure
3771
+ this._wsSend({
3772
+ type: 'vad_events',
3773
+ event: 'vad_model_failed',
3774
+ });
3775
+ });
3776
+ }
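The per-key copying above is just a snake_case-to-camelCase translation of the backend config; an equivalent sketch (vadConfig and vadOptions stand in for the locals above):

  const VAD_KEY_MAP = {
    model: 'model',
    positive_speech_threshold: 'positiveSpeechThreshold',
    negative_speech_threshold: 'negativeSpeechThreshold',
    redemption_frames: 'redemptionFrames',
    min_speech_frames: 'minSpeechFrames',
    pre_speech_pad_frames: 'preSpeechPadFrames',
    frame_samples: 'frameSamples',
  };
  for (const [from, to] of Object.entries(VAD_KEY_MAP)) {
    if (vadConfig[from] !== undefined) vadOptions[to] = vadConfig[from];  // skip undefined keys
  }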
3777
+ /**
3778
+ * Updates the connection status and triggers the callback
3779
+ * @param {string} status - New status value
3780
+ */
3781
+ _setStatus(status) {
3782
+ this.status = status;
3783
+ this.options.onStatusChange(status);
3784
+ }
3785
+ /**
3786
+ * Handles when agent audio finishes playing
3787
+ */
3788
+ _clientResponseAudioReplayFinished() {
3789
+ console.debug('clientResponseAudioReplayFinished');
3790
+ this._wsSend({
3791
+ type: 'trigger.response.audio.replay_finished',
3792
+ reason: 'completed',
3793
+ });
3794
+ }
3795
+ async _clientInterruptAssistantReplay() {
3796
+ await this.wavPlayer.interrupt();
3797
+ }
3798
+ async triggerUserTurnStarted() {
3799
+ if (!this.pushToTalkActive) {
3800
+ this.pushToTalkActive = true;
3801
+ this._wsSend({ type: 'trigger.turn.start', role: 'user' });
3802
+ await this._clientInterruptAssistantReplay();
3803
+ }
3804
+ }
3805
+ async triggerUserTurnFinished() {
3806
+ if (this.pushToTalkActive) {
3807
+ this.pushToTalkActive = false;
3808
+ this._wsSend({ type: 'trigger.turn.end', role: 'user' });
3809
+ }
3810
+ }
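In push-to-talk mode these two triggers are typically bound to a key or button; a space-bar sketch, assuming client is a connected LayercodeClient:

  window.addEventListener('keydown', (e) => {
    if (e.code === 'Space' && !e.repeat) client.triggerUserTurnStarted(); // opens the user turn and interrupts agent audio
  });
  window.addEventListener('keyup', (e) => {
    if (e.code === 'Space') client.triggerUserTurnFinished();             // closes the user turn
  });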
3811
+ /**
3812
+ * Handles incoming WebSocket messages
3813
+ * @param {MessageEvent} event - The WebSocket message event
3814
+ */
3815
+ async _handleWebSocketMessage(event) {
3816
+ try {
3817
+ const message = JSON.parse(event.data);
3818
+ if (message.type !== 'response.audio') {
3819
+ console.debug('msg:', message);
3820
+ }
3821
+ switch (message.type) {
3822
+ case 'turn.start':
3823
+ // Sent from the server to this client when a new user turn is detected
3824
+ if (message.role === 'assistant') {
3825
+ // Start tracking new assistant turn
3826
+ console.debug('Assistant turn started, will track new turn ID from audio/text');
3827
+ }
3828
+ else if (message.role === 'user' && !this.pushToTalkEnabled) {
3829
+ // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
3830
+ console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
3831
+ await this._clientInterruptAssistantReplay();
3832
+ }
3833
+ this.options.onMessage(message);
3834
+ break;
3835
+ case 'response.audio':
3836
+ const audioBuffer = base64ToArrayBuffer(message.content);
3837
+ this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
3838
+ // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
3839
+ // Set current turn ID from first audio message, or update if different turn
3840
+ if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
3841
+ console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
3842
+ this.currentTurnId = message.turn_id;
3843
+ // Clean up interrupted tracks, keeping only the current turn
3844
+ this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
3845
+ }
3846
+ break;
3847
+ case 'response.text':
3848
+ // Set turn ID from first text message if not set
3849
+ if (!this.currentTurnId) {
3850
+ this.currentTurnId = message.turn_id;
3851
+ console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
3852
+ }
3853
+ this.options.onMessage(message);
3854
+ break;
3855
+ case 'response.data':
3856
+ this.options.onDataMessage(message);
3857
+ break;
3858
+ case 'user.transcript':
3859
+ case 'user.transcript.delta':
3860
+ case 'user.transcript.interim_delta':
3861
+ this.options.onMessage(message);
3862
+ break;
3863
+ default:
3864
+ console.warn('Unknown message type received:', message);
3865
+ }
3866
+ }
3867
+ catch (error) {
3868
+ console.error('Error processing WebSocket message:', error);
3869
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
3870
+ }
3871
+ }
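Everything other than raw audio is surfaced through onMessage / onDataMessage, so a consumer usually switches on msg.type; a sketch (the render helpers are hypothetical):

  const callbacks = {
    onMessage: (msg) => {
      switch (msg.type) {
        case 'turn.start':
          console.log(`${msg.role} turn started`);
          break;
        case 'response.text':
          renderAgentText(msg);   // hypothetical UI helper; msg carries a turn_id
          break;
        case 'user.transcript':
        case 'user.transcript.delta':
        case 'user.transcript.interim_delta':
          renderTranscript(msg);  // hypothetical UI helper
          break;
        case 'vad_events':
          console.log('vad event:', msg.event);
          break;
      }
    },
    onDataMessage: (msg) => console.log('response.data payload:', msg),
  };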
3872
+ /**
3873
+ * Handles available client browser microphone audio data and sends it over the WebSocket
3874
+ * @param {ArrayBuffer} data - The audio data buffer
3875
+ */
3876
+ _handleDataAvailable(data) {
3877
+ var _a, _b, _c;
3878
+ try {
3879
+ const base64 = arrayBufferToBase64(data.mono);
3880
+ // Don't send audio if muted
3881
+ if (this.isMuted) {
3882
+ return;
3883
+ }
3884
+ // Determine if we should gate audio based on VAD configuration
3885
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
3886
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
3887
+ let sendAudio;
3888
+ if (this.pushToTalkEnabled) {
3889
+ sendAudio = this.pushToTalkActive;
3890
+ }
3891
+ else if (shouldGateAudio) {
3892
+ sendAudio = this.userIsSpeaking;
3893
+ }
3894
+ else {
3895
+ // If gate_audio is false, always send audio
3896
+ sendAudio = true;
3897
+ }
3898
+ if (sendAudio) {
3899
+ // If we have buffered audio and we're gating, send it first
3900
+ if (shouldGateAudio && this.audioBuffer.length > 0) {
3901
+ console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
3902
+ for (const bufferedAudio of this.audioBuffer) {
3903
+ this._wsSend({
3904
+ type: 'client.audio',
3905
+ content: bufferedAudio,
3906
+ });
3907
+ }
3908
+ this.audioBuffer = []; // Clear the buffer after sending
3909
+ }
3910
+ // Send the current audio
3911
+ this._wsSend({
3912
+ type: 'client.audio',
3913
+ content: base64,
3914
+ });
3915
+ }
3916
+ else {
3917
+ // Buffer audio when not sending (to catch audio just before VAD triggers)
3918
+ this.audioBuffer.push(base64);
3919
+ // Keep buffer size based on configuration
3920
+ if (this.audioBuffer.length > bufferFrames) {
3921
+ this.audioBuffer.shift(); // Remove oldest chunk
3922
+ }
3923
+ }
3924
+ }
3925
+ catch (error) {
3926
+ console.error('Error processing audio:', error);
3927
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
3928
+ }
3929
+ }
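The buffering above amounts to a small pre-speech pad: while gated, the last buffer_frames chunks are retained, then flushed ahead of live audio once speech starts. The same idea in isolation (send is a stand-in for the WebSocket write):

  const pad = [];
  const MAX_CHUNKS = 10;  // mirrors the buffer_frames default
  function onChunk(chunk, speaking) {
    if (speaking) {
      pad.splice(0).forEach(send);               // flush the pad first, oldest chunk first
      send(chunk);
    } else {
      pad.push(chunk);
      if (pad.length > MAX_CHUNKS) pad.shift();  // drop the oldest chunk
    }
  }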
3930
+ _wsSend(message) {
3931
+ var _a;
3932
+ if (message.type !== 'client.audio') {
3933
+ console.debug('sent_msg:', message);
3934
+ }
3935
+ const messageString = JSON.stringify(message);
3936
+ if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
3937
+ this.ws.send(messageString);
3938
+ }
3939
+ }
3940
+ _sendReadyIfNeeded() {
3941
+ var _a;
3942
+ if (this.recorderStarted && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
3943
+ this._wsSend({ type: 'client.ready' });
3944
+ this.readySent = true;
3945
+ }
3946
+ }
3947
+ /**
3948
+ * Sets up amplitude monitoring for a given audio source.
3949
+ * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
3950
+ * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
3951
+ * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
3952
+ */
3953
+ _setupAmplitudeMonitoring(source, callback, updateInternalState) {
3954
+ let updateCounter = 0;
3955
+ source.startAmplitudeMonitoring((amplitude) => {
3956
+ // Only update and call callback at the specified sample rate
3957
+ if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
3958
+ updateInternalState(amplitude);
3959
+ if (callback !== NOOP) {
3960
+ callback(amplitude);
3961
+ }
3962
+ updateCounter = 0; // Reset counter after sampling
3963
+ }
3964
+ updateCounter++;
3965
+ });
3966
+ const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
3967
+ if (source === this.wavPlayer) {
3968
+ this.stopPlayerAmplitude = stop;
3969
+ }
3970
+ if (source === this.wavRecorder) {
3971
+ this.stopRecorderAmplitude = stop;
3972
+ }
3973
+ }
3974
+ _stopAmplitudeMonitoring() {
3975
+ var _a, _b;
3976
+ (_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
3977
+ (_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
3978
+ this.stopPlayerAmplitude = undefined;
3979
+ this.stopRecorderAmplitude = undefined;
3980
+ }
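The amplitude callbacks fire at a down-sampled rate (every third raw reading with the sample rate of 2 above); a sketch that drives a level meter, assuming amplitudes are normalized to [0, 1] and #mic-level is a styled div:

  const meter = document.getElementById('mic-level');
  const callbacks = {
    onUserAmplitudeChange: (amp) => { meter.style.width = `${Math.min(1, amp) * 100}%`; },
    onAgentAmplitudeChange: (amp) => console.log('agent level:', amp.toFixed(2)),
  };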
3981
+ /**
3982
+ * Connects to the Layercode agent using the stored conversation ID and starts the audio conversation
3983
+ * @async
3984
+ * @returns {Promise<void>}
3985
+ */
3986
+ async connect() {
3987
+ if (this.status === 'connecting') {
3988
+ return;
3989
+ }
3990
+ try {
3991
+ this._setStatus('connecting');
3992
+ // Reset turn tracking for clean start
3993
+ this._resetTurnTracking();
3994
+ this._stopAmplitudeMonitoring();
3995
+ // Get conversation key from server
3996
+ let authorizeSessionRequestBody = {
3997
+ agent_id: this.options.agentId,
3998
+ metadata: this.options.metadata,
3999
+ sdk_version: SDK_VERSION,
4000
+ };
4001
+ // If we're reconnecting to a previous conversation, include its conversation_id in the request. Otherwise omit it: the server will create a new conversation and return its conversation_id in the response.
4002
+ if (this.options.conversationId) {
4003
+ authorizeSessionRequestBody.conversation_id = this.options.conversationId;
4004
+ }
4005
+ const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
4006
+ method: 'POST',
4007
+ headers: {
4008
+ 'Content-Type': 'application/json',
4009
+ },
4010
+ body: JSON.stringify(authorizeSessionRequestBody),
4011
+ });
4012
+ if (!authorizeSessionResponse.ok) {
4013
+ throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
4014
+ }
4015
+ const authorizeSessionResponseBody = await authorizeSessionResponse.json();
4016
+ this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
4017
+ this.options.conversationId = this.conversationId;
4018
+ await this.wavRecorder.requestPermission();
4019
+ this._setupDeviceChangeListener();
4020
+ // Connect WebSocket
4021
+ this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
4022
+ client_session_key: authorizeSessionResponseBody.client_session_key,
4023
+ })}`);
4024
+ const config = authorizeSessionResponseBody.config;
4025
+ console.log('AgentConfig', config);
4026
+ // Store VAD configuration
4027
+ this.vadConfig = config.vad || null;
4028
+ if (config.transcription.trigger === 'push_to_talk') {
4029
+ this.pushToTalkEnabled = true;
4030
+ }
4031
+ else if (config.transcription.trigger === 'automatic') {
4032
+ this.pushToTalkEnabled = false;
4033
+ this.canInterrupt = config.transcription.can_interrupt;
4034
+ }
4035
+ else {
4036
+ throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
4037
+ }
4038
+ // Bind the websocket message callbacks
4039
+ this.ws.onmessage = this._handleWebSocketMessage;
4040
+ this.ws.onopen = () => {
4041
+ console.log('WebSocket connection established');
4042
+ this._setStatus('connected');
4043
+ this.options.onConnect({ conversationId: this.conversationId });
4044
+ // Attempt to send ready message if recorder already started
4045
+ this._sendReadyIfNeeded();
4046
+ };
4047
+ this.ws.onclose = () => {
4048
+ console.log('WebSocket connection closed');
4049
+ this.ws = null;
4050
+ this._performDisconnectCleanup().catch((error) => {
4051
+ console.error('Error during disconnect cleanup:', error);
4052
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4053
+ });
4054
+ };
4055
+ this.ws.onerror = (error) => {
4056
+ console.error('WebSocket error:', error);
4057
+ this._setStatus('error');
4058
+ this.options.onError(new Error('WebSocket connection error'));
4059
+ };
4060
+ // Initialize audio player
4061
+ await this.wavPlayer.connect();
4062
+ // Set up audio player amplitude monitoring
4063
+ this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
4064
+ // wavRecorder is started from the device-change listener, which fires when
4065
+ // the device is first initialized and again whenever the device is switched;
4066
+ // this ensures the device is initialized before the recorder starts
4067
+ }
4068
+ catch (error) {
4069
+ console.error('Error connecting to Layercode agent:', error);
4070
+ this._setStatus('error');
4071
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4072
+ }
4073
+ }
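For reference, connect() assumes the authorizeSessionEndpoint responds with JSON of roughly this shape (field names are taken from the code above; the values are illustrative):

  // Shape of the awaited authorizeSessionResponse.json() body:
  const authorizeSessionResponseBody = {
    conversation_id: 'conv_123',       // persisted by the client for reconnects
    client_session_key: 'sess_abc',    // appended to the WebSocket URL
    config: {
      transcription: { trigger: 'automatic', can_interrupt: true },  // or 'push_to_talk'
      vad: { enabled: true },          // optional; stored as vadConfig
    },
  };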
4074
+ _resetTurnTracking() {
4075
+ this.currentTurnId = null;
4076
+ console.debug('Reset turn tracking state');
4077
+ }
4078
+ async disconnect() {
4079
+ if (this.status === 'disconnected') {
4080
+ return;
4081
+ }
4082
+ if (this.ws) {
4083
+ this.ws.onopen = null;
4084
+ this.ws.onclose = null;
4085
+ this.ws.onerror = null;
4086
+ this.ws.onmessage = null;
4087
+ this.ws.close();
4088
+ this.ws = null;
4089
+ }
4090
+ await this._performDisconnectCleanup();
4091
+ }
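A typical lifecycle, given the guards above (connect() is a no-op while already connecting, disconnect() while already disconnected):

  await client.connect();      // authorizes the session, opens the WebSocket, starts audio
  // ... conversation runs; callbacks fire ...
  await client.disconnect();   // detaches handlers, closes the socket, runs the same cleanup as a server-side close
  // conversationId survives cleanup, so a later connect() resumes the same conversation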
4092
+ /**
4093
+ * Gets the microphone MediaStream used by this client
4094
+ * @returns {MediaStream|null} The microphone stream or null if not initialized
4095
+ */
4096
+ getStream() {
4097
+ return this.wavRecorder.getStream();
4098
+ }
4099
+ /**
4100
+ * List all available audio input devices
4101
+ * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
4102
+ */
4103
+ async listDevices() {
4104
+ return this.wavRecorder.listDevices();
4105
+ }
4106
+ /**
4107
+ * Switches the input device for the microphone and restarts recording
4108
+ * @param {string} deviceId - The deviceId of the new microphone
4109
+ */
4110
+ async setInputDevice(deviceId) {
4111
+ var _a, _b, _c;
4112
+ try {
4113
+ const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
4114
+ this.useSystemDefaultDevice = normalizedDeviceId === null;
4115
+ this.deviceId = normalizedDeviceId;
4116
+ // Restart recording with the new device
4117
+ await this._restartAudioRecording();
4118
+ // Reinitialize VAD with the new audio stream if VAD is enabled
4119
+ const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
4120
+ if (shouldUseVAD) {
4121
+ console.debug('Reinitializing VAD with new audio stream');
4122
+ const newStream = this.wavRecorder.getStream();
4123
+ await this._reinitializeVAD(newStream);
4124
+ }
4125
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
4126
+ console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
4127
+ }
4128
+ catch (error) {
4129
+ console.error(`Failed to switch to input device ${deviceId}:`, error);
4130
+ throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
4131
+ }
4132
+ }
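listDevices() and setInputDevice() pair naturally with a dropdown; a sketch, assuming #mic-select is an empty <select> and client is connected:

  const select = document.getElementById('mic-select');
  for (const device of await client.listDevices()) {
    const option = document.createElement('option');
    option.value = device.default ? 'default' : device.deviceId;  // 'default' selects the system default device
    option.textContent = device.label || device.deviceId;
    select.appendChild(option);
  }
  select.onchange = () => client.setInputDevice(select.value);    // restarts recording on the chosen device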
4133
+ /**
4134
+ * Restarts audio recording after a device switch to ensure audio is captured from the new device
4135
+ */
4136
+ async _restartAudioRecording() {
4137
+ var _a, _b;
4138
+ try {
4139
+ console.debug('Restarting audio recording after device switch...');
4140
+ try {
4141
+ await this.wavRecorder.end();
4142
+ }
4143
+ catch (_c) {
4144
+ // Ignore cleanup errors
4145
+ }
4146
+ // Start with new device
4147
+ const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
4148
+ await this.wavRecorder.begin(targetDeviceId);
4149
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
4150
+ // Re-setup amplitude monitoring with the new stream
4151
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
4152
+ const previousReportedDeviceId = this.lastReportedDeviceId;
4153
+ const stream = this.wavRecorder.getStream();
4154
+ const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
4155
+ const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
4156
+ const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
4157
+ this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
4158
+ if (!this.recorderStarted) {
4159
+ this.recorderStarted = true;
4160
+ this._sendReadyIfNeeded();
4161
+ }
4162
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
4163
+ if (reportedDeviceId !== previousReportedDeviceId) {
4164
+ this.lastReportedDeviceId = reportedDeviceId;
4165
+ if (this.options.onDeviceSwitched) {
4166
+ this.options.onDeviceSwitched(reportedDeviceId);
4167
+ }
4168
+ }
4169
+ console.debug('Audio recording restart completed successfully');
4170
+ }
4171
+ catch (error) {
4172
+ console.error('Error restarting audio recording after device switch:', error);
4173
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4174
+ }
4175
+ }
4176
+ /**
4177
+ * Reinitializes VAD with a new stream (used after device switching)
4178
+ */
4179
+ async _reinitializeVAD(stream) {
4180
+ // Clean up existing VAD
4181
+ if (this.vad) {
4182
+ this.vad.pause();
4183
+ this.vad.destroy();
4184
+ this.vad = null;
4185
+ }
4186
+ // Reinitialize with new stream
4187
+ if (stream) {
4188
+ this._initializeVAD();
4189
+ }
4190
+ }
4191
+ /**
4192
+ * Sets up the device change event listener
4193
+ */
4194
+ _setupDeviceChangeListener() {
4195
+ if (!this.deviceChangeListener) {
4196
+ this.deviceChangeListener = async (devices) => {
4197
+ try {
4198
+ // Notify user that devices have changed
4199
+ this.options.onDevicesChanged(devices);
4200
+ const defaultDevice = devices.find((device) => device.default);
4201
+ const usingDefaultDevice = this.useSystemDefaultDevice;
4202
+ const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
4203
+ const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
4204
+ let shouldSwitch = !this.recorderStarted;
4205
+ if (!shouldSwitch) {
4206
+ if (usingDefaultDevice) {
4207
+ if (!defaultDevice) {
4208
+ shouldSwitch = true;
4209
+ }
4210
+ else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
4211
+ shouldSwitch = true;
4212
+ }
4213
+ else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
4214
+ (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
4215
+ shouldSwitch = true;
4216
+ }
4217
+ }
4218
+ else {
4219
+ const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
4220
+ shouldSwitch = !matchesRequestedDevice;
4221
+ }
4222
+ }
4223
+ this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
4224
+ if (shouldSwitch) {
4225
+ console.debug('Selecting fallback audio input device');
4226
+ const fallbackDevice = defaultDevice || devices[0];
4227
+ if (fallbackDevice) {
4228
+ const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
4229
+ await this.setInputDevice(fallbackId);
4230
+ }
4231
+ else {
4232
+ console.warn('No alternative audio device found');
4233
+ }
4234
+ }
4235
+ }
4236
+ catch (error) {
4237
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4238
+ }
4239
+ };
4240
+ }
4241
+ this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
4242
+ }
4243
+ _teardownDeviceListeners() {
4244
+ this.wavRecorder.listenForDeviceChange(null);
4245
+ }
4246
+ async _performDisconnectCleanup() {
4247
+ var _a, _b;
4248
+ this.deviceId = null;
4249
+ this.activeDeviceId = null;
4250
+ this.useSystemDefaultDevice = false;
4251
+ this.lastReportedDeviceId = null;
4252
+ this.lastKnownSystemDefaultDeviceKey = null;
4253
+ this.recorderStarted = false;
4254
+ this.readySent = false;
4255
+ this._stopAmplitudeMonitoring();
4256
+ this._teardownDeviceListeners();
4257
+ if (this.vad) {
4258
+ this.vad.pause();
4259
+ this.vad.destroy();
4260
+ this.vad = null;
4261
+ }
4262
+ await this.wavRecorder.quit();
4263
+ (_b = (_a = this.wavPlayer).stop) === null || _b === void 0 ? void 0 : _b.call(_a);
4264
+ this.wavPlayer.disconnect();
4265
+ this._resetTurnTracking();
4266
+ this.options.conversationId = this.conversationId;
4267
+ this.userAudioAmplitude = 0;
4268
+ this.agentAudioAmplitude = 0;
4269
+ this._setStatus('disconnected');
4270
+ this.options.onDisconnect();
4271
+ }
4272
+ _getDeviceComparisonKey(device) {
4273
+ if (!device || typeof device !== 'object') {
4274
+ return null;
4275
+ }
4276
+ const deviceId = typeof device.deviceId === 'string' ? device.deviceId : '';
4277
+ if (deviceId && deviceId !== 'default') {
4278
+ return deviceId;
4279
+ }
4280
+ const groupId = typeof device.groupId === 'string' ? device.groupId : '';
4281
+ if (groupId) {
4282
+ return groupId;
4283
+ }
4284
+ const label = typeof device.label === 'string' ? device.label : '';
4285
+ if (label) {
4286
+ return label;
4287
+ }
4288
+ return null;
4289
+ }
4290
+ /**
4291
+ * Mutes the microphone to stop sending audio to the server
4292
+ * The connection and recording remain active for quick unmute
4293
+ */
4294
+ mute() {
4295
+ if (!this.isMuted) {
4296
+ this.isMuted = true;
4297
+ console.log('Microphone muted');
4298
+ this.options.onMuteStateChange(true);
4299
+ }
4300
+ }
4301
+ /**
4302
+ * Unmutes the microphone to resume sending audio to the server
4303
+ */
4304
+ unmute() {
4305
+ if (this.isMuted) {
4306
+ this.isMuted = false;
4307
+ console.log('Microphone unmuted');
4308
+ this.options.onMuteStateChange(false);
4309
+ }
4310
+ }
4311
+ }
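Because mute()/unmute() only gate outgoing audio while recording continues, toggling is instant; a sketch, assuming #mute-toggle is a button and the callback below is passed at construction:

  let muted = false;
  const button = document.getElementById('mute-toggle');
  button.onclick = () => (muted ? client.unmute() : client.mute());
  // keep local state in sync via: onMuteStateChange: (m) => { muted = m; button.textContent = m ? 'Unmute' : 'Mute'; }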
4388
4312
 
4389
4313
  return LayercodeClient;
4390
4314