@layercode/js-sdk 2.1.5 → 2.1.6

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -4,21 +4,6 @@
  (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.LayercodeClient = factory());
  })(this, (function () { 'use strict';

- function _mergeNamespaces(n, m) {
- m.forEach(function (e) {
- e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) {
- if (k !== 'default' && !(k in n)) {
- var d = Object.getOwnPropertyDescriptor(e, k);
- Object.defineProperty(n, k, d.get ? d : {
- enumerable: true,
- get: function () { return e[k]; }
- });
- }
- });
- });
- return Object.freeze(n);
- }
-
  /**
  * Raw wav audio file contents
  * @typedef {Object} WavPackerAudioType
@@ -1779,1777 +1764,1033 @@ registerProcessor('audio_processor', AudioProcessor);
1779
1764
 
1780
1765
  globalThis.WavRecorder = WavRecorder;
1781
1766
 
1782
- /**
1783
- * Converts a base64 string to an ArrayBuffer.
1784
- * @param {string} base64 - The base64 string to convert.
1785
- * @returns {ArrayBuffer} The resulting ArrayBuffer.
1786
- */
1787
- function base64ToArrayBuffer(base64) {
1788
- const binaryString = atob(base64);
1789
- const len = binaryString.length;
1790
- const bytes = new Uint8Array(len);
1791
- for (let i = 0; i < len; i++) {
1792
- bytes[i] = binaryString.charCodeAt(i);
1793
- }
1794
- return bytes.buffer;
1767
+ var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
1768
+
1769
+ function getAugmentedNamespace(n) {
1770
+ if (n.__esModule) return n;
1771
+ var f = n.default;
1772
+ if (typeof f == "function") {
1773
+ var a = function a () {
1774
+ if (this instanceof a) {
1775
+ return Reflect.construct(f, arguments, this.constructor);
1776
+ }
1777
+ return f.apply(this, arguments);
1778
+ };
1779
+ a.prototype = f.prototype;
1780
+ } else a = {};
1781
+ Object.defineProperty(a, '__esModule', {value: true});
1782
+ Object.keys(n).forEach(function (k) {
1783
+ var d = Object.getOwnPropertyDescriptor(n, k);
1784
+ Object.defineProperty(a, k, d.get ? d : {
1785
+ enumerable: true,
1786
+ get: function () {
1787
+ return n[k];
1788
+ }
1789
+ });
1790
+ });
1791
+ return a;
1795
1792
  }
1796
1793
 
1797
- /**
1798
- * Converts an ArrayBuffer to a base64 string.
1799
- * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
1800
- * @returns {string} The resulting base64 string.
1801
- */
1802
- function arrayBufferToBase64$1(arrayBuffer) {
1803
- if (arrayBuffer instanceof Float32Array) {
1804
- arrayBuffer = this.floatTo16BitPCM(arrayBuffer);
1805
- } else if (arrayBuffer instanceof Int16Array) {
1806
- arrayBuffer = arrayBuffer.buffer;
1807
- }
1808
- let binary = '';
1809
- let bytes = new Uint8Array(arrayBuffer);
1810
- const chunkSize = 0x8000; // 32KB chunk size
1811
- for (let i = 0; i < bytes.length; i += chunkSize) {
1812
- let chunk = bytes.subarray(i, i + chunkSize);
1813
- binary += String.fromCharCode.apply(null, chunk);
1814
- }
1815
- return btoa(binary);
1794
+ var dist = {};
1795
+
1796
+ var assetPath = {};
1797
+
1798
+ Object.defineProperty(assetPath, "__esModule", { value: true });
1799
+ assetPath.baseAssetPath = void 0;
1800
+ // nextjs@14 bundler may attempt to execute this during SSR and crash
1801
+ const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
1802
+ const currentScript = isWeb
1803
+ ? window.document.currentScript
1804
+ : null;
1805
+ let basePath = "/";
1806
+ if (currentScript) {
1807
+ basePath = currentScript.src
1808
+ .replace(/#.*$/, "")
1809
+ .replace(/\?.*$/, "")
1810
+ .replace(/\/[^\/]+$/, "/");
1816
1811
  }
1812
+ assetPath.baseAssetPath = basePath;
1817
1813
 
1818
- /* eslint-env browser */
1819
- // import { env as ortEnv } from 'onnxruntime-web';
1820
- const NOOP = () => { };
1821
- const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
1822
- // SDK version - updated when publishing
1823
- const SDK_VERSION = '2.1.3';
1824
- // Lazily load the browser-only VAD module to avoid accessing `self` on the server
1825
- let micVADModulePromise = null;
1826
- const loadMicVADModule = () => {
1827
- if (typeof window === 'undefined') {
1828
- return Promise.resolve(null);
1814
+ var defaultModelFetcher$1 = {};
1815
+
1816
+ Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
1817
+ defaultModelFetcher$1.defaultModelFetcher = void 0;
1818
+ const defaultModelFetcher = (path) => {
1819
+ return fetch(path).then((model) => model.arrayBuffer());
1820
+ };
1821
+ defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
1822
+
1823
+ var frameProcessor = {};
1824
+
1825
+ var logging = {};
1826
+
1827
+ (function (exports) {
1828
+ Object.defineProperty(exports, "__esModule", { value: true });
1829
+ exports.log = exports.LOG_PREFIX = void 0;
1830
+ exports.LOG_PREFIX = "[VAD]";
1831
+ const levels = ["error", "debug", "warn"];
1832
+ function getLog(level) {
1833
+ return (...args) => {
1834
+ console[level](exports.LOG_PREFIX, ...args);
1835
+ };
1836
+ }
1837
+ const _log = levels.reduce((acc, level) => {
1838
+ acc[level] = getLog(level);
1839
+ return acc;
1840
+ }, {});
1841
+ exports.log = _log;
1842
+
1843
+ } (logging));
1844
+
1845
+ var messages = {};
1846
+
1847
+ Object.defineProperty(messages, "__esModule", { value: true });
1848
+ messages.Message = void 0;
1849
+ var Message;
1850
+ (function (Message) {
1851
+ Message["AudioFrame"] = "AUDIO_FRAME";
1852
+ Message["SpeechStart"] = "SPEECH_START";
1853
+ Message["VADMisfire"] = "VAD_MISFIRE";
1854
+ Message["SpeechEnd"] = "SPEECH_END";
1855
+ Message["SpeechStop"] = "SPEECH_STOP";
1856
+ Message["SpeechRealStart"] = "SPEECH_REAL_START";
1857
+ Message["FrameProcessed"] = "FRAME_PROCESSED";
1858
+ })(Message || (messages.Message = Message = {}));
1859
+
1860
+ /*
1861
+ Some of this code, together with the default options found in index.ts,
1862
+ were taken (or took inspiration) from https://github.com/snakers4/silero-vad
1863
+ */
1864
+ Object.defineProperty(frameProcessor, "__esModule", { value: true });
1865
+ frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
1866
+ const logging_1$3 = logging;
1867
+ const messages_1 = messages;
1868
+ const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
1869
+ frameProcessor.defaultLegacyFrameProcessorOptions = {
1870
+ positiveSpeechThreshold: 0.5,
1871
+ negativeSpeechThreshold: 0.5 - 0.15,
1872
+ preSpeechPadFrames: 1,
1873
+ redemptionFrames: 8,
1874
+ frameSamples: 1536,
1875
+ minSpeechFrames: 3,
1876
+ submitUserSpeechOnPause: false,
1877
+ };
1878
+ frameProcessor.defaultV5FrameProcessorOptions = {
1879
+ positiveSpeechThreshold: 0.5,
1880
+ negativeSpeechThreshold: 0.5 - 0.15,
1881
+ preSpeechPadFrames: 3,
1882
+ redemptionFrames: 24,
1883
+ frameSamples: 512,
1884
+ minSpeechFrames: 9,
1885
+ submitUserSpeechOnPause: false,
1886
+ };
1887
+ function validateOptions(options) {
1888
+ if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
1889
+ logging_1$3.log.warn("You are using an unusual frame size");
1890
+ }
1891
+ if (options.positiveSpeechThreshold < 0 ||
1892
+ options.positiveSpeechThreshold > 1) {
1893
+ logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
1894
+ }
1895
+ if (options.negativeSpeechThreshold < 0 ||
1896
+ options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
1897
+ logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
1898
+ }
1899
+ if (options.preSpeechPadFrames < 0) {
1900
+ logging_1$3.log.error("preSpeechPadFrames should be positive");
1829
1901
  }
1830
- if (!micVADModulePromise) {
1831
- // @ts-ignore - VAD package does not provide TypeScript types
1832
- micVADModulePromise = Promise.resolve().then(function () { return index$1; });
1902
+ if (options.redemptionFrames < 0) {
1903
+ logging_1$3.log.error("redemptionFrames should be positive");
1833
1904
  }
1834
- return micVADModulePromise;
1905
+ }
1906
+ frameProcessor.validateOptions = validateOptions;
1907
+ const concatArrays = (arrays) => {
1908
+ const sizes = arrays.reduce((out, next) => {
1909
+ out.push(out.at(-1) + next.length);
1910
+ return out;
1911
+ }, [0]);
1912
+ const outArray = new Float32Array(sizes.at(-1));
1913
+ arrays.forEach((arr, index) => {
1914
+ const place = sizes[index];
1915
+ outArray.set(arr, place);
1916
+ });
1917
+ return outArray;
1835
1918
  };
1836
- /**
1837
- * @class LayercodeClient
1838
- * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
1839
- */
1840
- class LayercodeClient {
1841
- /**
1842
- * Creates an instance of LayercodeClient.
1843
- * @param {Object} options - Configuration options
1844
- */
1845
- constructor(options) {
1846
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
1847
- this.deviceId = null;
1848
- this.options = {
1849
- agentId: options.agentId,
1850
- conversationId: (_a = options.conversationId) !== null && _a !== void 0 ? _a : null,
1851
- authorizeSessionEndpoint: options.authorizeSessionEndpoint,
1852
- metadata: (_b = options.metadata) !== null && _b !== void 0 ? _b : {},
1853
- vadResumeDelay: (_c = options.vadResumeDelay) !== null && _c !== void 0 ? _c : 500,
1854
- onConnect: (_d = options.onConnect) !== null && _d !== void 0 ? _d : NOOP,
1855
- onDisconnect: (_e = options.onDisconnect) !== null && _e !== void 0 ? _e : NOOP,
1856
- onError: (_f = options.onError) !== null && _f !== void 0 ? _f : NOOP,
1857
- onDeviceSwitched: (_g = options.onDeviceSwitched) !== null && _g !== void 0 ? _g : NOOP,
1858
- onDataMessage: (_h = options.onDataMessage) !== null && _h !== void 0 ? _h : NOOP,
1859
- onMessage: (_j = options.onMessage) !== null && _j !== void 0 ? _j : NOOP,
1860
- onUserAmplitudeChange: (_k = options.onUserAmplitudeChange) !== null && _k !== void 0 ? _k : NOOP,
1861
- onAgentAmplitudeChange: (_l = options.onAgentAmplitudeChange) !== null && _l !== void 0 ? _l : NOOP,
1862
- onStatusChange: (_m = options.onStatusChange) !== null && _m !== void 0 ? _m : NOOP,
1863
- onUserIsSpeakingChange: (_o = options.onUserIsSpeakingChange) !== null && _o !== void 0 ? _o : NOOP,
1864
- onMuteStateChange: (_p = options.onMuteStateChange) !== null && _p !== void 0 ? _p : NOOP,
1919
+ class FrameProcessor {
1920
+ constructor(modelProcessFunc, modelResetFunc, options) {
1921
+ this.modelProcessFunc = modelProcessFunc;
1922
+ this.modelResetFunc = modelResetFunc;
1923
+ this.options = options;
1924
+ this.speaking = false;
1925
+ this.redemptionCounter = 0;
1926
+ this.speechFrameCount = 0;
1927
+ this.active = false;
1928
+ this.speechRealStartFired = false;
1929
+ this.reset = () => {
1930
+ this.speaking = false;
1931
+ this.speechRealStartFired = false;
1932
+ this.audioBuffer = [];
1933
+ this.modelResetFunc();
1934
+ this.redemptionCounter = 0;
1935
+ this.speechFrameCount = 0;
1865
1936
  };
1866
- this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
1867
- this._websocketUrl = DEFAULT_WS_URL;
1868
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
1869
- this.wavPlayer = new WavStreamPlayer({
1870
- finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
1871
- sampleRate: 16000, // TODO should be set my fetched agent config
1872
- });
1873
- this.vad = null;
1874
- this.ws = null;
1875
- this.status = 'disconnected';
1876
- this.userAudioAmplitude = 0;
1877
- this.agentAudioAmplitude = 0;
1878
- this.conversationId = this.options.conversationId;
1879
- this.pushToTalkActive = false;
1880
- this.pushToTalkEnabled = false;
1881
- this.canInterrupt = false;
1882
- this.userIsSpeaking = false;
1883
- this.recorderStarted = false;
1884
- this.readySent = false;
1885
- this.currentTurnId = null;
1886
- this.audioBuffer = [];
1887
- this.vadConfig = null;
1888
- this.activeDeviceId = null;
1889
- this.useSystemDefaultDevice = false;
1890
- this.lastReportedDeviceId = null;
1891
- this.lastKnownSystemDefaultDeviceKey = null;
1892
- this.isMuted = false;
1893
- this.stopPlayerAmplitude = undefined;
1894
- this.stopRecorderAmplitude = undefined;
1895
- this.deviceChangeListener = null;
1896
- // this.audioPauseTime = null;
1897
- // Bind event handlers
1898
- this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
1899
- this._handleDataAvailable = this._handleDataAvailable.bind(this);
1937
+ this.pause = (handleEvent) => {
1938
+ this.active = false;
1939
+ if (this.options.submitUserSpeechOnPause) {
1940
+ this.endSegment(handleEvent);
1941
+ }
1942
+ else {
1943
+ this.reset();
1944
+ }
1945
+ };
1946
+ this.resume = () => {
1947
+ this.active = true;
1948
+ };
1949
+ this.endSegment = (handleEvent) => {
1950
+ const audioBuffer = this.audioBuffer;
1951
+ this.audioBuffer = [];
1952
+ const speaking = this.speaking;
1953
+ this.reset();
1954
+ if (speaking) {
1955
+ const speechFrameCount = audioBuffer.reduce((acc, item) => {
1956
+ return item.isSpeech ? (acc + 1) : acc;
1957
+ }, 0);
1958
+ if (speechFrameCount >= this.options.minSpeechFrames) {
1959
+ const audio = concatArrays(audioBuffer.map((item) => item.frame));
1960
+ handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
1961
+ }
1962
+ else {
1963
+ handleEvent({ msg: messages_1.Message.VADMisfire });
1964
+ }
1965
+ }
1966
+ return {};
1967
+ };
1968
+ this.process = async (frame, handleEvent) => {
1969
+ if (!this.active) {
1970
+ return;
1971
+ }
1972
+ const probs = await this.modelProcessFunc(frame);
1973
+ const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;
1974
+ handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });
1975
+ this.audioBuffer.push({
1976
+ frame,
1977
+ isSpeech,
1978
+ });
1979
+ if (isSpeech) {
1980
+ this.speechFrameCount++;
1981
+ this.redemptionCounter = 0;
1982
+ }
1983
+ if (isSpeech && !this.speaking) {
1984
+ this.speaking = true;
1985
+ handleEvent({ msg: messages_1.Message.SpeechStart });
1986
+ }
1987
+ if (this.speaking &&
1988
+ this.speechFrameCount === this.options.minSpeechFrames &&
1989
+ !this.speechRealStartFired) {
1990
+ this.speechRealStartFired = true;
1991
+ handleEvent({ msg: messages_1.Message.SpeechRealStart });
1992
+ }
1993
+ if (probs.isSpeech < this.options.negativeSpeechThreshold &&
1994
+ this.speaking &&
1995
+ ++this.redemptionCounter >= this.options.redemptionFrames) {
1996
+ this.redemptionCounter = 0;
1997
+ this.speechFrameCount = 0;
1998
+ this.speaking = false;
1999
+ this.speechRealStartFired = false;
2000
+ const audioBuffer = this.audioBuffer;
2001
+ this.audioBuffer = [];
2002
+ const speechFrameCount = audioBuffer.reduce((acc, item) => {
2003
+ return item.isSpeech ? (acc + 1) : acc;
2004
+ }, 0);
2005
+ if (speechFrameCount >= this.options.minSpeechFrames) {
2006
+ const audio = concatArrays(audioBuffer.map((item) => item.frame));
2007
+ handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
2008
+ }
2009
+ else {
2010
+ handleEvent({ msg: messages_1.Message.VADMisfire });
2011
+ }
2012
+ }
2013
+ if (!this.speaking) {
2014
+ while (this.audioBuffer.length > this.options.preSpeechPadFrames) {
2015
+ this.audioBuffer.shift();
2016
+ }
2017
+ this.speechFrameCount = 0;
2018
+ }
2019
+ };
2020
+ this.audioBuffer = [];
2021
+ this.reset();
1900
2022
  }
1901
- _initializeVAD() {
1902
- var _a;
1903
- if (typeof window === 'undefined') {
1904
- return;
1905
- }
1906
- console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
1907
- // If we're in push to talk mode, we don't need to use the VAD model
1908
- if (this.pushToTalkEnabled) {
1909
- return;
2023
+ }
2024
+ frameProcessor.FrameProcessor = FrameProcessor;
2025
+
2026
+ var nonRealTimeVad = {};
2027
+
2028
+ var ortWeb_min = {exports: {}};
2029
+
2030
+ // Copyright (c) Microsoft Corporation. All rights reserved.
2031
+ // Licensed under the MIT License.
2032
+ const backends = {};
2033
+ const backendsSortedByPriority = [];
2034
+ /**
2035
+ * Register a backend.
2036
+ *
2037
+ * @param name - the name as a key to lookup as an execution provider.
2038
+ * @param backend - the backend object.
2039
+ * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
2040
+ * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
2041
+ *
2042
+ * @internal
2043
+ */
2044
+ const registerBackend = (name, backend, priority) => {
2045
+ if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
2046
+ const currentBackend = backends[name];
2047
+ if (currentBackend === undefined) {
2048
+ backends[name] = { backend, priority };
1910
2049
  }
1911
- // Check if VAD is disabled
1912
- if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
1913
- console.log('VAD is disabled by backend configuration');
2050
+ else if (currentBackend.priority > priority) {
2051
+ // same name is already registered with a higher priority. skip registeration.
1914
2052
  return;
1915
2053
  }
1916
- // Build VAD configuration object, only including keys that are defined
1917
- const vadOptions = {
1918
- stream: this.wavRecorder.getStream() || undefined,
1919
- onSpeechStart: () => {
1920
- console.debug('onSpeechStart: sending vad_start');
1921
- this.userIsSpeaking = true;
1922
- this.options.onUserIsSpeakingChange(true);
1923
- this._wsSend({
1924
- type: 'vad_events',
1925
- event: 'vad_start',
1926
- });
1927
- this.options.onMessage({
1928
- type: 'vad_events',
1929
- event: 'vad_start',
1930
- });
1931
- },
1932
- onSpeechEnd: () => {
1933
- console.debug('onSpeechEnd: sending vad_end');
1934
- this.userIsSpeaking = false;
1935
- this.options.onUserIsSpeakingChange(false);
1936
- this.audioBuffer = []; // Clear buffer on speech end
1937
- this._wsSend({
1938
- type: 'vad_events',
1939
- event: 'vad_end',
1940
- });
1941
- this.options.onMessage({
1942
- type: 'vad_events',
1943
- event: 'vad_end',
1944
- });
1945
- },
1946
- };
1947
- // Apply VAD configuration from backend if available
1948
- if (this.vadConfig) {
1949
- // Only add keys that are explicitly defined (not undefined)
1950
- if (this.vadConfig.model !== undefined)
1951
- vadOptions.model = this.vadConfig.model;
1952
- if (this.vadConfig.positive_speech_threshold !== undefined)
1953
- vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
1954
- if (this.vadConfig.negative_speech_threshold !== undefined)
1955
- vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
1956
- if (this.vadConfig.redemption_frames !== undefined)
1957
- vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
1958
- if (this.vadConfig.min_speech_frames !== undefined)
1959
- vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
1960
- if (this.vadConfig.pre_speech_pad_frames !== undefined)
1961
- vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
1962
- if (this.vadConfig.frame_samples !== undefined)
1963
- vadOptions.frameSamples = this.vadConfig.frame_samples;
1964
- }
1965
- else {
1966
- // Default values if no config from backend
1967
- vadOptions.model = 'v5';
1968
- vadOptions.positiveSpeechThreshold = 0.15;
1969
- vadOptions.negativeSpeechThreshold = 0.05;
1970
- vadOptions.redemptionFrames = 4;
1971
- vadOptions.minSpeechFrames = 2;
1972
- vadOptions.preSpeechPadFrames = 0;
1973
- vadOptions.frameSamples = 512; // Required for v5
2054
+ else if (currentBackend.priority === priority) {
2055
+ if (currentBackend.backend !== backend) {
2056
+ throw new Error(`cannot register backend "${name}" using priority ${priority}`);
2057
+ }
1974
2058
  }
1975
- console.log('Creating VAD with options:', vadOptions);
1976
- loadMicVADModule()
1977
- .then((module) => { var _a, _b, _c; return (_c = (_b = (_a = module === null || module === void 0 ? void 0 : module.MicVAD) === null || _a === void 0 ? void 0 : _a.new) === null || _b === void 0 ? void 0 : _b.call(_a, vadOptions)) !== null && _c !== void 0 ? _c : null; })
1978
- .then((vad) => {
1979
- if (!vad) {
1980
- throw new Error('MicVAD module not available');
2059
+ if (priority >= 0) {
2060
+ const i = backendsSortedByPriority.indexOf(name);
2061
+ if (i !== -1) {
2062
+ backendsSortedByPriority.splice(i, 1);
1981
2063
  }
1982
- this.vad = vad;
1983
- this.vad.start();
1984
- console.log('VAD started successfully');
1985
- })
1986
- .catch((error) => {
1987
- console.warn('Error initializing VAD:', error);
1988
- // Send a message to server indicating VAD failure
1989
- this._wsSend({
1990
- type: 'vad_events',
1991
- event: 'vad_model_failed',
1992
- });
1993
- });
1994
- }
1995
- /**
1996
- * Updates the connection status and triggers the callback
1997
- * @param {string} status - New status value
1998
- */
1999
- _setStatus(status) {
2000
- this.status = status;
2001
- this.options.onStatusChange(status);
2002
- }
2003
- /**
2004
- * Handles when agent audio finishes playing
2005
- */
2006
- _clientResponseAudioReplayFinished() {
2007
- console.debug('clientResponseAudioReplayFinished');
2008
- this._wsSend({
2009
- type: 'trigger.response.audio.replay_finished',
2010
- reason: 'completed',
2011
- });
2012
- }
2013
- async _clientInterruptAssistantReplay() {
2014
- await this.wavPlayer.interrupt();
2015
- }
2016
- async triggerUserTurnStarted() {
2017
- if (!this.pushToTalkActive) {
2018
- this.pushToTalkActive = true;
2019
- this._wsSend({ type: 'trigger.turn.start', role: 'user' });
2020
- await this._clientInterruptAssistantReplay();
2064
+ for (let i = 0; i < backendsSortedByPriority.length; i++) {
2065
+ if (backends[backendsSortedByPriority[i]].priority <= priority) {
2066
+ backendsSortedByPriority.splice(i, 0, name);
2067
+ return;
2068
+ }
2069
+ }
2070
+ backendsSortedByPriority.push(name);
2021
2071
  }
2072
+ return;
2022
2073
  }
2023
- async triggerUserTurnFinished() {
2024
- if (this.pushToTalkActive) {
2025
- this.pushToTalkActive = false;
2026
- this._wsSend({ type: 'trigger.turn.end', role: 'user' });
2027
- }
2028
- }
2029
- /**
2030
- * Handles incoming WebSocket messages
2031
- * @param {MessageEvent} event - The WebSocket message event
2032
- */
2033
- async _handleWebSocketMessage(event) {
2034
- try {
2035
- const message = JSON.parse(event.data);
2036
- if (message.type !== 'response.audio') {
2037
- console.debug('msg:', message);
2038
- }
2039
- switch (message.type) {
2040
- case 'turn.start':
2041
- // Sent from the server to this client when a new user turn is detected
2042
- if (message.role === 'assistant') {
2043
- // Start tracking new assistant turn
2044
- console.debug('Assistant turn started, will track new turn ID from audio/text');
2045
- }
2046
- else if (message.role === 'user' && !this.pushToTalkEnabled) {
2047
- // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
2048
- console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
2049
- await this._clientInterruptAssistantReplay();
2050
- }
2051
- this.options.onMessage(message);
2052
- break;
2053
- case 'response.audio':
2054
- const audioBuffer = base64ToArrayBuffer(message.content);
2055
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
2056
- // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
2057
- // Set current turn ID from first audio message, or update if different turn
2058
- if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
2059
- console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
2060
- this.currentTurnId = message.turn_id;
2061
- // Clean up interrupted tracks, keeping only the current turn
2062
- this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
2063
- }
2064
- break;
2065
- case 'response.text':
2066
- // Set turn ID from first text message if not set
2067
- if (!this.currentTurnId) {
2068
- this.currentTurnId = message.turn_id;
2069
- console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
2070
- }
2071
- this.options.onMessage(message);
2072
- break;
2073
- case 'response.data':
2074
- this.options.onDataMessage(message);
2075
- break;
2076
- case 'user.transcript':
2077
- case 'user.transcript.delta':
2078
- case 'user.transcript.interim_delta':
2079
- this.options.onMessage(message);
2080
- break;
2081
- default:
2082
- console.warn('Unknown message type received:', message);
2083
- }
2084
- }
2085
- catch (error) {
2086
- console.error('Error processing WebSocket message:', error);
2087
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2088
- }
2089
- }
2090
- /**
2091
- * Handles available client browser microphone audio data and sends it over the WebSocket
2092
- * @param {ArrayBuffer} data - The audio data buffer
2093
- */
2094
- _handleDataAvailable(data) {
2095
- var _a, _b, _c;
2096
- try {
2097
- const base64 = arrayBufferToBase64$1(data.mono);
2098
- // Don't send audio if muted
2099
- if (this.isMuted) {
2100
- return;
2101
- }
2102
- // Determine if we should gate audio based on VAD configuration
2103
- const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
2104
- const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
2105
- let sendAudio;
2106
- if (this.pushToTalkEnabled) {
2107
- sendAudio = this.pushToTalkActive;
2108
- }
2109
- else if (shouldGateAudio) {
2110
- sendAudio = this.userIsSpeaking;
2074
+ throw new TypeError('not a valid backend');
2075
+ };
2076
+ /**
2077
+ * Resolve backend by specified hints.
2078
+ *
2079
+ * @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
2080
+ * @returns a promise that resolves to the backend.
2081
+ *
2082
+ * @internal
2083
+ */
2084
+ const resolveBackend = async (backendHints) => {
2085
+ const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
2086
+ const errors = [];
2087
+ for (const backendName of backendNames) {
2088
+ const backendInfo = backends[backendName];
2089
+ if (backendInfo) {
2090
+ if (backendInfo.initialized) {
2091
+ return backendInfo.backend;
2111
2092
  }
2112
- else {
2113
- // If gate_audio is false, always send audio
2114
- sendAudio = true;
2093
+ else if (backendInfo.aborted) {
2094
+ continue; // current backend is unavailable; try next
2115
2095
  }
2116
- if (sendAudio) {
2117
- // If we have buffered audio and we're gating, send it first
2118
- if (shouldGateAudio && this.audioBuffer.length > 0) {
2119
- console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
2120
- for (const bufferedAudio of this.audioBuffer) {
2121
- this._wsSend({
2122
- type: 'client.audio',
2123
- content: bufferedAudio,
2124
- });
2125
- }
2126
- this.audioBuffer = []; // Clear the buffer after sending
2096
+ const isInitializing = !!backendInfo.initPromise;
2097
+ try {
2098
+ if (!isInitializing) {
2099
+ backendInfo.initPromise = backendInfo.backend.init();
2127
2100
  }
2128
- // Send the current audio
2129
- this._wsSend({
2130
- type: 'client.audio',
2131
- content: base64,
2132
- });
2101
+ await backendInfo.initPromise;
2102
+ backendInfo.initialized = true;
2103
+ return backendInfo.backend;
2133
2104
  }
2134
- else {
2135
- // Buffer audio when not sending (to catch audio just before VAD triggers)
2136
- this.audioBuffer.push(base64);
2137
- // Keep buffer size based on configuration
2138
- if (this.audioBuffer.length > bufferFrames) {
2139
- this.audioBuffer.shift(); // Remove oldest chunk
2105
+ catch (e) {
2106
+ if (!isInitializing) {
2107
+ errors.push({ name: backendName, err: e });
2140
2108
  }
2109
+ backendInfo.aborted = true;
2110
+ }
2111
+ finally {
2112
+ delete backendInfo.initPromise;
2141
2113
  }
2142
- }
2143
- catch (error) {
2144
- console.error('Error processing audio:', error);
2145
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2146
- }
2147
- }
2148
- _wsSend(message) {
2149
- var _a;
2150
- if (message.type !== 'client.audio') {
2151
- console.debug('sent_msg:', message);
2152
- }
2153
- const messageString = JSON.stringify(message);
2154
- if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
2155
- this.ws.send(messageString);
2156
2114
  }
2157
2115
  }
2158
- _sendReadyIfNeeded() {
2159
- var _a;
2160
- if (this.recorderStarted && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
2161
- this._wsSend({ type: 'client.ready' });
2162
- this.readySent = true;
2163
- }
2116
+ throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
2117
+ };
2118
+
2119
+ // Copyright (c) Microsoft Corporation. All rights reserved.
2120
+ // Licensed under the MIT License.
2121
+ class EnvImpl {
2122
+ constructor() {
2123
+ this.wasm = {};
2124
+ this.webgl = {};
2125
+ this.logLevelInternal = 'warning';
2164
2126
  }
2165
- /**
2166
- * Sets up amplitude monitoring for a given audio source.
2167
- * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
2168
- * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
2169
- * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
2170
- */
2171
- _setupAmplitudeMonitoring(source, callback, updateInternalState) {
2172
- let updateCounter = 0;
2173
- source.startAmplitudeMonitoring((amplitude) => {
2174
- // Only update and call callback at the specified sample rate
2175
- if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
2176
- updateInternalState(amplitude);
2177
- if (callback !== NOOP) {
2178
- callback(amplitude);
2179
- }
2180
- updateCounter = 0; // Reset counter after sampling
2181
- }
2182
- updateCounter++;
2183
- });
2184
- const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
2185
- if (source === this.wavPlayer) {
2186
- this.stopPlayerAmplitude = stop;
2127
+ // TODO standadize the getter and setter convention in env for other fields.
2128
+ set logLevel(value) {
2129
+ if (value === undefined) {
2130
+ return;
2187
2131
  }
2188
- if (source === this.wavRecorder) {
2189
- this.stopRecorderAmplitude = stop;
2132
+ if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
2133
+ throw new Error(`Unsupported logging level: ${value}`);
2190
2134
  }
2135
+ this.logLevelInternal = value;
2191
2136
  }
2192
- _stopAmplitudeMonitoring() {
2193
- var _a, _b;
2194
- (_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
2195
- (_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
2196
- this.stopPlayerAmplitude = undefined;
2197
- this.stopRecorderAmplitude = undefined;
2137
+ get logLevel() {
2138
+ return this.logLevelInternal;
2198
2139
  }
2199
- /**
2200
- * Connects to the Layercode agent using the stored conversation ID and starts the audio conversation
2201
- * @async
2202
- * @returns {Promise<void>}
2203
- */
2204
- async connect() {
2205
- if (this.status === 'connecting') {
2206
- return;
2207
- }
2208
- try {
2209
- this._setStatus('connecting');
2210
- // Reset turn tracking for clean start
2211
- this._resetTurnTracking();
2212
- this._stopAmplitudeMonitoring();
2213
- this._setupDeviceChangeListener();
2214
- // Get conversation key from server
2215
- let authorizeSessionRequestBody = {
2216
- agent_id: this.options.agentId,
2217
- metadata: this.options.metadata,
2218
- sdk_version: SDK_VERSION,
2219
- };
2220
- // If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
2221
- if (this.options.conversationId) {
2222
- authorizeSessionRequestBody.conversation_id = this.options.conversationId;
2223
- }
2224
- const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
2225
- method: 'POST',
2226
- headers: {
2227
- 'Content-Type': 'application/json',
2228
- },
2229
- body: JSON.stringify(authorizeSessionRequestBody),
2230
- });
2231
- if (!authorizeSessionResponse.ok) {
2232
- throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
2233
- }
2234
- const authorizeSessionResponseBody = await authorizeSessionResponse.json();
2235
- this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
2236
- this.options.conversationId = this.conversationId;
2237
- // Connect WebSocket
2238
- this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
2239
- client_session_key: authorizeSessionResponseBody.client_session_key,
2240
- })}`);
2241
- const config = authorizeSessionResponseBody.config;
2242
- console.log('AgentConfig', config);
2243
- // Store VAD configuration
2244
- this.vadConfig = config.vad || null;
2245
- if (config.transcription.trigger === 'push_to_talk') {
2246
- this.pushToTalkEnabled = true;
2140
+ }
2141
+
2142
+ // Copyright (c) Microsoft Corporation. All rights reserved.
2143
+ // Licensed under the MIT License.
2144
+ /**
2145
+ * Represent a set of flags as a global singleton.
2146
+ */
2147
+ const env = new EnvImpl();
2148
+
2149
+ // Copyright (c) Microsoft Corporation. All rights reserved.
2150
+ // Licensed under the MIT License.
2151
+ const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && typeof BigInt64Array.from === 'function';
2152
+ const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && typeof BigUint64Array.from === 'function';
2153
+ // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2154
+ const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
2155
+ ['float32', Float32Array],
2156
+ ['uint8', Uint8Array],
2157
+ ['int8', Int8Array],
2158
+ ['uint16', Uint16Array],
2159
+ ['int16', Int16Array],
2160
+ ['int32', Int32Array],
2161
+ ['bool', Uint8Array],
2162
+ ['float64', Float64Array],
2163
+ ['uint32', Uint32Array],
2164
+ ]);
2165
+ // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2166
+ const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
2167
+ [Float32Array, 'float32'],
2168
+ [Uint8Array, 'uint8'],
2169
+ [Int8Array, 'int8'],
2170
+ [Uint16Array, 'uint16'],
2171
+ [Int16Array, 'int16'],
2172
+ [Int32Array, 'int32'],
2173
+ [Float64Array, 'float64'],
2174
+ [Uint32Array, 'uint32'],
2175
+ ]);
2176
+ if (isBigInt64ArrayAvailable) {
2177
+ NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
2178
+ NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
2179
+ }
2180
+ if (isBigUint64ArrayAvailable) {
2181
+ NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
2182
+ NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
2183
+ }
2184
+ /**
2185
+ * calculate size from dims.
2186
+ *
2187
+ * @param dims the dims array. May be an illegal input.
2188
+ */
2189
+ const calculateSize = (dims) => {
2190
+ let size = 1;
2191
+ for (let i = 0; i < dims.length; i++) {
2192
+ const dim = dims[i];
2193
+ if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
2194
+ throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
2195
+ }
2196
+ if (dim < 0) {
2197
+ throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
2198
+ }
2199
+ size *= dim;
2200
+ }
2201
+ return size;
2202
+ };
2203
+ let Tensor$1 = class Tensor {
2204
+ constructor(arg0, arg1, arg2) {
2205
+ let type;
2206
+ let data;
2207
+ let dims;
2208
+ // check whether arg0 is type or data
2209
+ if (typeof arg0 === 'string') {
2210
+ //
2211
+ // Override: constructor(type, data, ...)
2212
+ //
2213
+ type = arg0;
2214
+ dims = arg2;
2215
+ if (arg0 === 'string') {
2216
+ // string tensor
2217
+ if (!Array.isArray(arg1)) {
2218
+ throw new TypeError('A string tensor\'s data must be a string array.');
2219
+ }
2220
+ // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
2221
+ // error will be populated at inference
2222
+ data = arg1;
2247
2223
  }
2248
- else if (config.transcription.trigger === 'automatic') {
2249
- this.pushToTalkEnabled = false;
2250
- this.canInterrupt = config.transcription.can_interrupt;
2224
+ else {
2225
+ // numeric tensor
2226
+ const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(arg0);
2227
+ if (typedArrayConstructor === undefined) {
2228
+ throw new TypeError(`Unsupported tensor type: ${arg0}.`);
2229
+ }
2230
+ if (Array.isArray(arg1)) {
2231
+ // use 'as any' here because TypeScript's check on type of 'SupportedTypedArrayConstructors.from()' produces
2232
+ // incorrect results.
2233
+ // 'typedArrayConstructor' should be one of the typed array prototype objects.
2234
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2235
+ data = typedArrayConstructor.from(arg1);
2236
+ }
2237
+ else if (arg1 instanceof typedArrayConstructor) {
2238
+ data = arg1;
2239
+ }
2240
+ else {
2241
+ throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
2242
+ }
2243
+ }
2244
+ }
2245
+ else {
2246
+ //
2247
+ // Override: constructor(data, ...)
2248
+ //
2249
+ dims = arg1;
2250
+ if (Array.isArray(arg0)) {
2251
+ // only boolean[] and string[] is supported
2252
+ if (arg0.length === 0) {
2253
+ throw new TypeError('Tensor type cannot be inferred from an empty array.');
2254
+ }
2255
+ const firstElementType = typeof arg0[0];
2256
+ if (firstElementType === 'string') {
2257
+ type = 'string';
2258
+ data = arg0;
2259
+ }
2260
+ else if (firstElementType === 'boolean') {
2261
+ type = 'bool';
2262
+ // 'arg0' is of type 'boolean[]'. Uint8Array.from(boolean[]) actually works, but typescript thinks this is
2263
+ // wrong type. We use 'as any' to make it happy.
2264
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2265
+ data = Uint8Array.from(arg0);
2266
+ }
2267
+ else {
2268
+ throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
2269
+ }
2251
2270
  }
2252
2271
  else {
2253
- throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
2272
+ // get tensor type from TypedArray
2273
+ const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
2274
+ if (mappedType === undefined) {
2275
+ throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
2276
+ }
2277
+ type = mappedType;
2278
+ data = arg0;
2254
2279
  }
2255
- // Bind the websocket message callbacks
2256
- this.ws.onmessage = this._handleWebSocketMessage;
2257
- this.ws.onopen = () => {
2258
- console.log('WebSocket connection established');
2259
- this._setStatus('connected');
2260
- this.options.onConnect({ conversationId: this.conversationId });
2261
- // Attempt to send ready message if recorder already started
2262
- this._sendReadyIfNeeded();
2263
- };
2264
- this.ws.onclose = () => {
2265
- console.log('WebSocket connection closed');
2266
- this.ws = null;
2267
- this._performDisconnectCleanup().catch((error) => {
2268
- console.error('Error during disconnect cleanup:', error);
2269
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2270
- });
2271
- };
2272
- this.ws.onerror = (error) => {
2273
- console.error('WebSocket error:', error);
2274
- this._setStatus('error');
2275
- this.options.onError(new Error('WebSocket connection error'));
2276
- };
2277
- // Initialize audio player
2278
- await this.wavPlayer.connect();
2279
- // Set up audio player amplitude monitoring
2280
- this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
2281
- // wavRecorder will be started from the onDeviceSwitched callback,
2282
- // which is called when the device is first initialized and also when the device is switched
2283
- // this is to ensure that the device is initialized before the recorder is started
2284
2280
  }
2285
- catch (error) {
2286
- console.error('Error connecting to Layercode agent:', error);
2287
- this._setStatus('error');
2288
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2289
- throw error;
2281
+ // type and data is processed, now processing dims
2282
+ if (dims === undefined) {
2283
+ // assume 1-D tensor if dims omitted
2284
+ dims = [data.length];
2290
2285
  }
2291
- }
2292
- _resetTurnTracking() {
2293
- this.currentTurnId = null;
2294
- console.debug('Reset turn tracking state');
2295
- }
2296
- async disconnect() {
2297
- if (this.status === 'disconnected') {
2298
- return;
2286
+ else if (!Array.isArray(dims)) {
2287
+ throw new TypeError('A tensor\'s dims must be a number array');
2299
2288
  }
2300
- if (this.ws) {
2301
- this.ws.onopen = null;
2302
- this.ws.onclose = null;
2303
- this.ws.onerror = null;
2304
- this.ws.onmessage = null;
2305
- this.ws.close();
2306
- this.ws = null;
2289
+ // perform check
2290
+ const size = calculateSize(dims);
2291
+ if (size !== data.length) {
2292
+ throw new Error(`Tensor's size(${size}) does not match data length(${data.length}).`);
2307
2293
  }
2308
- await this._performDisconnectCleanup();
2309
- }
2310
- /**
2311
- * Gets the microphone MediaStream used by this client
2312
- * @returns {MediaStream|null} The microphone stream or null if not initialized
2313
- */
2314
- getStream() {
2315
- return this.wavRecorder.getStream();
2294
+ this.dims = dims;
2295
+ this.type = type;
2296
+ this.data = data;
2297
+ this.size = size;
2316
2298
  }
2299
+ // #endregion
2317
2300
  /**
2318
- * Switches the input device for the microphone and restarts recording
2319
- * @param {string} deviceId - The deviceId of the new microphone
2301
+ * Create a new tensor object from image object
2302
+ *
2303
+ * @param buffer - Extracted image buffer data - assuming RGBA format
2304
+ * @param imageFormat - input image configuration - required configurations height, width, format
2305
+ * @param tensorFormat - output tensor configuration - Default is RGB format
2320
2306
  */
2321
- async setInputDevice(deviceId) {
2322
- var _a, _b, _c;
2323
- try {
2324
- const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
2325
- this.useSystemDefaultDevice = normalizedDeviceId === null;
2326
- this.deviceId = normalizedDeviceId;
2327
- // Restart recording with the new device
2328
- await this._restartAudioRecording();
2329
- // Reinitialize VAD with the new audio stream if VAD is enabled
2330
- const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
2331
- if (shouldUseVAD) {
2332
- console.debug('Reinitializing VAD with new audio stream');
2333
- const newStream = this.wavRecorder.getStream();
2334
- await this._reinitializeVAD(newStream);
2335
- }
2336
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
2337
- console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
2307
+ static bufferToTensor(buffer, options) {
2308
+ if (buffer === undefined) {
2309
+ throw new Error('Image buffer must be defined');
2338
2310
  }
2339
- catch (error) {
2340
- console.error(`Failed to switch to input device ${deviceId}:`, error);
2341
- throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
2311
+ if (options.height === undefined || options.width === undefined) {
2312
+ throw new Error('Image height and width must be defined');
2342
2313
  }
2343
- }
2344
- /**
2345
- * Restarts audio recording after a device switch to ensure audio is captured from the new device
2346
- */
2347
- async _restartAudioRecording() {
2348
- var _a, _b;
2349
- try {
2350
- console.debug('Restarting audio recording after device switch...');
2351
- try {
2352
- await this.wavRecorder.end();
2353
- }
2354
- catch (_c) {
2355
- // Ignore cleanup errors
2356
- }
2357
- // Start with new device
2358
- const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
2359
- await this.wavRecorder.begin(targetDeviceId);
2360
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
2361
- // Re-setup amplitude monitoring with the new stream
2362
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
2363
- const previousReportedDeviceId = this.lastReportedDeviceId;
2364
- const stream = this.wavRecorder.getStream();
2365
- const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
2366
- const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
2367
- const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
2368
- this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
2369
- if (!this.recorderStarted) {
2370
- this.recorderStarted = true;
2371
- this._sendReadyIfNeeded();
2372
- }
2373
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
2374
- if (reportedDeviceId !== previousReportedDeviceId) {
2375
- this.lastReportedDeviceId = reportedDeviceId;
2376
- if (this.options.onDeviceSwitched) {
2377
- this.options.onDeviceSwitched(reportedDeviceId);
2378
- }
2379
- }
2380
- console.debug('Audio recording restart completed successfully');
2381
- }
2382
- catch (error) {
2383
- console.error('Error restarting audio recording after device switch:', error);
2384
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2314
+ const { height, width } = options;
2315
+ const norm = options.norm;
2316
+ let normMean;
2317
+ let normBias;
2318
+ if (norm === undefined || norm.mean === undefined) {
2319
+ normMean = 255;
2385
2320
  }
2386
- }
2387
- /**
2388
- * Reinitializes VAD with a new stream (used after device switching)
2389
- */
2390
- async _reinitializeVAD(stream) {
2391
- // Clean up existing VAD
2392
- if (this.vad) {
2393
- this.vad.pause();
2394
- this.vad.destroy();
2395
- this.vad = null;
2321
+ else {
2322
+ normMean = norm.mean;
2396
2323
  }
2397
- // Reinitialize with new stream
2398
- if (stream) {
2399
- this._initializeVAD();
2324
+ if (norm === undefined || norm.bias === undefined) {
2325
+ normBias = 0;
2400
2326
  }
2401
- }
2402
- /**
2403
- * Sets up the device change event listener
2404
- */
2405
- _setupDeviceChangeListener() {
2406
- if (!this.deviceChangeListener) {
2407
- this.deviceChangeListener = async (devices) => {
2408
- try {
2409
- const defaultDevice = devices.find((device) => device.default);
2410
- const usingDefaultDevice = this.useSystemDefaultDevice;
2411
- const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
2412
- const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
2413
- let shouldSwitch = !this.recorderStarted;
2414
- if (!shouldSwitch) {
2415
- if (usingDefaultDevice) {
2416
- if (!defaultDevice) {
2417
- shouldSwitch = true;
2418
- }
2419
- else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
2420
- shouldSwitch = true;
2421
- }
2422
- else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
2423
- (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
2424
- shouldSwitch = true;
2425
- }
2426
- }
2427
- else {
2428
- const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
2429
- shouldSwitch = !matchesRequestedDevice;
2430
- }
2431
- }
2432
- this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
2433
- if (shouldSwitch) {
2434
- console.debug('Selecting fallback audio input device');
2435
- const fallbackDevice = defaultDevice || devices[0];
2436
- if (fallbackDevice) {
2437
- const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
2438
- await this.setInputDevice(fallbackId);
2439
- }
2440
- else {
2441
- console.warn('No alternative audio device found');
2442
- }
2443
- }
2444
- }
2445
- catch (error) {
2446
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
2447
- }
2448
- };
2327
+ else {
2328
+ normBias = norm.bias;
2449
2329
  }
2450
- this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
2451
- }
2452
- _teardownDeviceListeners() {
2453
- this.wavRecorder.listenForDeviceChange(null);
2454
- }
2455
- async _performDisconnectCleanup() {
2456
- var _a, _b;
2457
- this.deviceId = null;
2458
- this.activeDeviceId = null;
2459
- this.useSystemDefaultDevice = false;
2460
- this.lastReportedDeviceId = null;
2461
- this.lastKnownSystemDefaultDeviceKey = null;
2462
- this.recorderStarted = false;
2463
- this.readySent = false;
2464
- this._stopAmplitudeMonitoring();
2465
- this._teardownDeviceListeners();
2466
- if (this.vad) {
2467
- this.vad.pause();
2468
- this.vad.destroy();
2469
- this.vad = null;
2330
+ const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
2331
+ // default value is RGBA since imagedata and HTMLImageElement uses it
2332
+ const outputformat = options.tensorFormat !== undefined ?
2333
+ (options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
2334
+ 'RGB';
2335
+ const offset = height * width;
2336
+ const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
2337
+ // Default pointer assignments
2338
+ let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
2339
+ let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
2340
+ // Updating the pointer assignments based on the input image format
2341
+ if (inputformat === 'RGB') {
2342
+ step = 3;
2343
+ rImagePointer = 0;
2344
+ gImagePointer = 1;
2345
+ bImagePointer = 2;
2346
+ aImagePointer = -1;
2470
2347
  }
2471
- await this.wavRecorder.quit();
2472
- (_b = (_a = this.wavPlayer).stop) === null || _b === void 0 ? void 0 : _b.call(_a);
2473
- this.wavPlayer.disconnect();
2474
- this._resetTurnTracking();
2475
- this.options.conversationId = this.conversationId;
2476
- this.userAudioAmplitude = 0;
2477
- this.agentAudioAmplitude = 0;
2478
- this._setStatus('disconnected');
2479
- this.options.onDisconnect();
2480
- }
2481
- _getDeviceComparisonKey(device) {
2482
- if (!device || typeof device !== 'object') {
2483
- return null;
2348
+ // Updating the pointer assignments based on the output tensor format
2349
+ if (outputformat === 'RGBA') {
2350
+ aTensorPointer = offset * 3;
2484
2351
  }
2485
- const deviceId = typeof device.deviceId === 'string' ? device.deviceId : '';
2486
- if (deviceId && deviceId !== 'default') {
2487
- return deviceId;
2352
+ else if (outputformat === 'RBG') {
2353
+ rTensorPointer = 0;
2354
+ bTensorPointer = offset;
2355
+ gTensorPointer = offset * 2;
2488
2356
  }
2489
- const groupId = typeof device.groupId === 'string' ? device.groupId : '';
2490
- if (groupId) {
2491
- return groupId;
2357
+ else if (outputformat === 'BGR') {
2358
+ bTensorPointer = 0;
2359
+ gTensorPointer = offset;
2360
+ rTensorPointer = offset * 2;
2492
2361
  }
2493
- const label = typeof device.label === 'string' ? device.label : '';
2494
- if (label) {
2495
- return label;
2362
+ for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
2363
+ float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
2364
+ float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
2365
+ float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
2366
+ if (aTensorPointer !== -1 && aImagePointer !== -1) {
2367
+ float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
2368
+ }
2496
2369
  }
2497
- return null;
2370
+ // Float32Array -> ort.Tensor
2371
+ const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
2372
+ new Tensor('float32', float32Data, [1, 3, height, width]);
2373
+ return outputTensor;
2498
2374
  }
2499
- /**
2500
- * Mutes the microphone to stop sending audio to the server
2501
- * The connection and recording remain active for quick unmute
2502
- */
2503
- mute() {
2504
- if (!this.isMuted) {
2505
- this.isMuted = true;
2506
- console.log('Microphone muted');
2507
- this.options.onMuteStateChange(true);
2375
+ static async fromImage(image, options) {
2376
+ // checking the type of image object
2377
+ const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
2378
+ const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
2379
+ const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
2380
+ const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
2381
+ let data;
2382
+ let tensorConfig = {};
2383
+ // filling and checking image configuration options
2384
+ if (isHTMLImageEle) {
2385
+ // HTMLImageElement - image object - format is RGBA by default
2386
+ const canvas = document.createElement('canvas');
2387
+ const pixels2DContext = canvas.getContext('2d');
2388
+ if (pixels2DContext != null) {
2389
+ let height = image.naturalHeight;
2390
+ let width = image.naturalWidth;
2391
+ if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
2392
+ height = options.resizedHeight;
2393
+ width = options.resizedWidth;
2394
+ }
2395
+ if (options !== undefined) {
2396
+ tensorConfig = options;
2397
+ if (options.tensorFormat !== undefined) {
2398
+ throw new Error('Image input config format must be RGBA for HTMLImageElement');
2399
+ }
2400
+ else {
2401
+ tensorConfig.tensorFormat = 'RGBA';
2402
+ }
2403
+ if (options.height !== undefined && options.height !== height) {
2404
+ throw new Error('Image input config height doesn\'t match HTMLImageElement height');
2405
+ }
2406
+ else {
2407
+ tensorConfig.height = height;
2408
+ }
2409
+ if (options.width !== undefined && options.width !== width) {
2410
+ throw new Error('Image input config width doesn\'t match HTMLImageElement width');
2411
+ }
2412
+ else {
2413
+ tensorConfig.width = width;
2414
+ }
2415
+ }
2416
+ else {
2417
+ tensorConfig.tensorFormat = 'RGBA';
2418
+ tensorConfig.height = height;
2419
+ tensorConfig.width = width;
2420
+ }
2421
+ canvas.width = width;
2422
+ canvas.height = height;
2423
+ pixels2DContext.drawImage(image, 0, 0, width, height);
2424
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2425
+ }
2426
+ else {
2427
+ throw new Error('Can not access image data');
2428
+ }
2508
2429
  }
2509
- }
2510
- /**
2511
- * Unmutes the microphone to resume sending audio to the server
2512
- */
2513
- unmute() {
2514
- if (this.isMuted) {
2515
- this.isMuted = false;
2516
- console.log('Microphone unmuted');
2517
- this.options.onMuteStateChange(false);
2518
- }
2519
- }
2520
- }
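Because the connection and recording stay live, toggling is cheap; a sketch, assuming client is an already-connected LayercodeClient instance:

    client.mute();     // stops sending mic audio; fires onMuteStateChange(true)
    client.unmute();   // resumes sending; fires onMuteStateChange(false)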
2521
-
2522
- var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {};
2523
-
2524
- function getDefaultExportFromCjs (x) {
2525
- return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
2526
- }
2527
-
2528
- function getAugmentedNamespace(n) {
2529
- if (n.__esModule) return n;
2530
- var f = n.default;
2531
- if (typeof f == "function") {
2532
- var a = function a () {
2533
- if (this instanceof a) {
2534
- return Reflect.construct(f, arguments, this.constructor);
2535
- }
2536
- return f.apply(this, arguments);
2537
- };
2538
- a.prototype = f.prototype;
2539
- } else a = {};
2540
- Object.defineProperty(a, '__esModule', {value: true});
2541
- Object.keys(n).forEach(function (k) {
2542
- var d = Object.getOwnPropertyDescriptor(n, k);
2543
- Object.defineProperty(a, k, d.get ? d : {
2544
- enumerable: true,
2545
- get: function () {
2546
- return n[k];
2547
- }
2548
- });
2549
- });
2550
- return a;
2551
- }
2552
-
2553
- var dist = {};
2554
-
2555
- var assetPath = {};
2556
-
2557
- Object.defineProperty(assetPath, "__esModule", { value: true });
2558
- assetPath.baseAssetPath = void 0;
2559
- // nextjs@14 bundler may attempt to execute this during SSR and crash
2560
- const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
2561
- const currentScript = isWeb
2562
- ? window.document.currentScript
2563
- : null;
2564
- let basePath = "/";
2565
- if (currentScript) {
2566
- basePath = currentScript.src
2567
- .replace(/#.*$/, "")
2568
- .replace(/\?.*$/, "")
2569
- .replace(/\/[^\/]+$/, "/");
2570
- }
2571
- assetPath.baseAssetPath = basePath;
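Concretely, the three replaces strip the fragment, the query string, and the final path segment: a script loaded from the made-up URL https://cdn.example.com/vad/bundle.min.js?v=2#main yields a baseAssetPath of https://cdn.example.com/vad/, while non-browser contexts keep the '/' fallback.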
2572
-
2573
- var defaultModelFetcher$1 = {};
2574
-
2575
- Object.defineProperty(defaultModelFetcher$1, "__esModule", { value: true });
2576
- defaultModelFetcher$1.defaultModelFetcher = void 0;
2577
- const defaultModelFetcher = (path) => {
2578
- return fetch(path).then((model) => model.arrayBuffer());
2579
- };
2580
- defaultModelFetcher$1.defaultModelFetcher = defaultModelFetcher;
2581
-
2582
- var frameProcessor = {};
2583
-
2584
- var logging = {};
2585
-
2586
- (function (exports) {
2587
- Object.defineProperty(exports, "__esModule", { value: true });
2588
- exports.log = exports.LOG_PREFIX = void 0;
2589
- exports.LOG_PREFIX = "[VAD]";
2590
- const levels = ["error", "debug", "warn"];
2591
- function getLog(level) {
2592
- return (...args) => {
2593
- console[level](exports.LOG_PREFIX, ...args);
2594
- };
2595
- }
2596
- const _log = levels.reduce((acc, level) => {
2597
- acc[level] = getLog(level);
2598
- return acc;
2599
- }, {});
2600
- exports.log = _log;
2601
-
2602
- } (logging));
2603
-
2604
- var messages = {};
2605
-
2606
- Object.defineProperty(messages, "__esModule", { value: true });
2607
- messages.Message = void 0;
2608
- var Message;
2609
- (function (Message) {
2610
- Message["AudioFrame"] = "AUDIO_FRAME";
2611
- Message["SpeechStart"] = "SPEECH_START";
2612
- Message["VADMisfire"] = "VAD_MISFIRE";
2613
- Message["SpeechEnd"] = "SPEECH_END";
2614
- Message["SpeechStop"] = "SPEECH_STOP";
2615
- Message["SpeechRealStart"] = "SPEECH_REAL_START";
2616
- Message["FrameProcessed"] = "FRAME_PROCESSED";
2617
- })(Message || (messages.Message = Message = {}));
2618
-
2619
- /*
2620
- Some of this code, together with the default options found in index.ts,
2621
- were taken (or took inspiration) from https://github.com/snakers4/silero-vad
2622
- */
2623
- Object.defineProperty(frameProcessor, "__esModule", { value: true });
2624
- frameProcessor.FrameProcessor = frameProcessor.validateOptions = frameProcessor.defaultV5FrameProcessorOptions = frameProcessor.defaultLegacyFrameProcessorOptions = void 0;
2625
- const logging_1$3 = logging;
2626
- const messages_1 = messages;
2627
- const RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];
2628
- frameProcessor.defaultLegacyFrameProcessorOptions = {
2629
- positiveSpeechThreshold: 0.5,
2630
- negativeSpeechThreshold: 0.5 - 0.15,
2631
- preSpeechPadFrames: 1,
2632
- redemptionFrames: 8,
2633
- frameSamples: 1536,
2634
- minSpeechFrames: 3,
2635
- submitUserSpeechOnPause: false,
2636
- };
2637
- frameProcessor.defaultV5FrameProcessorOptions = {
2638
- positiveSpeechThreshold: 0.5,
2639
- negativeSpeechThreshold: 0.5 - 0.15,
2640
- preSpeechPadFrames: 3,
2641
- redemptionFrames: 24,
2642
- frameSamples: 512,
2643
- minSpeechFrames: 9,
2644
- submitUserSpeechOnPause: false,
2645
- };
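Both presets encode roughly the same wall-clock behavior at the model's 16 kHz rate: legacy frames are 1536 samples (96 ms) and v5 frames are 512 samples (32 ms), so redemptionFrames of 8 vs 24 both allow about 768 ms of trailing quiet before a segment closes, and minSpeechFrames of 3 vs 9 both demand about 288 ms of detected speech to avoid a misfire.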
2646
- function validateOptions(options) {
2647
- if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {
2648
- logging_1$3.log.warn("You are using an unusual frame size");
2649
- }
2650
- if (options.positiveSpeechThreshold < 0 ||
2651
- options.positiveSpeechThreshold > 1) {
2652
- logging_1$3.log.error("positiveSpeechThreshold should be a number between 0 and 1");
2653
- }
2654
- if (options.negativeSpeechThreshold < 0 ||
2655
- options.negativeSpeechThreshold > options.positiveSpeechThreshold) {
2656
- logging_1$3.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold");
2657
- }
2658
- if (options.preSpeechPadFrames < 0) {
2659
- logging_1$3.log.error("preSpeechPadFrames should be positive");
2660
- }
2661
- if (options.redemptionFrames < 0) {
2662
- logging_1$3.log.error("redemptionFrames should be positive");
2663
- }
2664
- }
2665
- frameProcessor.validateOptions = validateOptions;
2666
- const concatArrays = (arrays) => {
2667
- const sizes = arrays.reduce((out, next) => {
2668
- out.push(out.at(-1) + next.length);
2669
- return out;
2670
- }, [0]);
2671
- const outArray = new Float32Array(sizes.at(-1));
2672
- arrays.forEach((arr, index) => {
2673
- const place = sizes[index];
2674
- outArray.set(arr, place);
2675
- });
2676
- return outArray;
2677
- };
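The reduce builds a running-offset table before copying; as a hypothetical direct call to this module-internal helper:

    concatArrays([new Float32Array([1, 2]), new Float32Array([3, 4, 5])]);
    // sizes -> [0, 2, 5]; result -> Float32Array(5) [1, 2, 3, 4, 5]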
2678
- class FrameProcessor {
2679
- constructor(modelProcessFunc, modelResetFunc, options) {
2680
- this.modelProcessFunc = modelProcessFunc;
2681
- this.modelResetFunc = modelResetFunc;
2682
- this.options = options;
2683
- this.speaking = false;
2684
- this.redemptionCounter = 0;
2685
- this.speechFrameCount = 0;
2686
- this.active = false;
2687
- this.speechRealStartFired = false;
2688
- this.reset = () => {
2689
- this.speaking = false;
2690
- this.speechRealStartFired = false;
2691
- this.audioBuffer = [];
2692
- this.modelResetFunc();
2693
- this.redemptionCounter = 0;
2694
- this.speechFrameCount = 0;
2695
- };
2696
- this.pause = (handleEvent) => {
2697
- this.active = false;
2698
- if (this.options.submitUserSpeechOnPause) {
2699
- this.endSegment(handleEvent);
2430
+ else if (isImageDataEle) {
2431
+ // ImageData - image object - format is RGBA by default
2432
+ const format = 'RGBA';
2433
+ let height;
2434
+ let width;
2435
+ if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
2436
+ height = options.resizedHeight;
2437
+ width = options.resizedWidth;
2700
2438
  }
2701
2439
  else {
2702
- this.reset();
2440
+ height = image.height;
2441
+ width = image.width;
2703
2442
  }
2704
- };
2705
- this.resume = () => {
2706
- this.active = true;
2707
- };
2708
- this.endSegment = (handleEvent) => {
2709
- const audioBuffer = this.audioBuffer;
2710
- this.audioBuffer = [];
2711
- const speaking = this.speaking;
2712
- this.reset();
2713
- if (speaking) {
2714
- const speechFrameCount = audioBuffer.reduce((acc, item) => {
2715
- return item.isSpeech ? (acc + 1) : acc;
2716
- }, 0);
2717
- if (speechFrameCount >= this.options.minSpeechFrames) {
2718
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
2719
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
2443
+ if (options !== undefined) {
2444
+ tensorConfig = options;
2445
+ if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
2446
+ throw new Error('Image input config format must be RGBA for ImageData');
2720
2447
  }
2721
2448
  else {
2722
- handleEvent({ msg: messages_1.Message.VADMisfire });
2449
+ tensorConfig.bitmapFormat = 'RGBA';
2723
2450
  }
2724
2451
  }
2725
- return {};
2726
- };
2727
- this.process = async (frame, handleEvent) => {
2728
- if (!this.active) {
2729
- return;
2452
+ else {
2453
+ tensorConfig.bitmapFormat = 'RGBA';
2730
2454
  }
2731
- const probs = await this.modelProcessFunc(frame);
2732
- const isSpeech = probs.isSpeech >= this.options.positiveSpeechThreshold;
2733
- handleEvent({ probs, msg: messages_1.Message.FrameProcessed, frame });
2734
- this.audioBuffer.push({
2735
- frame,
2736
- isSpeech,
2737
- });
2738
- if (isSpeech) {
2739
- this.speechFrameCount++;
2740
- this.redemptionCounter = 0;
2455
+ tensorConfig.height = height;
2456
+ tensorConfig.width = width;
2457
+ if (options !== undefined) {
2458
+ const tempCanvas = document.createElement('canvas');
2459
+ tempCanvas.width = width;
2460
+ tempCanvas.height = height;
2461
+ const pixels2DContext = tempCanvas.getContext('2d');
2462
+ if (pixels2DContext != null) {
2463
+ pixels2DContext.putImageData(image, 0, 0);
2464
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2465
+ }
2466
+ else {
2467
+ throw new Error('Can not access image data');
2468
+ }
2741
2469
  }
2742
- if (isSpeech && !this.speaking) {
2743
- this.speaking = true;
2744
- handleEvent({ msg: messages_1.Message.SpeechStart });
2470
+ else {
2471
+ data = image.data;
2745
2472
  }
2746
- if (this.speaking &&
2747
- this.speechFrameCount === this.options.minSpeechFrames &&
2748
- !this.speechRealStartFired) {
2749
- this.speechRealStartFired = true;
2750
- handleEvent({ msg: messages_1.Message.SpeechRealStart });
2473
+ }
2474
+ else if (isImageBitmap) {
2475
+ // ImageBitmap - image object - format must be provided by user
2476
+ if (options === undefined) {
2477
+ throw new Error('Please provide image config with format for Imagebitmap');
2751
2478
  }
2752
- if (probs.isSpeech < this.options.negativeSpeechThreshold &&
2753
- this.speaking &&
2754
- ++this.redemptionCounter >= this.options.redemptionFrames) {
2755
- this.redemptionCounter = 0;
2756
- this.speechFrameCount = 0;
2757
- this.speaking = false;
2758
- this.speechRealStartFired = false;
2759
- const audioBuffer = this.audioBuffer;
2760
- this.audioBuffer = [];
2761
- const speechFrameCount = audioBuffer.reduce((acc, item) => {
2762
- return item.isSpeech ? (acc + 1) : acc;
2763
- }, 0);
2764
- if (speechFrameCount >= this.options.minSpeechFrames) {
2765
- const audio = concatArrays(audioBuffer.map((item) => item.frame));
2766
- handleEvent({ msg: messages_1.Message.SpeechEnd, audio });
2479
+ if (options.bitmapFormat !== undefined) {
2480
+ throw new Error('Image input config format must be defined for ImageBitmap');
2481
+ }
2482
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
2483
+ if (pixels2DContext != null) {
2484
+ const height = image.height;
2485
+ const width = image.width;
2486
+ pixels2DContext.drawImage(image, 0, 0, width, height);
2487
+ data = pixels2DContext.getImageData(0, 0, width, height).data;
2488
+ if (options !== undefined) {
2489
+ // using square brackets to avoid TS error - type 'never'
2490
+ if (options.height !== undefined && options.height !== height) {
2491
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
2492
+ }
2493
+ else {
2494
+ tensorConfig.height = height;
2495
+ }
2496
+ // using square brackets to avoid TS error - type 'never'
2497
+ if (options.width !== undefined && options.width !== width) {
2498
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
2499
+ }
2500
+ else {
2501
+ tensorConfig.width = width;
2502
+ }
2767
2503
  }
2768
2504
  else {
2769
- handleEvent({ msg: messages_1.Message.VADMisfire });
2505
+ tensorConfig.height = height;
2506
+ tensorConfig.width = width;
2770
2507
  }
2508
+ return Tensor.bufferToTensor(data, tensorConfig);
2771
2509
  }
2772
- if (!this.speaking) {
2773
- while (this.audioBuffer.length > this.options.preSpeechPadFrames) {
2774
- this.audioBuffer.shift();
2775
- }
2776
- this.speechFrameCount = 0;
2510
+ else {
2511
+ throw new Error('Can not access image data');
2777
2512
  }
2778
- };
2779
- this.audioBuffer = [];
2780
- this.reset();
2781
- }
2782
- }
2783
- frameProcessor.FrameProcessor = FrameProcessor;
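Read as a whole, the state machine emits SpeechStart on the first speech-positive frame, SpeechRealStart once minSpeechFrames speech frames have accumulated, and, after probabilities stay below negativeSpeechThreshold for redemptionFrames frames, flushes the buffered segment as SpeechEnd (with the concatenated audio) or as VADMisfire when too few speech frames were seen.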
2784
-
2785
- var nonRealTimeVad = {};
2786
-
2787
- var ortWeb_min = {exports: {}};
2788
-
2789
- // Copyright (c) Microsoft Corporation. All rights reserved.
2790
- // Licensed under the MIT License.
2791
- const backends = {};
2792
- const backendsSortedByPriority = [];
2793
- /**
2794
- * Register a backend.
2795
- *
2796
- * @param name - the name as a key to lookup as an execution provider.
2797
- * @param backend - the backend object.
2798
- * @param priority - an integer indicating the priority of the backend. Higher number means higher priority. if priority
2799
- * < 0, it will be considered as a 'beta' version and will not be used as a fallback backend by default.
2800
- *
2801
- * @internal
2802
- */
2803
- const registerBackend = (name, backend, priority) => {
2804
- if (backend && typeof backend.init === 'function' && typeof backend.createSessionHandler === 'function') {
2805
- const currentBackend = backends[name];
2806
- if (currentBackend === undefined) {
2807
- backends[name] = { backend, priority };
2808
2513
  }
2809
- else if (currentBackend.priority > priority) {
2810
- // same name is already registered with a higher priority. skip registration.
2811
- return;
2514
+ else if (isURL) {
2515
+ return new Promise((resolve, reject) => {
2516
+ const canvas = document.createElement('canvas');
2517
+ const context = canvas.getContext('2d');
2518
+ if (!image || !context) {
2519
+ return reject();
2520
+ }
2521
+ const newImage = new Image();
2522
+ newImage.crossOrigin = 'Anonymous';
2523
+ newImage.src = image;
2524
+ newImage.onload = () => {
2525
+ canvas.width = newImage.width;
2526
+ canvas.height = newImage.height;
2527
+ context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
2528
+ const img = context.getImageData(0, 0, canvas.width, canvas.height);
2529
+ if (options !== undefined) {
2530
+ // using square brackets to avoid TS error - type 'never'
2531
+ if (options.height !== undefined && options.height !== canvas.height) {
2532
+ throw new Error('Image input config height doesn\'t match ImageBitmap height');
2533
+ }
2534
+ else {
2535
+ tensorConfig.height = canvas.height;
2536
+ }
2537
+ // using square brackets to avoid TS error - type 'never'
2538
+ if (options.width !== undefined && options.width !== canvas.width) {
2539
+ throw new Error('Image input config width doesn\'t match ImageBitmap width');
2540
+ }
2541
+ else {
2542
+ tensorConfig.width = canvas.width;
2543
+ }
2544
+ }
2545
+ else {
2546
+ tensorConfig.height = canvas.height;
2547
+ tensorConfig.width = canvas.width;
2548
+ }
2549
+ resolve(Tensor.bufferToTensor(img.data, tensorConfig));
2550
+ };
2551
+ });
2812
2552
  }
2813
- else if (currentBackend.priority === priority) {
2814
- if (currentBackend.backend !== backend) {
2815
- throw new Error(`cannot register backend "${name}" using priority ${priority}`);
2816
- }
2553
+ else {
2554
+ throw new Error('Input data provided is not supported - aborted tensor creation');
2817
2555
  }
2818
- if (priority >= 0) {
2819
- const i = backendsSortedByPriority.indexOf(name);
2820
- if (i !== -1) {
2821
- backendsSortedByPriority.splice(i, 1);
2822
- }
2823
- for (let i = 0; i < backendsSortedByPriority.length; i++) {
2824
- if (backends[backendsSortedByPriority[i]].priority <= priority) {
2825
- backendsSortedByPriority.splice(i, 0, name);
2826
- return;
2827
- }
2828
- }
2829
- backendsSortedByPriority.push(name);
2556
+ if (data !== undefined) {
2557
+ return Tensor.bufferToTensor(data, tensorConfig);
2558
+ }
2559
+ else {
2560
+ throw new Error('Input data provided is not supported - aborted tensor creation');
2830
2561
  }
2831
- return;
2832
2562
  }
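Seen end to end, fromImage normalizes its four accepted inputs (HTMLImageElement, ImageData, ImageBitmap, URL string) into an RGBA pixel buffer and hands off to bufferToTensor. A sketch of the HTMLImageElement path (the element id is an assumption; note this branch forces tensorFormat to RGBA):

    // Assumes an <img id="photo"> that has already finished loading.
    const img = document.getElementById('photo');
    const t = await Tensor.fromImage(img, { resizedWidth: 224, resizedHeight: 224 });
    // t.dims -> [1, 4, 224, 224]; the HTMLImageElement branch emits RGBA.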
2833
- throw new TypeError('not a valid backend');
2834
- };
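In effect the registry keeps backendsSortedByPriority ordered high to low: registering 'webgl' at priority 10 and 'wasm' at priority 8 (hypothetical values) makes resolveBackend try 'webgl' first when no hints are given, while a backend registered with a negative priority is treated as beta and never joins the default fallback list.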
2835
- /**
2836
- * Resolve backend by specified hints.
2837
- *
2838
- * @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
2839
- * @returns a promise that resolves to the backend.
2840
- *
2841
- * @internal
2842
- */
2843
- const resolveBackend = async (backendHints) => {
2844
- const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
2845
- const errors = [];
2846
- for (const backendName of backendNames) {
2847
- const backendInfo = backends[backendName];
2848
- if (backendInfo) {
2849
- if (backendInfo.initialized) {
2850
- return backendInfo.backend;
2563
+ toImageData(options) {
2564
+ var _a, _b;
2565
+ const pixels2DContext = document.createElement('canvas').getContext('2d');
2566
+ let image;
2567
+ if (pixels2DContext != null) {
2568
+ // Default values for height and width & format
2569
+ const width = this.dims[3];
2570
+ const height = this.dims[2];
2571
+ const channels = this.dims[1];
2572
+ const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
2573
+ const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
2574
+ const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
2575
+ const offset = height * width;
2576
+ if (options !== undefined) {
2577
+ if (options.height !== undefined && options.height !== height) {
2578
+ throw new Error('Image output config height doesn\'t match tensor height');
2579
+ }
2580
+ if (options.width !== undefined && options.width !== width) {
2581
+ throw new Error('Image output config width doesn\'t match tensor width');
2582
+ }
2583
+ if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
2584
+ (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
2585
+ throw new Error('Tensor format doesn\'t match input tensor dims');
2586
+ }
2851
2587
  }
2852
- else if (backendInfo.aborted) {
2853
- continue; // current backend is unavailable; try next
2588
+ // Default pointer assignments
2589
+ const step = 4;
2590
+ let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
2591
+ let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
2592
+ // Updating the pointer assignments based on the input image format
2593
+ if (inputformat === 'RGBA') {
2594
+ rTensorPointer = 0;
2595
+ gTensorPointer = offset;
2596
+ bTensorPointer = offset * 2;
2597
+ aTensorPointer = offset * 3;
2854
2598
  }
2855
- const isInitializing = !!backendInfo.initPromise;
2856
- try {
2857
- if (!isInitializing) {
2858
- backendInfo.initPromise = backendInfo.backend.init();
2859
- }
2860
- await backendInfo.initPromise;
2861
- backendInfo.initialized = true;
2862
- return backendInfo.backend;
2599
+ else if (inputformat === 'RGB') {
2600
+ rTensorPointer = 0;
2601
+ gTensorPointer = offset;
2602
+ bTensorPointer = offset * 2;
2863
2603
  }
2864
- catch (e) {
2865
- if (!isInitializing) {
2866
- errors.push({ name: backendName, err: e });
2867
- }
2868
- backendInfo.aborted = true;
2604
+ else if (inputformat === 'RBG') {
2605
+ rTensorPointer = 0;
2606
+ bTensorPointer = offset;
2607
+ gTensorPointer = offset * 2;
2869
2608
  }
2870
- finally {
2871
- delete backendInfo.initPromise;
2609
+ image = pixels2DContext.createImageData(width, height);
2610
+ for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
2611
+ image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
2612
+ image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
2613
+ image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
2614
+ image.data[aImagePointer] =
2615
+ aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
2872
2616
  }
2873
2617
  }
2618
+ else {
2619
+ throw new Error('Can not access image data');
2620
+ }
2621
+ return image;
2622
+ }
2623
+ // #endregion
2624
+ // #region tensor utilities
2625
+ reshape(dims) {
2626
+ return new Tensor(this.type, this.data, dims);
2874
2627
  }
2875
- throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
2876
2628
  };
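toImageData inverts the packing, reading planar values back out as (value - normBias) * normMean and filling alpha with 255 when the tensor has no alpha plane; reshape rewraps the same backing data under new dims, which must still multiply out to the data length. A sketch (the rgb tensor is assumed, e.g. from the bufferToTensor example above):

    // rgb: float32 tensor with dims [1, 3, 2, 2], i.e. 12 values.
    const flat = rgb.reshape([1, 12]);   // same backing data, new dims
    const img = rgb.toImageData();       // 2x2 ImageData; alpha filled with 255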
2877
2629
 
2878
2630
  // Copyright (c) Microsoft Corporation. All rights reserved.
2879
2631
  // Licensed under the MIT License.
2880
- class EnvImpl {
2881
- constructor() {
2882
- this.wasm = {};
2883
- this.webgl = {};
2884
- this.logLevelInternal = 'warning';
2885
- }
2886
- // TODO standardize the getter and setter convention in env for other fields.
2887
- set logLevel(value) {
2888
- if (value === undefined) {
2889
- return;
2890
- }
2891
- if (typeof value !== 'string' || ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(value) === -1) {
2892
- throw new Error(`Unsupported logging level: ${value}`);
2893
- }
2894
- this.logLevelInternal = value;
2895
- }
2896
- get logLevel() {
2897
- return this.logLevelInternal;
2898
- }
2899
- }
2900
-
2901
- // Copyright (c) Microsoft Corporation. All rights reserved.
2902
- // Licensed under the MIT License.
2903
- /**
2904
- * Represent a set of flags as a global singleton.
2905
- */
2906
- const env = new EnvImpl();
2632
+ // eslint-disable-next-line @typescript-eslint/naming-convention
2633
+ const Tensor = Tensor$1;
2907
2634
 
2908
2635
  // Copyright (c) Microsoft Corporation. All rights reserved.
2909
2636
  // Licensed under the MIT License.
2910
- const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && typeof BigInt64Array.from === 'function';
2911
- const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && typeof BigUint64Array.from === 'function';
2912
- // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2913
- const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
2914
- ['float32', Float32Array],
2915
- ['uint8', Uint8Array],
2916
- ['int8', Int8Array],
2917
- ['uint16', Uint16Array],
2918
- ['int16', Int16Array],
2919
- ['int32', Int32Array],
2920
- ['bool', Uint8Array],
2921
- ['float64', Float64Array],
2922
- ['uint32', Uint32Array],
2923
- ]);
2924
- // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
2925
- const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map([
2926
- [Float32Array, 'float32'],
2927
- [Uint8Array, 'uint8'],
2928
- [Int8Array, 'int8'],
2929
- [Uint16Array, 'uint16'],
2930
- [Int16Array, 'int16'],
2931
- [Int32Array, 'int32'],
2932
- [Float64Array, 'float64'],
2933
- [Uint32Array, 'uint32'],
2934
- ]);
2935
- if (isBigInt64ArrayAvailable) {
2936
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('int64', BigInt64Array);
2937
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigInt64Array, 'int64');
2938
- }
2939
- if (isBigUint64ArrayAvailable) {
2940
- NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set('uint64', BigUint64Array);
2941
- NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(BigUint64Array, 'uint64');
2942
- }
2943
- /**
2944
- * calculate size from dims.
2945
- *
2946
- * @param dims the dims array. May be an illegal input.
2947
- */
2948
- const calculateSize = (dims) => {
2949
- let size = 1;
2950
- for (let i = 0; i < dims.length; i++) {
2951
- const dim = dims[i];
2952
- if (typeof dim !== 'number' || !Number.isSafeInteger(dim)) {
2953
- throw new TypeError(`dims[${i}] must be an integer, got: ${dim}`);
2954
- }
2955
- if (dim < 0) {
2956
- throw new RangeError(`dims[${i}] must be a non-negative integer, got: ${dim}`);
2957
- }
2958
- size *= dim;
2637
+ let InferenceSession$1 = class InferenceSession {
2638
+ constructor(handler) {
2639
+ this.handler = handler;
2959
2640
  }
2960
- return size;
2961
- };
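The product check matters because the constructor rejects mismatches: dims of [1, 3, 224, 224] give a size of 150528, so the backing array must hold exactly that many elements.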
2962
- let Tensor$1 = class Tensor {
2963
- constructor(arg0, arg1, arg2) {
2964
- let type;
2965
- let data;
2966
- let dims;
2967
- // check whether arg0 is type or data
2968
- if (typeof arg0 === 'string') {
2969
- //
2970
- // Override: constructor(type, data, ...)
2971
- //
2972
- type = arg0;
2973
- dims = arg2;
2974
- if (arg0 === 'string') {
2975
- // string tensor
2976
- if (!Array.isArray(arg1)) {
2977
- throw new TypeError('A string tensor\'s data must be a string array.');
2978
- }
2979
- // we don't check whether every element in the array is string; this is too slow. we assume it's correct and
2980
- // error will be populated at inference
2981
- data = arg1;
2641
+ async run(feeds, arg1, arg2) {
2642
+ const fetches = {};
2643
+ let options = {};
2644
+ // check inputs
2645
+ if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
2646
+ throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
2647
+ }
2648
+ let isFetchesEmpty = true;
2649
+ // determine which override is being used
2650
+ if (typeof arg1 === 'object') {
2651
+ if (arg1 === null) {
2652
+ throw new TypeError('Unexpected argument[1]: cannot be null.');
2982
2653
  }
2983
- else {
2984
- // numeric tensor
2985
- const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(arg0);
2986
- if (typedArrayConstructor === undefined) {
2987
- throw new TypeError(`Unsupported tensor type: ${arg0}.`);
2654
+ if (arg1 instanceof Tensor) {
2655
+ throw new TypeError('\'fetches\' cannot be a Tensor');
2656
+ }
2657
+ if (Array.isArray(arg1)) {
2658
+ if (arg1.length === 0) {
2659
+ throw new TypeError('\'fetches\' cannot be an empty array.');
2988
2660
  }
2989
- if (Array.isArray(arg1)) {
2990
- // use 'as any' here because TypeScript's check on type of 'SupportedTypedArrayConstructors.from()' produces
2991
- // incorrect results.
2992
- // 'typedArrayConstructor' should be one of the typed array prototype objects.
2993
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
2994
- data = typedArrayConstructor.from(arg1);
2661
+ isFetchesEmpty = false;
2662
+ // output names
2663
+ for (const name of arg1) {
2664
+ if (typeof name !== 'string') {
2665
+ throw new TypeError('\'fetches\' must be a string array or an object.');
2666
+ }
2667
+ if (this.outputNames.indexOf(name) === -1) {
2668
+ throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
2669
+ }
2670
+ fetches[name] = null;
2995
2671
  }
2996
- else if (arg1 instanceof typedArrayConstructor) {
2997
- data = arg1;
2672
+ if (typeof arg2 === 'object' && arg2 !== null) {
2673
+ options = arg2;
2998
2674
  }
2999
- else {
3000
- throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
2675
+ else if (typeof arg2 !== 'undefined') {
2676
+ throw new TypeError('\'options\' must be an object.');
3001
2677
  }
3002
2678
  }
3003
- }
3004
- else {
3005
- //
3006
- // Override: constructor(data, ...)
3007
- //
3008
- dims = arg1;
3009
- if (Array.isArray(arg0)) {
3010
- // only boolean[] and string[] is supported
3011
- if (arg0.length === 0) {
3012
- throw new TypeError('Tensor type cannot be inferred from an empty array.');
3013
- }
3014
- const firstElementType = typeof arg0[0];
3015
- if (firstElementType === 'string') {
3016
- type = 'string';
3017
- data = arg0;
2679
+ else {
2680
+ // decide whether arg1 is fetches or options
2681
+ // if any output name is present and its value is valid OnnxValue, we consider it fetches
2682
+ let isFetches = false;
2683
+ const arg1Keys = Object.getOwnPropertyNames(arg1);
2684
+ for (const name of this.outputNames) {
2685
+ if (arg1Keys.indexOf(name) !== -1) {
2686
+ const v = arg1[name];
2687
+ if (v === null || v instanceof Tensor) {
2688
+ isFetches = true;
2689
+ isFetchesEmpty = false;
2690
+ fetches[name] = v;
2691
+ }
2692
+ }
3018
2693
  }
3019
- else if (firstElementType === 'boolean') {
3020
- type = 'bool';
3021
- // 'arg0' is of type 'boolean[]'. Uint8Array.from(boolean[]) actually works, but typescript thinks this is
3022
- // wrong type. We use 'as any' to make it happy.
3023
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
3024
- data = Uint8Array.from(arg0);
2694
+ if (isFetches) {
2695
+ if (typeof arg2 === 'object' && arg2 !== null) {
2696
+ options = arg2;
2697
+ }
2698
+ else if (typeof arg2 !== 'undefined') {
2699
+ throw new TypeError('\'options\' must be an object.');
2700
+ }
3025
2701
  }
3026
2702
  else {
3027
- throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
3028
- }
3029
- }
3030
- else {
3031
- // get tensor type from TypedArray
3032
- const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(arg0.constructor);
3033
- if (mappedType === undefined) {
3034
- throw new TypeError(`Unsupported type for tensor data: ${arg0.constructor}.`);
2703
+ options = arg1;
3035
2704
  }
3036
- type = mappedType;
3037
- data = arg0;
3038
2705
  }
3039
2706
  }
3040
- // type and data is processed, now processing dims
3041
- if (dims === undefined) {
3042
- // assume 1-D tensor if dims omitted
3043
- dims = [data.length];
2707
+ else if (typeof arg1 !== 'undefined') {
2708
+ throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
3044
2709
  }
3045
- else if (!Array.isArray(dims)) {
3046
- throw new TypeError('A tensor\'s dims must be a number array');
2710
+ // check if all inputs are in feed
2711
+ for (const name of this.inputNames) {
2712
+ if (typeof feeds[name] === 'undefined') {
2713
+ throw new Error(`input '${name}' is missing in 'feeds'.`);
2714
+ }
3047
2715
  }
3048
- // perform check
3049
- const size = calculateSize(dims);
3050
- if (size !== data.length) {
3051
- throw new Error(`Tensor's size(${size}) does not match data length(${data.length}).`);
2716
+ // if no fetches is specified, we use the full output names list
2717
+ if (isFetchesEmpty) {
2718
+ for (const name of this.outputNames) {
2719
+ fetches[name] = null;
2720
+ }
3052
2721
  }
3053
- this.dims = dims;
3054
- this.type = type;
3055
- this.data = data;
3056
- this.size = size;
3057
- }
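The two constructor overloads in practice (values are illustrative):

    new Tensor('float32', new Float32Array(6), [2, 3]);  // explicit type + dims
    new Tensor(['a', 'b']);             // inferred: type 'string', dims [2]
    new Tensor([true, false, true]);    // inferred: type 'bool', stored as Uint8Array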
3058
- // #endregion
3059
- /**
3060
- * Create a new tensor object from image object
3061
- *
3062
- * @param buffer - Extracted image buffer data - assuming RGBA format
3063
- * @param imageFormat - input image configuration - required configurations height, width, format
3064
- * @param tensorFormat - output tensor configuration - Default is RGB format
3065
- */
3066
- static bufferToTensor(buffer, options) {
3067
- if (buffer === undefined) {
3068
- throw new Error('Image buffer must be defined');
2722
+ // feeds, fetches and options are prepared
2723
+ const results = await this.handler.run(feeds, fetches, options);
2724
+ const returnValue = {};
2725
+ for (const key in results) {
2726
+ if (Object.hasOwnProperty.call(results, key)) {
2727
+ returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
2728
+ }
3069
2729
  }
3070
- if (options.height === undefined || options.width === undefined) {
3071
- throw new Error('Image height and width must be defined');
2730
+ return returnValue;
2731
+ }
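At the call site, run takes a feeds object keyed by input name and resolves to plain Tensors keyed by output name; fetches and options stay optional. A sketch (the names 'input' and 'output' are assumptions; real names come from session.inputNames and session.outputNames):

    // session: an InferenceSession instance (see the create sketch below).
    const feeds = { input: new Tensor('float32', new Float32Array(12), [1, 3, 2, 2]) };
    const results = await session.run(feeds);
    console.log(results.output.dims, results.output.data);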
2732
+ static async create(arg0, arg1, arg2, arg3) {
2733
+ // either load from a file or buffer
2734
+ let filePathOrUint8Array;
2735
+ let options = {};
2736
+ if (typeof arg0 === 'string') {
2737
+ filePathOrUint8Array = arg0;
2738
+ if (typeof arg1 === 'object' && arg1 !== null) {
2739
+ options = arg1;
2740
+ }
2741
+ else if (typeof arg1 !== 'undefined') {
2742
+ throw new TypeError('\'options\' must be an object.');
2743
+ }
3072
2744
  }
3073
- const { height, width } = options;
3074
- const norm = options.norm;
3075
- let normMean;
3076
- let normBias;
3077
- if (norm === undefined || norm.mean === undefined) {
3078
- normMean = 255;
3079
- }
3080
- else {
3081
- normMean = norm.mean;
3082
- }
3083
- if (norm === undefined || norm.bias === undefined) {
3084
- normBias = 0;
3085
- }
3086
- else {
3087
- normBias = norm.bias;
3088
- }
3089
- const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
3090
- // default value is RGBA since ImageData and HTMLImageElement use it
3091
- const outputformat = options.tensorFormat !== undefined ?
3092
- (options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
3093
- 'RGB';
3094
- const offset = height * width;
3095
- const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
3096
- // Default pointer assignments
3097
- let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
3098
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
3099
- // Updating the pointer assignments based on the input image format
3100
- if (inputformat === 'RGB') {
3101
- step = 3;
3102
- rImagePointer = 0;
3103
- gImagePointer = 1;
3104
- bImagePointer = 2;
3105
- aImagePointer = -1;
3106
- }
3107
- // Updating the pointer assignments based on the output tensor format
3108
- if (outputformat === 'RGBA') {
3109
- aTensorPointer = offset * 3;
3110
- }
3111
- else if (outputformat === 'RBG') {
3112
- rTensorPointer = 0;
3113
- bTensorPointer = offset;
3114
- gTensorPointer = offset * 2;
3115
- }
3116
- else if (outputformat === 'BGR') {
3117
- bTensorPointer = 0;
3118
- gTensorPointer = offset;
3119
- rTensorPointer = offset * 2;
3120
- }
3121
- for (let i = 0; i < offset; i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
3122
- float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
3123
- float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
3124
- float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
3125
- if (aTensorPointer !== -1 && aImagePointer !== -1) {
3126
- float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
3127
- }
3128
- }
3129
- // Float32Array -> ort.Tensor
3130
- const outputTensor = outputformat === 'RGBA' ? new Tensor('float32', float32Data, [1, 4, height, width]) :
3131
- new Tensor('float32', float32Data, [1, 3, height, width]);
3132
- return outputTensor;
3133
- }
3134
- static async fromImage(image, options) {
3135
- // checking the type of image object
3136
- const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
3137
- const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
3138
- const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
3139
- const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
3140
- let data;
3141
- let tensorConfig = {};
3142
- // filling and checking image configuration options
3143
- if (isHTMLImageEle) {
3144
- // HTMLImageElement - image object - format is RGBA by default
3145
- const canvas = document.createElement('canvas');
3146
- const pixels2DContext = canvas.getContext('2d');
3147
- if (pixels2DContext != null) {
3148
- let height = image.naturalHeight;
3149
- let width = image.naturalWidth;
3150
- if (options !== undefined && options.resizedHeight !== undefined && options.resizedWidth !== undefined) {
3151
- height = options.resizedHeight;
3152
- width = options.resizedWidth;
3153
- }
3154
- if (options !== undefined) {
3155
- tensorConfig = options;
3156
- if (options.tensorFormat !== undefined) {
3157
- throw new Error('Image input config format must be RGBA for HTMLImageElement');
3158
- }
3159
- else {
3160
- tensorConfig.tensorFormat = 'RGBA';
3161
- }
3162
- if (options.height !== undefined && options.height !== height) {
3163
- throw new Error('Image input config height doesn\'t match HTMLImageElement height');
3164
- }
3165
- else {
3166
- tensorConfig.height = height;
3167
- }
3168
- if (options.width !== undefined && options.width !== width) {
3169
- throw new Error('Image input config width doesn\'t match HTMLImageElement width');
3170
- }
3171
- else {
3172
- tensorConfig.width = width;
3173
- }
3174
- }
3175
- else {
3176
- tensorConfig.tensorFormat = 'RGBA';
3177
- tensorConfig.height = height;
3178
- tensorConfig.width = width;
3179
- }
3180
- canvas.width = width;
3181
- canvas.height = height;
3182
- pixels2DContext.drawImage(image, 0, 0, width, height);
3183
- data = pixels2DContext.getImageData(0, 0, width, height).data;
2745
+ else if (arg0 instanceof Uint8Array) {
2746
+ filePathOrUint8Array = arg0;
2747
+ if (typeof arg1 === 'object' && arg1 !== null) {
2748
+ options = arg1;
3184
2749
  }
3185
- else {
3186
- throw new Error('Can not access image data');
2750
+ else if (typeof arg1 !== 'undefined') {
2751
+ throw new TypeError('\'options\' must be an object.');
3187
2752
  }
3188
2753
  }
3189
- else if (isImageDataEle) {
3190
- // ImageData - image object - format is RGBA by default
3191
- const format = 'RGBA';
3192
- let height;
3193
- let width;
3194
- if (options !== undefined && options.resizedWidth !== undefined && options.resizedHeight !== undefined) {
3195
- height = options.resizedHeight;
3196
- width = options.resizedWidth;
3197
- }
3198
- else {
3199
- height = image.height;
3200
- width = image.width;
3201
- }
3202
- if (options !== undefined) {
3203
- tensorConfig = options;
3204
- if (options.bitmapFormat !== undefined && options.bitmapFormat !== format) {
3205
- throw new Error('Image input config format must be RGBA for ImageData');
3206
- }
3207
- else {
3208
- tensorConfig.bitmapFormat = 'RGBA';
3209
- }
3210
- }
3211
- else {
3212
- tensorConfig.bitmapFormat = 'RGBA';
2754
+ else if (arg0 instanceof ArrayBuffer ||
2755
+ (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
2756
+ const buffer = arg0;
2757
+ let byteOffset = 0;
2758
+ let byteLength = arg0.byteLength;
2759
+ if (typeof arg1 === 'object' && arg1 !== null) {
2760
+ options = arg1;
3213
2761
  }
3214
- tensorConfig.height = height;
3215
- tensorConfig.width = width;
3216
- if (options !== undefined) {
3217
- const tempCanvas = document.createElement('canvas');
3218
- tempCanvas.width = width;
3219
- tempCanvas.height = height;
3220
- const pixels2DContext = tempCanvas.getContext('2d');
3221
- if (pixels2DContext != null) {
3222
- pixels2DContext.putImageData(image, 0, 0);
3223
- data = pixels2DContext.getImageData(0, 0, width, height).data;
2762
+ else if (typeof arg1 === 'number') {
2763
+ byteOffset = arg1;
2764
+ if (!Number.isSafeInteger(byteOffset)) {
2765
+ throw new RangeError('\'byteOffset\' must be an integer.');
3224
2766
  }
3225
- else {
3226
- throw new Error('Can not access image data');
2767
+ if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
2768
+ throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
3227
2769
  }
3228
- }
3229
- else {
3230
- data = image.data;
3231
- }
3232
- }
3233
- else if (isImageBitmap) {
3234
- // ImageBitmap - image object - format must be provided by user
3235
- if (options === undefined) {
3236
- throw new Error('Please provide image config with format for Imagebitmap');
3237
- }
3238
- if (options.bitmapFormat !== undefined) {
3239
- throw new Error('Image input config format must be defined for ImageBitmap');
3240
- }
3241
- const pixels2DContext = document.createElement('canvas').getContext('2d');
3242
- if (pixels2DContext != null) {
3243
- const height = image.height;
3244
- const width = image.width;
3245
- pixels2DContext.drawImage(image, 0, 0, width, height);
3246
- data = pixels2DContext.getImageData(0, 0, width, height).data;
3247
- if (options !== undefined) {
3248
- // using square brackets to avoid TS error - type 'never'
3249
- if (options.height !== undefined && options.height !== height) {
3250
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
2770
+ byteLength = arg0.byteLength - byteOffset;
2771
+ if (typeof arg2 === 'number') {
2772
+ byteLength = arg2;
2773
+ if (!Number.isSafeInteger(byteLength)) {
2774
+ throw new RangeError('\'byteLength\' must be an integer.');
3251
2775
  }
3252
- else {
3253
- tensorConfig.height = height;
2776
+ if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
2777
+ throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
3254
2778
  }
3255
- // using square brackets to avoid TS error - type 'never'
3256
- if (options.width !== undefined && options.width !== width) {
3257
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
2779
+ if (typeof arg3 === 'object' && arg3 !== null) {
2780
+ options = arg3;
3258
2781
  }
3259
- else {
3260
- tensorConfig.width = width;
2782
+ else if (typeof arg3 !== 'undefined') {
2783
+ throw new TypeError('\'options\' must be an object.');
3261
2784
  }
3262
2785
  }
3263
- else {
3264
- tensorConfig.height = height;
3265
- tensorConfig.width = width;
2786
+ else if (typeof arg2 !== 'undefined') {
2787
+ throw new TypeError('\'byteLength\' must be a number.');
3266
2788
  }
3267
- return Tensor.bufferToTensor(data, tensorConfig);
3268
2789
  }
3269
- else {
3270
- throw new Error('Can not access image data');
2790
+ else if (typeof arg1 !== 'undefined') {
2791
+ throw new TypeError('\'options\' must be an object.');
3271
2792
  }
3272
- }
3273
- else if (isURL) {
3274
- return new Promise((resolve, reject) => {
3275
- const canvas = document.createElement('canvas');
3276
- const context = canvas.getContext('2d');
3277
- if (!image || !context) {
3278
- return reject();
3279
- }
3280
- const newImage = new Image();
3281
- newImage.crossOrigin = 'Anonymous';
3282
- newImage.src = image;
3283
- newImage.onload = () => {
3284
- canvas.width = newImage.width;
3285
- canvas.height = newImage.height;
3286
- context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
3287
- const img = context.getImageData(0, 0, canvas.width, canvas.height);
3288
- if (options !== undefined) {
3289
- // using square brackets to avoid TS error - type 'never'
3290
- if (options.height !== undefined && options.height !== canvas.height) {
3291
- throw new Error('Image input config height doesn\'t match ImageBitmap height');
3292
- }
3293
- else {
3294
- tensorConfig.height = canvas.height;
3295
- }
3296
- // using square brackets to avoid TS error - type 'never'
3297
- if (options.width !== undefined && options.width !== canvas.width) {
3298
- throw new Error('Image input config width doesn\'t match ImageBitmap width');
3299
- }
3300
- else {
3301
- tensorConfig.width = canvas.width;
3302
- }
3303
- }
3304
- else {
3305
- tensorConfig.height = canvas.height;
3306
- tensorConfig.width = canvas.width;
3307
- }
3308
- resolve(Tensor.bufferToTensor(img.data, tensorConfig));
3309
- };
3310
- });
3311
- }
3312
- else {
3313
- throw new Error('Input data provided is not supported - aborted tensor creation');
3314
- }
3315
- if (data !== undefined) {
3316
- return Tensor.bufferToTensor(data, tensorConfig);
3317
- }
3318
- else {
3319
- throw new Error('Input data provided is not supported - aborted tensor creation');
3320
- }
3321
- }
3322
- toImageData(options) {
3323
- var _a, _b;
3324
- const pixels2DContext = document.createElement('canvas').getContext('2d');
3325
- let image;
3326
- if (pixels2DContext != null) {
3327
- // Default values for height and width & format
3328
- const width = this.dims[3];
3329
- const height = this.dims[2];
3330
- const channels = this.dims[1];
3331
- const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
3332
- const normMean = options !== undefined ? (((_a = options.norm) === null || _a === void 0 ? void 0 : _a.mean) !== undefined ? options.norm.mean : 255) : 255;
3333
- const normBias = options !== undefined ? (((_b = options.norm) === null || _b === void 0 ? void 0 : _b.bias) !== undefined ? options.norm.bias : 0) : 0;
3334
- const offset = height * width;
3335
- if (options !== undefined) {
3336
- if (options.height !== undefined && options.height !== height) {
3337
- throw new Error('Image output config height doesn\'t match tensor height');
3338
- }
3339
- if (options.width !== undefined && options.width !== width) {
3340
- throw new Error('Image output config width doesn\'t match tensor width');
3341
- }
3342
- if (options.format !== undefined && (channels === 4 && options.format !== 'RGBA') ||
3343
- (channels === 3 && (options.format !== 'RGB' && options.format !== 'BGR'))) {
3344
- throw new Error('Tensor format doesn\'t match input tensor dims');
3345
- }
3346
- }
3347
- // Default pointer assignments
3348
- const step = 4;
3349
- let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
3350
- let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
3351
- // Updating the pointer assignments based on the input image format
3352
- if (inputformat === 'RGBA') {
3353
- rTensorPointer = 0;
3354
- gTensorPointer = offset;
3355
- bTensorPointer = offset * 2;
3356
- aTensorPointer = offset * 3;
3357
- }
3358
- else if (inputformat === 'RGB') {
3359
- rTensorPointer = 0;
3360
- gTensorPointer = offset;
3361
- bTensorPointer = offset * 2;
3362
- }
3363
- else if (inputformat === 'RBG') {
3364
- rTensorPointer = 0;
3365
- bTensorPointer = offset;
3366
- gTensorPointer = offset * 2;
3367
- }
3368
- image = pixels2DContext.createImageData(width, height);
3369
- for (let i = 0; i < height * width; rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
3370
- image.data[rImagePointer] = (this.data[rTensorPointer++] - normBias) * normMean; // R value
3371
- image.data[gImagePointer] = (this.data[gTensorPointer++] - normBias) * normMean; // G value
3372
- image.data[bImagePointer] = (this.data[bTensorPointer++] - normBias) * normMean; // B value
3373
- image.data[aImagePointer] =
3374
- aTensorPointer === -1 ? 255 : (this.data[aTensorPointer++] - normBias) * normMean; // A value
3375
- }
3376
- }
3377
- else {
3378
- throw new Error('Can not access image data');
3379
- }
3380
- return image;
3381
- }
3382
- // #endregion
3383
- // #region tensor utilities
3384
- reshape(dims) {
3385
- return new Tensor(this.type, this.data, dims);
3386
- }
3387
- };
3388
-
3389
- // Copyright (c) Microsoft Corporation. All rights reserved.
3390
- // Licensed under the MIT License.
3391
- // eslint-disable-next-line @typescript-eslint/naming-convention
3392
- const Tensor = Tensor$1;
3393
-
3394
- // Copyright (c) Microsoft Corporation. All rights reserved.
3395
- // Licensed under the MIT License.
3396
- let InferenceSession$1 = class InferenceSession {
3397
- constructor(handler) {
3398
- this.handler = handler;
3399
- }
3400
- async run(feeds, arg1, arg2) {
3401
- const fetches = {};
3402
- let options = {};
3403
- // check inputs
3404
- if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
3405
- throw new TypeError('\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
3406
- }
3407
- let isFetchesEmpty = true;
3408
- // determine which override is being used
3409
- if (typeof arg1 === 'object') {
3410
- if (arg1 === null) {
3411
- throw new TypeError('Unexpected argument[1]: cannot be null.');
3412
- }
3413
- if (arg1 instanceof Tensor) {
3414
- throw new TypeError('\'fetches\' cannot be a Tensor');
3415
- }
3416
- if (Array.isArray(arg1)) {
3417
- if (arg1.length === 0) {
3418
- throw new TypeError('\'fetches\' cannot be an empty array.');
3419
- }
3420
- isFetchesEmpty = false;
3421
- // output names
3422
- for (const name of arg1) {
3423
- if (typeof name !== 'string') {
3424
- throw new TypeError('\'fetches\' must be a string array or an object.');
3425
- }
3426
- if (this.outputNames.indexOf(name) === -1) {
3427
- throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
3428
- }
3429
- fetches[name] = null;
3430
- }
3431
- if (typeof arg2 === 'object' && arg2 !== null) {
3432
- options = arg2;
3433
- }
3434
- else if (typeof arg2 !== 'undefined') {
3435
- throw new TypeError('\'options\' must be an object.');
3436
- }
3437
- }
3438
- else {
3439
- // decide whether arg1 is fetches or options
3440
- // if any output name is present and its value is valid OnnxValue, we consider it fetches
3441
- let isFetches = false;
3442
- const arg1Keys = Object.getOwnPropertyNames(arg1);
3443
- for (const name of this.outputNames) {
3444
- if (arg1Keys.indexOf(name) !== -1) {
3445
- const v = arg1[name];
3446
- if (v === null || v instanceof Tensor) {
3447
- isFetches = true;
3448
- isFetchesEmpty = false;
3449
- fetches[name] = v;
3450
- }
3451
- }
3452
- }
3453
- if (isFetches) {
3454
- if (typeof arg2 === 'object' && arg2 !== null) {
3455
- options = arg2;
3456
- }
3457
- else if (typeof arg2 !== 'undefined') {
3458
- throw new TypeError('\'options\' must be an object.');
3459
- }
3460
- }
3461
- else {
3462
- options = arg1;
3463
- }
3464
- }
3465
- }
3466
- else if (typeof arg1 !== 'undefined') {
3467
- throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
3468
- }
3469
- // check if all inputs are in feed
3470
- for (const name of this.inputNames) {
3471
- if (typeof feeds[name] === 'undefined') {
3472
- throw new Error(`input '${name}' is missing in 'feeds'.`);
3473
- }
3474
- }
3475
- // if no fetches is specified, we use the full output names list
3476
- if (isFetchesEmpty) {
3477
- for (const name of this.outputNames) {
3478
- fetches[name] = null;
3479
- }
3480
- }
3481
- // feeds, fetches and options are prepared
3482
- const results = await this.handler.run(feeds, fetches, options);
3483
- const returnValue = {};
3484
- for (const key in results) {
3485
- if (Object.hasOwnProperty.call(results, key)) {
3486
- returnValue[key] = new Tensor(results[key].type, results[key].data, results[key].dims);
3487
- }
3488
- }
3489
- return returnValue;
3490
- }
3491
- static async create(arg0, arg1, arg2, arg3) {
3492
- // either load from a file or buffer
3493
- let filePathOrUint8Array;
3494
- let options = {};
3495
- if (typeof arg0 === 'string') {
3496
- filePathOrUint8Array = arg0;
3497
- if (typeof arg1 === 'object' && arg1 !== null) {
3498
- options = arg1;
3499
- }
3500
- else if (typeof arg1 !== 'undefined') {
3501
- throw new TypeError('\'options\' must be an object.');
3502
- }
3503
- }
3504
- else if (arg0 instanceof Uint8Array) {
3505
- filePathOrUint8Array = arg0;
3506
- if (typeof arg1 === 'object' && arg1 !== null) {
3507
- options = arg1;
3508
- }
3509
- else if (typeof arg1 !== 'undefined') {
3510
- throw new TypeError('\'options\' must be an object.');
3511
- }
3512
- }
3513
- else if (arg0 instanceof ArrayBuffer ||
3514
- (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
3515
- const buffer = arg0;
3516
- let byteOffset = 0;
3517
- let byteLength = arg0.byteLength;
3518
- if (typeof arg1 === 'object' && arg1 !== null) {
3519
- options = arg1;
3520
- }
3521
- else if (typeof arg1 === 'number') {
3522
- byteOffset = arg1;
3523
- if (!Number.isSafeInteger(byteOffset)) {
3524
- throw new RangeError('\'byteOffset\' must be an integer.');
3525
- }
3526
- if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
3527
- throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
3528
- }
3529
- byteLength = arg0.byteLength - byteOffset;
3530
- if (typeof arg2 === 'number') {
3531
- byteLength = arg2;
3532
- if (!Number.isSafeInteger(byteLength)) {
3533
- throw new RangeError('\'byteLength\' must be an integer.');
3534
- }
3535
- if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
3536
- throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
3537
- }
3538
- if (typeof arg3 === 'object' && arg3 !== null) {
3539
- options = arg3;
3540
- }
3541
- else if (typeof arg3 !== 'undefined') {
3542
- throw new TypeError('\'options\' must be an object.');
3543
- }
3544
- }
3545
- else if (typeof arg2 !== 'undefined') {
3546
- throw new TypeError('\'byteLength\' must be a number.');
3547
- }
3548
- }
3549
- else if (typeof arg1 !== 'undefined') {
3550
- throw new TypeError('\'options\' must be an object.');
3551
- }
3552
- filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
2793
+ filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
3553
2794
  }
3554
2795
  else {
3555
2796
  throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
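Those overloads accept a model path, a Uint8Array, or an (Shared)ArrayBuffer with optional byteOffset and byteLength. A sketch of the buffer form, referring to the class by its bundle-local name and using a placeholder URL (backend resolution is elided):

    const buf = await (await fetch('/models/model.onnx')).arrayBuffer();
    // Offset 0 plus the full byte length is equivalent to passing just the buffer.
    const session = await InferenceSession$1.create(buf, 0, buf.byteLength);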
@@ -3947,7 +3188,7 @@ registerProcessor('audio_processor', AudioProcessor);
3947
3188
  return Math.ceil((targetDuration * sr) / 1000 / frameSamples);
3948
3189
  }
3949
3190
  utils.minFramesForTargetMS = minFramesForTargetMS;
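As a worked example of that formula, assuming the parameter order (targetDuration, frameSamples, sr) and the usual 16 kHz rate: minFramesForTargetMS(100, 1536, 16000) = ceil((100 * 16000) / 1000 / 1536) = ceil(1.042) = 2 frames.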
3950
- function arrayBufferToBase64(buffer) {
3191
+ function arrayBufferToBase64$1(buffer) {
3951
3192
  const bytes = new Uint8Array(buffer);
3952
3193
  const len = bytes.byteLength;
3953
3194
  const binary = new Array(len);
@@ -3960,7 +3201,7 @@ registerProcessor('audio_processor', AudioProcessor);
3960
3201
  }
3961
3202
  return btoa(binary.join(""));
3962
3203
  }
3963
- utils.arrayBufferToBase64 = arrayBufferToBase64;
3204
+ utils.arrayBufferToBase64 = arrayBufferToBase64$1;
3964
3205
  /*
3965
3206
  The rest of this was mostly copied from https://github.com/linto-ai/WebVoiceSDK
3966
3207
  */
@@ -4379,12 +3620,726 @@ registerProcessor('audio_processor', AudioProcessor);
4379
3620
 
4380
3621
  } (dist));
4381
3622
 
4382
- var index = /*@__PURE__*/getDefaultExportFromCjs(dist);
3623
+ /**
3624
+ * Converts a base64 string to an ArrayBuffer.
3625
+ * @param {string} base64 - The base64 string to convert.
3626
+ * @returns {ArrayBuffer} The resulting ArrayBuffer.
3627
+ */
3628
+ function base64ToArrayBuffer(base64) {
3629
+ const binaryString = atob(base64);
3630
+ const len = binaryString.length;
3631
+ const bytes = new Uint8Array(len);
3632
+ for (let i = 0; i < len; i++) {
3633
+ bytes[i] = binaryString.charCodeAt(i);
3634
+ }
3635
+ return bytes.buffer;
3636
+ }
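A tiny round trip through this helper (the sample text is arbitrary):

    const buf = base64ToArrayBuffer(btoa('hi'));
    new Uint8Array(buf);   // Uint8Array(2) [104, 105] - the bytes of 'hi'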
4383
3637
 
4384
- var index$1 = /*#__PURE__*/_mergeNamespaces({
4385
- __proto__: null,
4386
- default: index
4387
- }, [dist]);
3638
+ /**
3639
+ * Converts an ArrayBuffer to a base64 string.
3640
+ * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
3641
+ * @returns {string} The resulting base64 string.
3642
+ */
3643
+ function arrayBufferToBase64(arrayBuffer) {
3644
+ if (arrayBuffer instanceof Float32Array) {
3645
+ arrayBuffer = this.floatTo16BitPCM(arrayBuffer);
3646
+ } else if (arrayBuffer instanceof Int16Array) {
3647
+ arrayBuffer = arrayBuffer.buffer;
3648
+ }
3649
+ let binary = '';
3650
+ let bytes = new Uint8Array(arrayBuffer);
3651
+ const chunkSize = 0x8000; // 32KB chunk size
3652
+ for (let i = 0; i < bytes.length; i += chunkSize) {
3653
+ let chunk = bytes.subarray(i, i + chunkSize);
3654
+ binary += String.fromCharCode.apply(null, chunk);
3655
+ }
3656
+ return btoa(binary);
3657
+ }
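The 32KB chunking keeps String.fromCharCode.apply under engines' argument-count limits on large buffers. One caveat visible above: the Float32Array branch calls this.floatTo16BitPCM, and this is undefined in a plain function under the bundle's 'use strict', so in practice callers pass an ArrayBuffer or Int16Array. A sketch of the Int16Array path:

    const pcm = new Int16Array([0, 16384, -16384]);   // 6 bytes of PCM
    const b64 = arrayBufferToBase64(pcm);             // Int16Array branch uses pcm.buffer
    base64ToArrayBuffer(b64).byteLength;              // 6 - round-trips cleanly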
3658
+
+ /* eslint-env browser */
+ // import { env as ortEnv } from 'onnxruntime-web';
+ const NOOP = () => { };
+ const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
+ // SDK version - updated when publishing
+ const SDK_VERSION = '2.1.3';
+ /**
+ * @class LayercodeClient
+ * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
+ */
+ class LayercodeClient {
+ /**
+ * Creates an instance of LayercodeClient.
+ * @param {Object} options - Configuration options
+ */
+ constructor(options) {
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
+ this.deviceId = null;
+ this.options = {
+ agentId: options.agentId,
+ conversationId: (_a = options.conversationId) !== null && _a !== void 0 ? _a : null,
+ authorizeSessionEndpoint: options.authorizeSessionEndpoint,
+ metadata: (_b = options.metadata) !== null && _b !== void 0 ? _b : {},
+ vadResumeDelay: (_c = options.vadResumeDelay) !== null && _c !== void 0 ? _c : 500,
+ onConnect: (_d = options.onConnect) !== null && _d !== void 0 ? _d : NOOP,
+ onDisconnect: (_e = options.onDisconnect) !== null && _e !== void 0 ? _e : NOOP,
+ onError: (_f = options.onError) !== null && _f !== void 0 ? _f : NOOP,
+ onDeviceSwitched: (_g = options.onDeviceSwitched) !== null && _g !== void 0 ? _g : NOOP,
+ onDataMessage: (_h = options.onDataMessage) !== null && _h !== void 0 ? _h : NOOP,
+ onMessage: (_j = options.onMessage) !== null && _j !== void 0 ? _j : NOOP,
+ onUserAmplitudeChange: (_k = options.onUserAmplitudeChange) !== null && _k !== void 0 ? _k : NOOP,
+ onAgentAmplitudeChange: (_l = options.onAgentAmplitudeChange) !== null && _l !== void 0 ? _l : NOOP,
+ onStatusChange: (_m = options.onStatusChange) !== null && _m !== void 0 ? _m : NOOP,
+ onUserIsSpeakingChange: (_o = options.onUserIsSpeakingChange) !== null && _o !== void 0 ? _o : NOOP,
+ onMuteStateChange: (_p = options.onMuteStateChange) !== null && _p !== void 0 ? _p : NOOP,
+ };
+ this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
+ this._websocketUrl = DEFAULT_WS_URL;
+ this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by the fetched agent config
+ this.wavPlayer = new WavStreamPlayer({
+ finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
+ sampleRate: 16000, // TODO should be set by the fetched agent config
+ });
+ this.vad = null;
+ this.ws = null;
+ this.status = 'disconnected';
+ this.userAudioAmplitude = 0;
+ this.agentAudioAmplitude = 0;
+ this.conversationId = this.options.conversationId;
+ this.pushToTalkActive = false;
+ this.pushToTalkEnabled = false;
+ this.canInterrupt = false;
+ this.userIsSpeaking = false;
+ this.recorderStarted = false;
+ this.readySent = false;
+ this.currentTurnId = null;
+ this.audioBuffer = [];
+ this.vadConfig = null;
+ this.activeDeviceId = null;
+ this.useSystemDefaultDevice = false;
+ this.lastReportedDeviceId = null;
+ this.lastKnownSystemDefaultDeviceKey = null;
+ this.isMuted = false;
+ this.stopPlayerAmplitude = undefined;
+ this.stopRecorderAmplitude = undefined;
+ this.deviceChangeListener = null;
+ // this.audioPauseTime = null;
+ // Bind event handlers
+ this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
+ this._handleDataAvailable = this._handleDataAvailable.bind(this);
+ }
+ _initializeVAD() {
+ var _a;
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
+ // If we're in push to talk mode, we don't need to use the VAD model
+ if (this.pushToTalkEnabled) {
+ return;
+ }
+ // Check if VAD is disabled
+ if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
+ console.log('VAD is disabled by backend configuration');
+ return;
+ }
+ // Build VAD configuration object, only including keys that are defined
+ const vadOptions = {
+ stream: this.wavRecorder.getStream() || undefined,
+ onSpeechStart: () => {
+ console.debug('onSpeechStart: sending vad_start');
+ this.userIsSpeaking = true;
+ this.options.onUserIsSpeakingChange(true);
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_start',
+ });
+ this.options.onMessage({
+ type: 'vad_events',
+ event: 'vad_start',
+ });
+ },
+ onSpeechEnd: () => {
+ console.debug('onSpeechEnd: sending vad_end');
+ this.userIsSpeaking = false;
+ this.options.onUserIsSpeakingChange(false);
+ this.audioBuffer = []; // Clear buffer on speech end
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_end',
+ });
+ this.options.onMessage({
+ type: 'vad_events',
+ event: 'vad_end',
+ });
+ },
+ };
+ // Apply VAD configuration from backend if available
+ if (this.vadConfig) {
+ // Only add keys that are explicitly defined (not undefined)
+ if (this.vadConfig.model !== undefined)
+ vadOptions.model = this.vadConfig.model;
+ if (this.vadConfig.positive_speech_threshold !== undefined)
+ vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
+ if (this.vadConfig.negative_speech_threshold !== undefined)
+ vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
+ if (this.vadConfig.redemption_frames !== undefined)
+ vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
+ if (this.vadConfig.min_speech_frames !== undefined)
+ vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
+ if (this.vadConfig.pre_speech_pad_frames !== undefined)
+ vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
+ if (this.vadConfig.frame_samples !== undefined)
+ vadOptions.frameSamples = this.vadConfig.frame_samples;
+ }
+ else {
+ // Default values if no config from backend
+ vadOptions.model = 'v5';
+ vadOptions.positiveSpeechThreshold = 0.15;
+ vadOptions.negativeSpeechThreshold = 0.05;
+ vadOptions.redemptionFrames = 4;
+ vadOptions.minSpeechFrames = 2;
+ vadOptions.preSpeechPadFrames = 0;
+ vadOptions.frameSamples = 512; // Required for v5
+ }
+ console.log('Creating VAD with options:', vadOptions);
+ dist.MicVAD.new(vadOptions)
+ .then((vad) => {
+ this.vad = vad;
+ this.vad.start();
+ console.log('VAD started successfully');
+ })
+ .catch((error) => {
+ console.warn('Error initializing VAD:', error);
+ // Send a message to server indicating VAD failure
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_model_failed',
+ });
+ });
+ }
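Read together with _handleDataAvailable below, the mapping above implies a backend vad config payload of roughly this shape (a sketch inferred from the keys the code reads; the values shown are the fallback defaults used when no config is returned):

const exampleVadConfig = {
  enabled: true,                    // false skips VAD entirely
  model: 'v5',
  positive_speech_threshold: 0.15,
  negative_speech_threshold: 0.05,
  redemption_frames: 4,
  min_speech_frames: 2,
  pre_speech_pad_frames: 0,
  frame_samples: 512,               // required for the v5 model
  gate_audio: true,                 // read by _handleDataAvailable below
  buffer_frames: 10,                // pre-speech buffer length, also read below
};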
+ /**
+ * Updates the connection status and triggers the callback
+ * @param {string} status - New status value
+ */
+ _setStatus(status) {
+ this.status = status;
+ this.options.onStatusChange(status);
+ }
+ /**
+ * Handles when agent audio finishes playing
+ */
+ _clientResponseAudioReplayFinished() {
+ console.debug('clientResponseAudioReplayFinished');
+ this._wsSend({
+ type: 'trigger.response.audio.replay_finished',
+ reason: 'completed',
+ });
+ }
+ async _clientInterruptAssistantReplay() {
+ await this.wavPlayer.interrupt();
+ }
+ async triggerUserTurnStarted() {
+ if (!this.pushToTalkActive) {
+ this.pushToTalkActive = true;
+ this._wsSend({ type: 'trigger.turn.start', role: 'user' });
+ await this._clientInterruptAssistantReplay();
+ }
+ }
+ async triggerUserTurnFinished() {
+ if (this.pushToTalkActive) {
+ this.pushToTalkActive = false;
+ this._wsSend({ type: 'trigger.turn.end', role: 'user' });
+ }
+ }
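In push-to-talk mode these two triggers are the caller's responsibility; a minimal hold-to-talk wiring sketch (assumes a client instance and a #talk button, both hypothetical):

const talkButton = document.querySelector('#talk');
talkButton.addEventListener('mousedown', () => client.triggerUserTurnStarted()); // open the user turn, interrupt agent audio
talkButton.addEventListener('mouseup', () => client.triggerUserTurnFinished()); // close the user turn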
+ /**
+ * Handles incoming WebSocket messages
+ * @param {MessageEvent} event - The WebSocket message event
+ */
+ async _handleWebSocketMessage(event) {
+ try {
+ const message = JSON.parse(event.data);
+ if (message.type !== 'response.audio') {
+ console.debug('msg:', message);
+ }
+ switch (message.type) {
+ case 'turn.start':
+ // Sent from the server to this client when a new user turn is detected
+ if (message.role === 'assistant') {
+ // Start tracking new assistant turn
+ console.debug('Assistant turn started, will track new turn ID from audio/text');
+ }
+ else if (message.role === 'user' && !this.pushToTalkEnabled) {
+ // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
+ console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
+ await this._clientInterruptAssistantReplay();
+ }
+ this.options.onMessage(message);
+ break;
+ case 'response.audio':
+ const audioBuffer = base64ToArrayBuffer(message.content);
+ this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+ // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
+ // Set current turn ID from first audio message, or update if different turn
+ if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
+ console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
+ this.currentTurnId = message.turn_id;
+ // Clean up interrupted tracks, keeping only the current turn
+ this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
+ }
+ break;
+ case 'response.text':
+ // Set turn ID from first text message if not set
+ if (!this.currentTurnId) {
+ this.currentTurnId = message.turn_id;
+ console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
+ }
+ this.options.onMessage(message);
+ break;
+ case 'response.data':
+ this.options.onDataMessage(message);
+ break;
+ case 'user.transcript':
+ case 'user.transcript.delta':
+ case 'user.transcript.interim_delta':
+ this.options.onMessage(message);
+ break;
+ default:
+ console.warn('Unknown message type received:', message);
+ }
+ }
+ catch (error) {
+ console.error('Error processing WebSocket message:', error);
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ }
+ }
+ /**
+ * Handles available client browser microphone audio data and sends it over the WebSocket
+ * @param {ArrayBuffer} data - The audio data buffer
+ */
+ _handleDataAvailable(data) {
+ var _a, _b, _c;
+ try {
+ const base64 = arrayBufferToBase64(data.mono);
+ // Don't send audio if muted
+ if (this.isMuted) {
+ return;
+ }
+ // Determine if we should gate audio based on VAD configuration
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
+ let sendAudio;
+ if (this.pushToTalkEnabled) {
+ sendAudio = this.pushToTalkActive;
+ }
+ else if (shouldGateAudio) {
+ sendAudio = this.userIsSpeaking;
+ }
+ else {
+ // If gate_audio is false, always send audio
+ sendAudio = true;
+ }
+ if (sendAudio) {
+ // If we have buffered audio and we're gating, send it first
+ if (shouldGateAudio && this.audioBuffer.length > 0) {
+ console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
+ for (const bufferedAudio of this.audioBuffer) {
+ this._wsSend({
+ type: 'client.audio',
+ content: bufferedAudio,
+ });
+ }
+ this.audioBuffer = []; // Clear the buffer after sending
+ }
+ // Send the current audio
+ this._wsSend({
+ type: 'client.audio',
+ content: base64,
+ });
+ }
+ else {
+ // Buffer audio when not sending (to catch audio just before VAD triggers)
+ this.audioBuffer.push(base64);
+ // Keep buffer size based on configuration
+ if (this.audioBuffer.length > bufferFrames) {
+ this.audioBuffer.shift(); // Remove oldest chunk
+ }
+ }
+ }
+ catch (error) {
+ console.error('Error processing audio:', error);
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ }
+ }
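The pre-speech buffer above holds whole recorder chunks, so its duration follows from the 1638-sample chunk size passed to wavRecorder.record in _restartAudioRecording below and the recorder's 8000 Hz sample rate; a quick sanity check:

const chunkSeconds = 1638 / 8000;        // ≈ 0.205 s of audio per chunk
const bufferSeconds = 10 * chunkSeconds; // default buffer_frames of 10 ≈ 2 s retained before VAD triggers
console.log(chunkSeconds.toFixed(3), bufferSeconds.toFixed(2)); // "0.205" "2.05"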
+ _wsSend(message) {
+ var _a;
+ if (message.type !== 'client.audio') {
+ console.debug('sent_msg:', message);
+ }
+ const messageString = JSON.stringify(message);
+ if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
+ this.ws.send(messageString);
+ }
+ }
+ _sendReadyIfNeeded() {
+ var _a;
+ if (this.recorderStarted && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
+ this._wsSend({ type: 'client.ready' });
+ this.readySent = true;
+ }
+ }
+ /**
+ * Sets up amplitude monitoring for a given audio source.
+ * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
+ * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
+ * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
+ */
+ _setupAmplitudeMonitoring(source, callback, updateInternalState) {
+ let updateCounter = 0;
+ source.startAmplitudeMonitoring((amplitude) => {
+ // Only update and call callback at the specified sample rate
+ if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
+ updateInternalState(amplitude);
+ if (callback !== NOOP) {
+ callback(amplitude);
+ }
+ updateCounter = 0; // Reset counter after sampling
+ }
+ updateCounter++;
+ });
+ const stop = () => { var _a; return (_a = source.stopAmplitudeMonitoring) === null || _a === void 0 ? void 0 : _a.call(source); };
+ if (source === this.wavPlayer) {
+ this.stopPlayerAmplitude = stop;
+ }
+ if (source === this.wavRecorder) {
+ this.stopRecorderAmplitude = stop;
+ }
+ }
+ _stopAmplitudeMonitoring() {
+ var _a, _b;
+ (_a = this.stopPlayerAmplitude) === null || _a === void 0 ? void 0 : _a.call(this);
+ (_b = this.stopRecorderAmplitude) === null || _b === void 0 ? void 0 : _b.call(this);
+ this.stopPlayerAmplitude = undefined;
+ this.stopRecorderAmplitude = undefined;
+ }
+ /**
+ * Connects to the Layercode agent using the stored conversation ID and starts the audio conversation
+ * @async
+ * @returns {Promise<void>}
+ */
+ async connect() {
+ if (this.status === 'connecting') {
+ return;
+ }
+ try {
+ this._setStatus('connecting');
+ // Reset turn tracking for clean start
+ this._resetTurnTracking();
+ this._stopAmplitudeMonitoring();
+ this._setupDeviceChangeListener();
+ // Get conversation key from server
+ let authorizeSessionRequestBody = {
+ agent_id: this.options.agentId,
+ metadata: this.options.metadata,
+ sdk_version: SDK_VERSION,
+ };
+ // If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
+ if (this.options.conversationId) {
+ authorizeSessionRequestBody.conversation_id = this.options.conversationId;
+ }
+ const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(authorizeSessionRequestBody),
+ });
+ if (!authorizeSessionResponse.ok) {
+ throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
+ }
+ const authorizeSessionResponseBody = await authorizeSessionResponse.json();
+ this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
+ this.options.conversationId = this.conversationId;
+ // Connect WebSocket
+ this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
+ client_session_key: authorizeSessionResponseBody.client_session_key,
+ })}`);
+ const config = authorizeSessionResponseBody.config;
+ console.log('AgentConfig', config);
+ // Store VAD configuration
+ this.vadConfig = config.vad || null;
+ if (config.transcription.trigger === 'push_to_talk') {
+ this.pushToTalkEnabled = true;
+ }
+ else if (config.transcription.trigger === 'automatic') {
+ this.pushToTalkEnabled = false;
+ this.canInterrupt = config.transcription.can_interrupt;
+ }
+ else {
+ throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
+ }
+ // Bind the websocket message callbacks
+ this.ws.onmessage = this._handleWebSocketMessage;
+ this.ws.onopen = () => {
+ console.log('WebSocket connection established');
+ this._setStatus('connected');
+ this.options.onConnect({ conversationId: this.conversationId });
+ // Attempt to send ready message if recorder already started
+ this._sendReadyIfNeeded();
+ };
+ this.ws.onclose = () => {
+ console.log('WebSocket connection closed');
+ this.ws = null;
+ this._performDisconnectCleanup().catch((error) => {
+ console.error('Error during disconnect cleanup:', error);
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ });
+ };
+ this.ws.onerror = (error) => {
+ console.error('WebSocket error:', error);
+ this._setStatus('error');
+ this.options.onError(new Error('WebSocket connection error'));
+ };
+ // Initialize audio player
+ await this.wavPlayer.connect();
+ // Set up audio player amplitude monitoring
+ this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
+ // wavRecorder is started from the device-change listener registered above,
+ // which fires when the device list is first read and again whenever the device is switched;
+ // this ensures the device is initialized before the recorder is started
+ }
+ catch (error) {
+ console.error('Error connecting to Layercode agent:', error);
+ this._setStatus('error');
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ throw error;
+ }
+ }
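connect() above implies the following contract for the developer-hosted authorizeSessionEndpoint (a sketch assembled from the fields the method sends and reads, not an official schema; IDs are hypothetical):

// POST body the SDK sends:
const authorizeRequest = {
  agent_id: 'ag-123',
  metadata: {},
  sdk_version: '2.1.3',
  conversation_id: 'cv-456', // only present when reconnecting
};
// JSON the endpoint must return:
const authorizeResponse = {
  conversation_id: 'cv-456',
  client_session_key: 'key-789', // appended to the WebSocket URL as a query param
  config: {
    vad: exampleVadConfig, // see the sketch above; may be omitted
    transcription: { trigger: 'automatic', can_interrupt: true }, // or trigger: 'push_to_talk'
  },
};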
+ _resetTurnTracking() {
+ this.currentTurnId = null;
+ console.debug('Reset turn tracking state');
+ }
+ async disconnect() {
+ if (this.status === 'disconnected') {
+ return;
+ }
+ if (this.ws) {
+ this.ws.onopen = null;
+ this.ws.onclose = null;
+ this.ws.onerror = null;
+ this.ws.onmessage = null;
+ this.ws.close();
+ this.ws = null;
+ }
+ await this._performDisconnectCleanup();
+ }
+ /**
+ * Gets the microphone MediaStream used by this client
+ * @returns {MediaStream|null} The microphone stream or null if not initialized
+ */
+ getStream() {
+ return this.wavRecorder.getStream();
+ }
+ /**
+ * Switches the input device for the microphone and restarts recording
+ * @param {string} deviceId - The deviceId of the new microphone
+ */
+ async setInputDevice(deviceId) {
+ var _a, _b, _c;
+ try {
+ const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
+ this.useSystemDefaultDevice = normalizedDeviceId === null;
+ this.deviceId = normalizedDeviceId;
+ // Restart recording with the new device
+ await this._restartAudioRecording();
+ // Reinitialize VAD with the new audio stream if VAD is enabled
+ const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
+ if (shouldUseVAD) {
+ console.debug('Reinitializing VAD with new audio stream');
+ const newStream = this.wavRecorder.getStream();
+ await this._reinitializeVAD(newStream);
+ }
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
+ console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
+ }
+ catch (error) {
+ console.error(`Failed to switch to input device ${deviceId}:`, error);
+ throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
+ }
+ }
+ /**
+ * Restarts audio recording after a device switch to ensure audio is captured from the new device
+ */
+ async _restartAudioRecording() {
+ var _a, _b;
+ try {
+ console.debug('Restarting audio recording after device switch...');
+ try {
+ await this.wavRecorder.end();
+ }
+ catch (_c) {
+ // Ignore cleanup errors
+ }
+ // Start with new device
+ const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
+ await this.wavRecorder.begin(targetDeviceId);
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
+ // Re-setup amplitude monitoring with the new stream
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
+ const previousReportedDeviceId = this.lastReportedDeviceId;
+ const stream = this.wavRecorder.getStream();
+ const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
+ const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
+ const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
+ this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
+ if (!this.recorderStarted) {
+ this.recorderStarted = true;
+ this._sendReadyIfNeeded();
+ }
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
+ if (reportedDeviceId !== previousReportedDeviceId) {
+ this.lastReportedDeviceId = reportedDeviceId;
+ if (this.options.onDeviceSwitched) {
+ this.options.onDeviceSwitched(reportedDeviceId);
+ }
+ }
+ console.debug('Audio recording restart completed successfully');
+ }
+ catch (error) {
+ console.error('Error restarting audio recording after device switch:', error);
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ }
+ }
+ /**
+ * Reinitializes VAD with a new stream (used after device switching)
+ */
+ async _reinitializeVAD(stream) {
+ // Clean up existing VAD
+ if (this.vad) {
+ this.vad.pause();
+ this.vad.destroy();
+ this.vad = null;
+ }
+ // Reinitialize with new stream
+ if (stream) {
+ this._initializeVAD();
+ }
+ }
+ /**
+ * Sets up the device change event listener
+ */
+ _setupDeviceChangeListener() {
+ if (!this.deviceChangeListener) {
+ this.deviceChangeListener = async (devices) => {
+ try {
+ const defaultDevice = devices.find((device) => device.default);
+ const usingDefaultDevice = this.useSystemDefaultDevice;
+ const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
+ const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
+ let shouldSwitch = !this.recorderStarted;
+ if (!shouldSwitch) {
+ if (usingDefaultDevice) {
+ if (!defaultDevice) {
+ shouldSwitch = true;
+ }
+ else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
+ shouldSwitch = true;
+ }
+ else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
+ (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
+ shouldSwitch = true;
+ }
+ }
+ else {
+ const matchesRequestedDevice = devices.some((device) => device.deviceId === this.deviceId || device.deviceId === this.activeDeviceId);
+ shouldSwitch = !matchesRequestedDevice;
+ }
+ }
+ this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+ if (shouldSwitch) {
+ console.debug('Selecting fallback audio input device');
+ const fallbackDevice = defaultDevice || devices[0];
+ if (fallbackDevice) {
+ const fallbackId = fallbackDevice.default ? 'default' : fallbackDevice.deviceId;
+ await this.setInputDevice(fallbackId);
+ }
+ else {
+ console.warn('No alternative audio device found');
+ }
+ }
+ }
+ catch (error) {
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ }
+ };
+ }
+ this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
+ }
+ _teardownDeviceListeners() {
+ this.wavRecorder.listenForDeviceChange(null);
+ }
+ async _performDisconnectCleanup() {
+ var _a, _b;
+ this.deviceId = null;
+ this.activeDeviceId = null;
+ this.useSystemDefaultDevice = false;
+ this.lastReportedDeviceId = null;
+ this.lastKnownSystemDefaultDeviceKey = null;
+ this.recorderStarted = false;
+ this.readySent = false;
+ this._stopAmplitudeMonitoring();
+ this._teardownDeviceListeners();
+ if (this.vad) {
+ this.vad.pause();
+ this.vad.destroy();
+ this.vad = null;
+ }
+ await this.wavRecorder.quit();
+ (_b = (_a = this.wavPlayer).stop) === null || _b === void 0 ? void 0 : _b.call(_a);
+ this.wavPlayer.disconnect();
+ this._resetTurnTracking();
+ this.options.conversationId = this.conversationId;
+ this.userAudioAmplitude = 0;
+ this.agentAudioAmplitude = 0;
+ this._setStatus('disconnected');
+ this.options.onDisconnect();
+ }
+ _getDeviceComparisonKey(device) {
+ if (!device || typeof device !== 'object') {
+ return null;
+ }
+ const deviceId = typeof device.deviceId === 'string' ? device.deviceId : '';
+ if (deviceId && deviceId !== 'default') {
+ return deviceId;
+ }
+ const groupId = typeof device.groupId === 'string' ? device.groupId : '';
+ if (groupId) {
+ return groupId;
+ }
+ const label = typeof device.label === 'string' ? device.label : '';
+ if (label) {
+ return label;
+ }
+ return null;
+ }
+ /**
+ * Mutes the microphone to stop sending audio to the server
+ * The connection and recording remain active for quick unmute
+ */
+ mute() {
+ if (!this.isMuted) {
+ this.isMuted = true;
+ console.log('Microphone muted');
+ this.options.onMuteStateChange(true);
+ }
+ }
+ /**
+ * Unmutes the microphone to resume sending audio to the server
+ */
+ unmute() {
+ if (this.isMuted) {
+ this.isMuted = false;
+ console.log('Microphone unmuted');
+ this.options.onMuteStateChange(false);
+ }
+ }
+ }
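Taken together, a minimal end-to-end usage sketch for the class added in this version (assumes a hypothetical /api/authorize route implementing the contract sketched after connect() above):

const client = new LayercodeClient({
  agentId: 'ag-123', // hypothetical ID
  authorizeSessionEndpoint: '/api/authorize',
  onConnect: ({ conversationId }) => console.log('connected', conversationId),
  onMessage: (msg) => console.log('msg', msg.type),
  onError: (err) => console.error(err),
});
await client.connect(); // authorizes the session, opens the WebSocket, starts mic and player
// ...
client.mute();          // keeps recording locally but stops sending client.audio frames
await client.disconnect();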

  return LayercodeClient;