mellon 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -27,8 +27,8 @@ export interface WordRef {
27
27
  embeddings: number[][];
28
28
  }
29
29
  export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
30
- export declare const DEFAULT_MODEL_PATH = "https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/model.onnx";
31
- export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/audio-processor.js";
30
+ export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
31
+ export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
32
32
  export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";
33
33
  export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";
34
34
  export { Mellon, EnrollmentSession };
package/dist/mellon.cjs CHANGED
@@ -2835,4 +2835,4 @@ ${B}`,w=C.createShaderModule({code:E,label:A.name});LA("verbose",()=>`[WebGPU] $
2835
2835
  * See the License for the specific language governing permissions and
2836
2836
  * limitations under the License.
2837
2837
  * =============================================================================
2838
- */const Do=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return mg},get TRACE(){return sC},get TRACE_EVENT_BEGIN(){return YI},get TRACE_EVENT_END(){return tI},get TRACE_FUNC_BEGIN(){return II},get TRACE_FUNC_END(){return zA},get Tensor(){return AI},default:wo,get env(){return sA},get registerBackend(){return TI}},Symbol.toStringTag,{value:"Module"}));async function SB(A=hC){return sA.wasm.wasmPaths=A,sA.wasm.numThreads=1,Do}let Hg=null;async function Wg(A=EQ,I=hC){return Hg||(Hg=SB(I).then(C=>C.InferenceSession.create(A,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),Hg}const Lo=new zL;class Mo{constructor(I,C){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:Q=JU,thresholdStorageKey:g=aU,wasmPaths:B=hC,modelPath:E=EQ,audioProcessorPath:w=HU}=C||{};this._commands=I,this._refsStorageKey=Q,this._thresholdStorageKey=g,this._audioProcessorPath=w,this._wasmPaths=B,this._modelPath=E;try{const D=localStorage.getItem(this._thresholdStorageKey);this._threshold=D!==null?Math.max(0,Math.min(1,Number(D))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(I){this._threshold=Math.max(0,Math.min(1,I));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await Wg(this._modelPath);const I=new Set;for(const C of this._commands)for(const Q of C.triggers)if(!I.has(Q.name)&&(I.add(Q.name),Q.defaultRefPath)){const g=await fetch(Q.defaultRefPath);if(g.ok){const B=await g.json();this.addCustomWord(B)}}for(const C of this.loadWords())this._refEmbeddings.set(C.word_name,C.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(I){if(!(Array.isArray(I.embeddings)&&I.embeddings.length>0))throw new Error("invalid ref file for : "+I.word_name);this._refEmbeddings.set(I.word_name,I.embeddings)}async start(){if(this._started)return;await this._initPromise;let I;try{I=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{I=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=I;const C=new AudioContext({sampleRate:16e3});this._audioCtx=C,await C.audioWorklet.addModule(this._audioProcessorPath);const Q=C.createMediaStreamSource(I),g=new AudioWorkletNode(C,"audio-processor");g.port.onmessage=B=>{this._handleBuffer(B.data)},Q.connect(g),g.connect(C.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const I of this._stream.getTracks())I.stop();this._stream=null}}async _handleBuffer(I){if(this._inferring)return;const C=Date.now();if(!(C-this._lastInferenceAt<300)){this._lastInferenceAt=C,this._inferring=!0;try{const[Q,g]=await Promise.all([SB(this._wasmPaths),Wg(this._modelPath,this._wasmPaths)]),B=Lo.logfbank(I),E=new Q.Tensor("float32",B,[1,1,149,64]),w=await g.run({input:E}),D=w[Object.keys(w)[0]].data;let L=!1;for(const U of this._commands){if(L)break;for(const F of U.triggers){const i=this._refEmbeddings.get(F.name);if(!i)continue;const o=this._maxCosineSim(D,i);if(o>=this._threshold&&C-this._lastMatchAt>2e3){this._lastMatchAt=C,console.info(`[Mellon] match: "${F}" sim=${o.toFixed(3)}`),typeof U.onMatch=="function"&&U.onMatch(F.name,o),L=!0;break}}}}catch(Q){console.error("[Mellon] inference error:",Q)}finally{this._inferring=!1}}}_maxCosineSim(I,C){let Q=0;for(const g of C){let B=0;for(let w=0;w<g.length;w++)B+=I[w]*g[w];const E=(B+1)/2;E>Q&&(Q=E)}return Q}loadWords(){try{const I=localStorage.getItem(this._refsStorageKey);return I?JSON.parse(I):[]}catch{return[]}}saveWord(I){const C=this.loadWords().filter(Q=>Q.word_name!==I.word_name);localStorage.setItem(this._refsStorageKey,JSON.stringify([...C,I]))}deleteWord(I){this._refEmbeddings.delete(I);try{const C=this.loadWords().filter(Q=>Q.word_name!==I);localStorage.setItem(this._refsStorageKey,JSON.stringify(C))}catch{}}}const Fo=new zL;class Uo{constructor(I,C){this._config={},this._samples=[],this._wordName=I,this._config.modelPath=(C==null?void 0:C.modelPath)||EQ,this._config.wasmPaths=(C==null?void 0:C.wasmPaths)||hC}async recordSample(){const I=await navigator.mediaDevices.getUserMedia({audio:!0}),C=new AudioContext({sampleRate:16e3}),Q=await new Promise((E,w)=>{const D=new MediaRecorder(I),L=[];D.ondataavailable=U=>{U.data.size>0&&L.push(U.data)},D.onstop=async()=>{var U;for(const F of I.getTracks())F.stop();try{const i=await new Blob(L,{type:((U=L[0])==null?void 0:U.type)||"audio/webm"}).arrayBuffer(),o=await C.decodeAudioData(i);await C.close(),E(o.getChannelData(0).slice())}catch(F){w(F)}},D.start(),setTimeout(()=>{try{D.stop()}catch{}},1500)}),g=24e3,B=new Float32Array(g);return B.set(Q.slice(0,g)),this._samples.push(B),this._samples.length}async generateRef(){const[I,C]=await Promise.all([SB(this._config.wasmPaths),Wg(this._config.modelPath,this._config.wasmPaths)]),Q=[];for(const g of this._samples){const B=Fo.logfbank(g),E=new I.Tensor("float32",B,[1,1,149,64]),w=await C.run({input:E}),D=Array.from(w[Object.keys(w)[0]].data);Q.push(D)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:Q}}}const hC="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",EQ="https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/model.onnx",HU="https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/audio-processor.js",JU="mellon-refs",aU="mellon-threshold";exports.DEFAULT_AUDIO_PROCESSOR_PATH=HU;exports.DEFAULT_MODEL_PATH=EQ;exports.DEFAULT_REFS_STORAGE_KEY=JU;exports.DEFAULT_THRESHOLD_STORAGE_KEY=aU;exports.DEFAULT_WASM_PATHS=hC;exports.EnrollmentSession=Uo;exports.Mellon=Mo;
2838
+ */const Do=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return mg},get TRACE(){return sC},get TRACE_EVENT_BEGIN(){return YI},get TRACE_EVENT_END(){return tI},get TRACE_FUNC_BEGIN(){return II},get TRACE_FUNC_END(){return zA},get Tensor(){return AI},default:wo,get env(){return sA},get registerBackend(){return TI}},Symbol.toStringTag,{value:"Module"}));async function SB(A=hC){return sA.wasm.wasmPaths=A,sA.wasm.numThreads=1,Do}let Hg=null;async function Wg(A=EQ,I=hC){return Hg||(Hg=SB(I).then(C=>C.InferenceSession.create(A,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),Hg}const Lo=new zL;class Mo{constructor(I,C){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:Q=JU,thresholdStorageKey:g=aU,wasmPaths:B=hC,modelPath:E=EQ,audioProcessorPath:w=HU}=C||{};this._commands=I,this._refsStorageKey=Q,this._thresholdStorageKey=g,this._audioProcessorPath=w,this._wasmPaths=B,this._modelPath=E;try{const D=localStorage.getItem(this._thresholdStorageKey);this._threshold=D!==null?Math.max(0,Math.min(1,Number(D))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(I){this._threshold=Math.max(0,Math.min(1,I));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await Wg(this._modelPath);const I=new Set;for(const C of this._commands)for(const Q of C.triggers)if(!I.has(Q.name)&&(I.add(Q.name),Q.defaultRefPath)){const g=await fetch(Q.defaultRefPath);if(g.ok){const B=await g.json();this.addCustomWord(B)}}for(const C of this.loadWords())this._refEmbeddings.set(C.word_name,C.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(I){if(!(Array.isArray(I.embeddings)&&I.embeddings.length>0))throw new Error("invalid ref file for : "+I.word_name);this._refEmbeddings.set(I.word_name,I.embeddings)}async start(){if(this._started)return;await this._initPromise;let I;try{I=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{I=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=I;const C=new AudioContext({sampleRate:16e3});this._audioCtx=C,await C.audioWorklet.addModule(this._audioProcessorPath);const Q=C.createMediaStreamSource(I),g=new AudioWorkletNode(C,"audio-processor");g.port.onmessage=B=>{this._handleBuffer(B.data)},Q.connect(g),g.connect(C.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const I of this._stream.getTracks())I.stop();this._stream=null}}async _handleBuffer(I){if(this._inferring)return;const C=Date.now();if(!(C-this._lastInferenceAt<300)){this._lastInferenceAt=C,this._inferring=!0;try{const[Q,g]=await Promise.all([SB(this._wasmPaths),Wg(this._modelPath,this._wasmPaths)]),B=Lo.logfbank(I),E=new Q.Tensor("float32",B,[1,1,149,64]),w=await g.run({input:E}),D=w[Object.keys(w)[0]].data;let L=!1;for(const U of this._commands){if(L)break;for(const F of U.triggers){const i=this._refEmbeddings.get(F.name);if(!i)continue;const o=this._maxCosineSim(D,i);if(o>=this._threshold&&C-this._lastMatchAt>2e3){this._lastMatchAt=C,console.info(`[Mellon] match: "${F}" sim=${o.toFixed(3)}`),typeof U.onMatch=="function"&&U.onMatch(F.name,o),L=!0;break}}}}catch(Q){console.error("[Mellon] inference error:",Q)}finally{this._inferring=!1}}}_maxCosineSim(I,C){let Q=0;for(const g of C){let B=0;for(let w=0;w<g.length;w++)B+=I[w]*g[w];const E=(B+1)/2;E>Q&&(Q=E)}return Q}loadWords(){try{const I=localStorage.getItem(this._refsStorageKey);return I?JSON.parse(I):[]}catch{return[]}}saveWord(I){const C=this.loadWords().filter(Q=>Q.word_name!==I.word_name);localStorage.setItem(this._refsStorageKey,JSON.stringify([...C,I]))}deleteWord(I){this._refEmbeddings.delete(I);try{const C=this.loadWords().filter(Q=>Q.word_name!==I);localStorage.setItem(this._refsStorageKey,JSON.stringify(C))}catch{}}}const Fo=new zL;class Uo{constructor(I,C){this._config={},this._samples=[],this._wordName=I,this._config.modelPath=(C==null?void 0:C.modelPath)||EQ,this._config.wasmPaths=(C==null?void 0:C.wasmPaths)||hC}async recordSample(){const I=await navigator.mediaDevices.getUserMedia({audio:!0}),C=new AudioContext({sampleRate:16e3}),Q=await new Promise((E,w)=>{const D=new MediaRecorder(I),L=[];D.ondataavailable=U=>{U.data.size>0&&L.push(U.data)},D.onstop=async()=>{var U;for(const F of I.getTracks())F.stop();try{const i=await new Blob(L,{type:((U=L[0])==null?void 0:U.type)||"audio/webm"}).arrayBuffer(),o=await C.decodeAudioData(i);await C.close(),E(o.getChannelData(0).slice())}catch(F){w(F)}},D.start(),setTimeout(()=>{try{D.stop()}catch{}},1500)}),g=24e3,B=new Float32Array(g);return B.set(Q.slice(0,g)),this._samples.push(B),this._samples.length}async generateRef(){const[I,C]=await Promise.all([SB(this._config.wasmPaths),Wg(this._config.modelPath,this._config.wasmPaths)]),Q=[];for(const g of this._samples){const B=Fo.logfbank(g),E=new I.Tensor("float32",B,[1,1,149,64]),w=await C.run({input:E}),D=Array.from(w[Object.keys(w)[0]].data);Q.push(D)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:Q}}}const hC="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",EQ="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",HU="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",JU="mellon-refs",aU="mellon-threshold";exports.DEFAULT_AUDIO_PROCESSOR_PATH=HU;exports.DEFAULT_MODEL_PATH=EQ;exports.DEFAULT_REFS_STORAGE_KEY=JU;exports.DEFAULT_THRESHOLD_STORAGE_KEY=aU;exports.DEFAULT_WASM_PATHS=hC;exports.EnrollmentSession=Uo;exports.Mellon=Mo;
package/dist/mellon.mjs CHANGED
@@ -9845,7 +9845,7 @@ class Uo {
9845
9845
  return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: Q };
9846
9846
  }
9847
9847
  }
9848
- const gQ = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", sB = "https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/model.onnx", wo = "https://github.com/ComicScrip/mellon-stt/releases/download/0.0.15/audio-processor.js", Do = "mellon-refs", Lo = "mellon-threshold";
9848
+ const gQ = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", sB = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", wo = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Do = "mellon-refs", Lo = "mellon-threshold";
9849
9849
  export {
9850
9850
  wo as DEFAULT_AUDIO_PROCESSOR_PATH,
9851
9851
  sB as DEFAULT_MODEL_PATH,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mellon",
3
- "version": "0.0.16",
3
+ "version": "0.0.17",
4
4
  "description": "Offline, in-browser voice commands powered by EfficientWord-Net (ResNet-50 ArcFace).",
5
5
  "type": "module",
6
6
  "main": "./dist/mellon.cjs",