mellon 0.0.16 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -2
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -27,8 +27,8 @@ export interface WordRef {
|
|
|
27
27
|
embeddings: number[][];
|
|
28
28
|
}
|
|
29
29
|
export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
|
|
30
|
-
export declare const DEFAULT_MODEL_PATH = "https://
|
|
31
|
-
export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://
|
|
30
|
+
export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
|
|
31
|
+
export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
|
|
32
32
|
export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";
|
|
33
33
|
export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";
|
|
34
34
|
export { Mellon, EnrollmentSession };
|
package/dist/mellon.cjs
CHANGED
|
@@ -2835,4 +2835,4 @@ ${B}`,w=C.createShaderModule({code:E,label:A.name});LA("verbose",()=>`[WebGPU] $
|
|
|
2835
2835
|
* See the License for the specific language governing permissions and
|
|
2836
2836
|
* limitations under the License.
|
|
2837
2837
|
* =============================================================================
|
|
2838
|
-
*/const Do=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return mg},get TRACE(){return sC},get TRACE_EVENT_BEGIN(){return YI},get TRACE_EVENT_END(){return tI},get TRACE_FUNC_BEGIN(){return II},get TRACE_FUNC_END(){return zA},get Tensor(){return AI},default:wo,get env(){return sA},get registerBackend(){return TI}},Symbol.toStringTag,{value:"Module"}));async function SB(A=hC){return sA.wasm.wasmPaths=A,sA.wasm.numThreads=1,Do}let Hg=null;async function Wg(A=EQ,I=hC){return Hg||(Hg=SB(I).then(C=>C.InferenceSession.create(A,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),Hg}const Lo=new zL;class Mo{constructor(I,C){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:Q=JU,thresholdStorageKey:g=aU,wasmPaths:B=hC,modelPath:E=EQ,audioProcessorPath:w=HU}=C||{};this._commands=I,this._refsStorageKey=Q,this._thresholdStorageKey=g,this._audioProcessorPath=w,this._wasmPaths=B,this._modelPath=E;try{const D=localStorage.getItem(this._thresholdStorageKey);this._threshold=D!==null?Math.max(0,Math.min(1,Number(D))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(I){this._threshold=Math.max(0,Math.min(1,I));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await Wg(this._modelPath);const I=new Set;for(const C of this._commands)for(const Q of C.triggers)if(!I.has(Q.name)&&(I.add(Q.name),Q.defaultRefPath)){const g=await fetch(Q.defaultRefPath);if(g.ok){const B=await g.json();this.addCustomWord(B)}}for(const C of this.loadWords())this._refEmbeddings.set(C.word_name,C.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(I){if(!(Array.isArray(I.embeddings)&&I.embeddings.length>0))throw new Error("invalid ref file for : "+I.word_name);this._refEmbeddings.set(I.word_name,I.embeddings)}async start(){if(this._started)return;await this._initPromise;let I;try{I=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{I=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=I;const C=new AudioContext({sampleRate:16e3});this._audioCtx=C,await C.audioWorklet.addModule(this._audioProcessorPath);const Q=C.createMediaStreamSource(I),g=new AudioWorkletNode(C,"audio-processor");g.port.onmessage=B=>{this._handleBuffer(B.data)},Q.connect(g),g.connect(C.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const I of this._stream.getTracks())I.stop();this._stream=null}}async _handleBuffer(I){if(this._inferring)return;const C=Date.now();if(!(C-this._lastInferenceAt<300)){this._lastInferenceAt=C,this._inferring=!0;try{const[Q,g]=await Promise.all([SB(this._wasmPaths),Wg(this._modelPath,this._wasmPaths)]),B=Lo.logfbank(I),E=new Q.Tensor("float32",B,[1,1,149,64]),w=await g.run({input:E}),D=w[Object.keys(w)[0]].data;let L=!1;for(const U of this._commands){if(L)break;for(const F of U.triggers){const i=this._refEmbeddings.get(F.name);if(!i)continue;const o=this._maxCosineSim(D,i);if(o>=this._threshold&&C-this._lastMatchAt>2e3){this._lastMatchAt=C,console.info(`[Mellon] match: "${F}" sim=${o.toFixed(3)}`),typeof U.onMatch=="function"&&U.onMatch(F.name,o),L=!0;break}}}}catch(Q){console.error("[Mellon] inference error:",Q)}finally{this._inferring=!1}}}_maxCosineSim(I,C){let Q=0;for(const g of C){let B=0;for(let w=0;w<g.length;w++)B+=I[w]*g[w];const E=(B+1)/2;E>Q&&(Q=E)}return Q}loadWords(){try{const I=localStorage.getItem(this._refsStorageKey);return I?JSON.parse(I):[]}catch{return[]}}saveWord(I){const C=this.loadWords().filter(Q=>Q.word_name!==I.word_name);localStorage.setItem(this._refsStorageKey,JSON.stringify([...C,I]))}deleteWord(I){this._refEmbeddings.delete(I);try{const C=this.loadWords().filter(Q=>Q.word_name!==I);localStorage.setItem(this._refsStorageKey,JSON.stringify(C))}catch{}}}const Fo=new zL;class Uo{constructor(I,C){this._config={},this._samples=[],this._wordName=I,this._config.modelPath=(C==null?void 0:C.modelPath)||EQ,this._config.wasmPaths=(C==null?void 0:C.wasmPaths)||hC}async recordSample(){const I=await navigator.mediaDevices.getUserMedia({audio:!0}),C=new AudioContext({sampleRate:16e3}),Q=await new Promise((E,w)=>{const D=new MediaRecorder(I),L=[];D.ondataavailable=U=>{U.data.size>0&&L.push(U.data)},D.onstop=async()=>{var U;for(const F of I.getTracks())F.stop();try{const i=await new Blob(L,{type:((U=L[0])==null?void 0:U.type)||"audio/webm"}).arrayBuffer(),o=await C.decodeAudioData(i);await C.close(),E(o.getChannelData(0).slice())}catch(F){w(F)}},D.start(),setTimeout(()=>{try{D.stop()}catch{}},1500)}),g=24e3,B=new Float32Array(g);return B.set(Q.slice(0,g)),this._samples.push(B),this._samples.length}async generateRef(){const[I,C]=await Promise.all([SB(this._config.wasmPaths),Wg(this._config.modelPath,this._config.wasmPaths)]),Q=[];for(const g of this._samples){const B=Fo.logfbank(g),E=new I.Tensor("float32",B,[1,1,149,64]),w=await C.run({input:E}),D=Array.from(w[Object.keys(w)[0]].data);Q.push(D)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:Q}}}const hC="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",EQ="https://
|
|
2838
|
+
*/const Do=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return mg},get TRACE(){return sC},get TRACE_EVENT_BEGIN(){return YI},get TRACE_EVENT_END(){return tI},get TRACE_FUNC_BEGIN(){return II},get TRACE_FUNC_END(){return zA},get Tensor(){return AI},default:wo,get env(){return sA},get registerBackend(){return TI}},Symbol.toStringTag,{value:"Module"}));async function SB(A=hC){return sA.wasm.wasmPaths=A,sA.wasm.numThreads=1,Do}let Hg=null;async function Wg(A=EQ,I=hC){return Hg||(Hg=SB(I).then(C=>C.InferenceSession.create(A,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),Hg}const Lo=new zL;class Mo{constructor(I,C){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:Q=JU,thresholdStorageKey:g=aU,wasmPaths:B=hC,modelPath:E=EQ,audioProcessorPath:w=HU}=C||{};this._commands=I,this._refsStorageKey=Q,this._thresholdStorageKey=g,this._audioProcessorPath=w,this._wasmPaths=B,this._modelPath=E;try{const D=localStorage.getItem(this._thresholdStorageKey);this._threshold=D!==null?Math.max(0,Math.min(1,Number(D))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(I){this._threshold=Math.max(0,Math.min(1,I));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await Wg(this._modelPath);const I=new Set;for(const C of this._commands)for(const Q of C.triggers)if(!I.has(Q.name)&&(I.add(Q.name),Q.defaultRefPath)){const g=await fetch(Q.defaultRefPath);if(g.ok){const B=await g.json();this.addCustomWord(B)}}for(const C of this.loadWords())this._refEmbeddings.set(C.word_name,C.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(I){if(!(Array.isArray(I.embeddings)&&I.embeddings.length>0))throw new Error("invalid ref file for : "+I.word_name);this._refEmbeddings.set(I.word_name,I.embeddings)}async start(){if(this._started)return;await this._initPromise;let I;try{I=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{I=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=I;const C=new AudioContext({sampleRate:16e3});this._audioCtx=C,await C.audioWorklet.addModule(this._audioProcessorPath);const Q=C.createMediaStreamSource(I),g=new AudioWorkletNode(C,"audio-processor");g.port.onmessage=B=>{this._handleBuffer(B.data)},Q.connect(g),g.connect(C.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const I of this._stream.getTracks())I.stop();this._stream=null}}async _handleBuffer(I){if(this._inferring)return;const C=Date.now();if(!(C-this._lastInferenceAt<300)){this._lastInferenceAt=C,this._inferring=!0;try{const[Q,g]=await Promise.all([SB(this._wasmPaths),Wg(this._modelPath,this._wasmPaths)]),B=Lo.logfbank(I),E=new Q.Tensor("float32",B,[1,1,149,64]),w=await g.run({input:E}),D=w[Object.keys(w)[0]].data;let L=!1;for(const U of this._commands){if(L)break;for(const F of U.triggers){const i=this._refEmbeddings.get(F.name);if(!i)continue;const o=this._maxCosineSim(D,i);if(o>=this._threshold&&C-this._lastMatchAt>2e3){this._lastMatchAt=C,console.info(`[Mellon] match: "${F}" sim=${o.toFixed(3)}`),typeof U.onMatch=="function"&&U.onMatch(F.name,o),L=!0;break}}}}catch(Q){console.error("[Mellon] inference error:",Q)}finally{this._inferring=!1}}}_maxCosineSim(I,C){let Q=0;for(const g of C){let B=0;for(let w=0;w<g.length;w++)B+=I[w]*g[w];const E=(B+1)/2;E>Q&&(Q=E)}return Q}loadWords(){try{const I=localStorage.getItem(this._refsStorageKey);return I?JSON.parse(I):[]}catch{return[]}}saveWord(I){const C=this.loadWords().filter(Q=>Q.word_name!==I.word_name);localStorage.setItem(this._refsStorageKey,JSON.stringify([...C,I]))}deleteWord(I){this._refEmbeddings.delete(I);try{const C=this.loadWords().filter(Q=>Q.word_name!==I);localStorage.setItem(this._refsStorageKey,JSON.stringify(C))}catch{}}}const Fo=new zL;class Uo{constructor(I,C){this._config={},this._samples=[],this._wordName=I,this._config.modelPath=(C==null?void 0:C.modelPath)||EQ,this._config.wasmPaths=(C==null?void 0:C.wasmPaths)||hC}async recordSample(){const I=await navigator.mediaDevices.getUserMedia({audio:!0}),C=new AudioContext({sampleRate:16e3}),Q=await new Promise((E,w)=>{const D=new MediaRecorder(I),L=[];D.ondataavailable=U=>{U.data.size>0&&L.push(U.data)},D.onstop=async()=>{var U;for(const F of I.getTracks())F.stop();try{const i=await new Blob(L,{type:((U=L[0])==null?void 0:U.type)||"audio/webm"}).arrayBuffer(),o=await C.decodeAudioData(i);await C.close(),E(o.getChannelData(0).slice())}catch(F){w(F)}},D.start(),setTimeout(()=>{try{D.stop()}catch{}},1500)}),g=24e3,B=new Float32Array(g);return B.set(Q.slice(0,g)),this._samples.push(B),this._samples.length}async generateRef(){const[I,C]=await Promise.all([SB(this._config.wasmPaths),Wg(this._config.modelPath,this._config.wasmPaths)]),Q=[];for(const g of this._samples){const B=Fo.logfbank(g),E=new I.Tensor("float32",B,[1,1,149,64]),w=await C.run({input:E}),D=Array.from(w[Object.keys(w)[0]].data);Q.push(D)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:Q}}}const hC="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",EQ="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",HU="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",JU="mellon-refs",aU="mellon-threshold";exports.DEFAULT_AUDIO_PROCESSOR_PATH=HU;exports.DEFAULT_MODEL_PATH=EQ;exports.DEFAULT_REFS_STORAGE_KEY=JU;exports.DEFAULT_THRESHOLD_STORAGE_KEY=aU;exports.DEFAULT_WASM_PATHS=hC;exports.EnrollmentSession=Uo;exports.Mellon=Mo;
|
package/dist/mellon.mjs
CHANGED
|
@@ -9845,7 +9845,7 @@ class Uo {
|
|
|
9845
9845
|
return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: Q };
|
|
9846
9846
|
}
|
|
9847
9847
|
}
|
|
9848
|
-
const gQ = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", sB = "https://
|
|
9848
|
+
const gQ = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", sB = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", wo = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Do = "mellon-refs", Lo = "mellon-threshold";
|
|
9849
9849
|
export {
|
|
9850
9850
|
wo as DEFAULT_AUDIO_PROCESSOR_PATH,
|
|
9851
9851
|
sB as DEFAULT_MODEL_PATH,
|