npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.6 → 3.0.0-alpha.8 - Mend

@huggingface/transformers 3.0.0-alpha.6 → 3.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +2 -2
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +95 -44
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +554 -466
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +13 -13
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +23 -23
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +7 -7
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +95 -44
package/dist/transformers.mjs.map +1 -1
package/package.json +4 -4
package/src/backends/onnx.js +61 -22
package/src/env.js +5 -1
package/src/generation/streamers.js +3 -3
package/src/models.js +13 -10
package/src/utils/devices.js +10 -1
package/src/utils/dtypes.js +1 -5
package/types/backends/onnx.d.ts +4 -3
package/types/backends/onnx.d.ts.map +1 -1
package/types/env.d.ts +2 -0
package/types/env.d.ts.map +1 -1
package/types/models.d.ts.map +1 -1
package/types/utils/devices.d.ts +9 -1
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +0 -5
package/types/utils/dtypes.d.ts.map +1 -1

package/dist/transformers.min.mjs CHANGED Viewed

@@ -82,16 +82,16 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
   \**********************************************************************/(e,t,n)=>{n.r(t),n.d(t,{TrainingSession:()=>r});const r=n(/*! ./training-session-impl.js */"./node_modules/onnxruntime-common/dist/esm/training-session-impl.js").TrainingSession},"./node_modules/onnxruntime-common/dist/esm/version.js":
 /*!*************************************************************!*\
   !*** ./node_modules/onnxruntime-common/dist/esm/version.js ***!
-  \*************************************************************/(e,t,n)=>{n.r(t),n.d(t,{version:()=>r});const r="1.18.0"},"./src/backends/onnx.js":
+  \*************************************************************/(e,t,n)=>{n.r(t),n.d(t,{version:()=>r});const r="1.19.0"},"./src/backends/onnx.js":
 /*!******************************!*\
   !*** ./src/backends/onnx.js ***!
-  \******************************/(e,t,n)=>{var r;n.r(t),n.d(t,{Tensor:()=>i.Tensor,createInferenceSession:()=>m,deviceToExecutionProviders:()=>p,isONNXProxy:()=>g,isONNXTensor:()=>_});var o=n(/*! ../env.js */"./src/env.js"),s=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! onnxruntime-web/webgpu */"?9c66"),i=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const l=[];let c,d;if(o.apis.IS_NODE_ENV){switch(d=s.default??s,process.platform){case"win32":l.push("dml");break;case"linux":"x64"===process.arch&&l.push("cuda")}l.push("cpu"),c=["cpu"]}else d=r||(r=n.t(a,2)),o.apis.IS_WEBGPU_AVAILABLE&&l.push("webgpu"),l.push("wasm"),c=["wasm"];const u=d.InferenceSession;function p(e){let t=c;if(e){if(!l.includes(e))throw new Error(`Unsupported device: "${e}". Should be one of: ${l.join(", ")}.`);t=[e]}return t}let h=null;async function m(e,t){h&&await h;const n=u.create(e,t);return h??=n,await n}function _(e){return e instanceof d.Tensor}const f=d?.env;if(f?.wasm){f.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,f.wasm.proxy=!o.apis.IS_WEBWORKER_ENV,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(f.wasm.numThreads=1);"undefined"!=typeof navigator&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(f.wasm.simd=!1)}function g(){return f?.wasm?.proxy}f?.webgpu&&(f.webgpu.powerPreference="high-performance"),o.env.backends.onnx=f},"./src/configs.js":
+  \******************************/(e,t,n)=>{var r;n.r(t),n.d(t,{Tensor:()=>i.Tensor,createInferenceSession:()=>_,deviceToExecutionProviders:()=>h,isONNXProxy:()=>M,isONNXTensor:()=>f});var o=n(/*! ../env.js */"./src/env.js"),s=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! onnxruntime-web/webgpu */"?9c66"),i=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const l=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),c=[];let d,u;if(o.apis.IS_NODE_ENV){switch(u=s.default??s,process.platform){case"win32":c.push("dml");break;case"linux":"x64"===process.arch&&c.push("cuda")}c.push("cpu"),d=["cpu"]}else u=r||(r=n.t(a,2)),o.apis.IS_WEBNN_AVAILABLE&&c.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),o.apis.IS_WEBGPU_AVAILABLE&&c.push("webgpu"),c.push("wasm"),d=["wasm"];const p=u.InferenceSession;function h(e=null){if(!e)return d;switch(e){case"auto":return c;case"gpu":return c.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(c.includes(e))return[l[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${c.join(", ")}.`)}let m=null;async function _(e,t){m&&await m;const n=p.create(e,t);return m??=n,await n}function f(e){return e instanceof u.Tensor}const g=u?.env;if(g?.wasm){g.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,g.wasm.proxy=!o.apis.IS_WEBWORKER_ENV,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(g.wasm.numThreads=1);"undefined"!=typeof navigator&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(g.wasm.simd=!1)}function M(){return g?.wasm?.proxy}g?.webgpu&&(g.webgpu.powerPreference="high-performance"),o.env.backends.onnx=g},"./src/configs.js":
 /*!************************!*\
   !*** ./src/configs.js ***!
   \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>i,getKeyValueShapes:()=>a});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),o=n(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=s(e.text_config);break;case"moondream1":n=s(e.phi_config);break;case"musicgen":n=s(e.decoder);break;case"gpt2":case"gptj":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const o=s(e.decoder),a="num_decoder_layers"in o,i=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return a?(i.num_decoder_layers=o.num_decoder_layers,i.num_decoder_heads=o.num_decoder_heads,i.decoder_hidden_size=o.decoder_hidden_size,i.num_encoder_layers=o.num_encoder_layers,i.num_encoder_heads=o.num_encoder_heads,i.encoder_hidden_size=o.encoder_hidden_size):(i.num_layers=o.num_layers,i.num_heads=o.num_heads,i.hidden_size=o.hidden_size),i}const o={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)o[n]=e[t[n]];return o}function a(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,o=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,s=[1,r.num_encoder_heads,0,e],a=[1,r.num_decoder_heads,0,o];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=s,n[`${t}.${e}.encoder.value`]=s,n[`${t}.${e}.decoder.key`]=a,n[`${t}.${e}.decoder.value`]=a}else{const e=r.num_heads,o=r.num_layers,s=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*s];for(let e=0;e<o;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,s,0],a=[1*e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=a}else if("openelm"===r.model_type)for(let r=0;r<o;++r){const o=[1,e[r],0,s];n[`${t}.${r}.key`]=o,n[`${t}.${r}.value`]=o}else{const r=[1,e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class i{max_position_embeddings;constructor(e){this.model_type=null,this.is_encoder_decoder=!1,Object.assign(this,e),this.normalized_config=s(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:s=!1,revision:a="main"}={}){!n||n instanceof i||(n=new i(n));const l=n??await async function(e,t){return await(0,o.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:s,revision:a});return new this(l)}}class l{static async from_pretrained(...e){return i.from_pretrained(...e)}}},"./src/env.js":
 /*!********************!*\
   !*** ./src/env.js ***!
-  \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>m,env:()=>w});var r=n(/*! fs */"fs"),o=n(/*! path */"path"),s=n(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof process,u=d&&"node"===process?.release?.name,p=!b(r.default),h=!b(o.default),m=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_PROCESS_AVAILABLE:d,IS_NODE_ENV:u,IS_FS_AVAILABLE:p,IS_PATH_AVAILABLE:h}),_=p&&h,f=_?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",g=_?o.default.join(f,"/.cache/"):null,M="/models/",w={version:"3.0.0-alpha.6",backends:{onnx:{},tfjs:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!a,localModelPath:_?o.default.join(f,M):M,useFS:p,useBrowserCache:l,useFSCache:p,cacheDir:g,useCustomCache:!1,customCache:null};function b(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
+  \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>_,env:()=>b});var r=n(/*! fs */"fs"),o=n(/*! path */"path"),s=n(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!T(r.default),m=!T(o.default),_=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),f=h&&m,g=f?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",M=f?o.default.join(g,"/.cache/"):null,w="/models/",b={version:"3.0.0-alpha.8",backends:{onnx:{},tfjs:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!a,localModelPath:f?o.default.join(g,w):w,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:M,useCustomCache:!1,customCache:null};function T(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
 /*!***********************************************!*\
   !*** ./src/generation/configuration_utils.js ***!
   \***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>o});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class o{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
@@ -106,10 +106,10 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
   \*********************************************/(e,t,n)=>{n.r(t),n.d(t,{EosTokenCriteria:()=>i,InterruptableStoppingCriteria:()=>l,MaxLengthCriteria:()=>a,StoppingCriteria:()=>o,StoppingCriteriaList:()=>s});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js");class o extends r.Callable{_call(e,t){throw Error("StoppingCriteria needs to be subclassed")}}class s extends r.Callable{constructor(){super(),this.criteria=[]}push(e){this.criteria.push(e)}extend(e){e instanceof s?e=e.criteria:e instanceof o&&(e=[e]),this.criteria.push(...e)}_call(e,t){const n=new Array(e.length).fill(!1);for(const r of this.criteria){const o=r(e,t);for(let e=0;e<n.length;++e)n[e]||=o[e]}return n}[Symbol.iterator](){return this.criteria.values()}}class a extends o{constructor(e,t=null){super(),this.max_length=e,this.max_position_embeddings=t}_call(e){return e.map((e=>e.length>=this.max_length))}}class i extends o{constructor(e){super(),Array.isArray(e)||(e=[e]),this.eos_token_id=e}_call(e,t){return e.map((e=>{const t=e.at(-1);return this.eos_token_id.some((e=>t==e))}))}}class l extends o{constructor(){super(),this.interrupted=!1}interrupt(){this.interrupted=!0}reset(){this.interrupted=!1}_call(e,t){return new Array(e.length).fill(this.interrupted)}}},"./src/generation/streamers.js":
 /*!*************************************!*\
   !*** ./src/generation/streamers.js ***!
-  \*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>a,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),o=n(/*! ../tokenizers.js */"./src/tokenizers.js"),s=n(/*! ../env.js */"./src/env.js");class a{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const i=s.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends a{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:o={},...s}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??i,this.token_callback_function=r,this.decode_kwargs={...o,...s},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");const t=e[0];if(this.token_callback_function?.(t),this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let s;n.endsWith("\n")?(s=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,o.is_chinese_char)(n.charCodeAt(n.length-1))?(s=n.slice(this.print_len),this.print_len+=s.length):(s=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=s.length),this.on_finalized_text(s,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===i&&s.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:o=null,on_chunk_end:s=null,on_finalize:a=null,time_precision:i=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=o,this.on_chunk_end=s,this.on_finalize=a,this.time_precision=i,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
+  \*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>a,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),o=n(/*! ../tokenizers.js */"./src/tokenizers.js"),s=n(/*! ../env.js */"./src/env.js");class a{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const i=s.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends a{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:o={},...s}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??i,this.token_callback_function=r,this.decode_kwargs={...o,...s},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let s;n.endsWith("\n")?(s=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,o.is_chinese_char)(n.charCodeAt(n.length-1))?(s=n.slice(this.print_len),this.print_len+=s.length):(s=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=s.length),this.on_finalized_text(s,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===i&&s.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:o=null,on_chunk_end:s=null,on_finalize:a=null,time_precision:i=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=o,this.on_chunk_end=s,this.on_finalize=a,this.time_precision=i,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
 /*!***********************!*\
   !*** ./src/models.js ***!
-  \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>ni,AutoModelForAudioClassification:()=>bi,AutoModelForAudioFrameClassification:()=>xi,AutoModelForCTC:()=>wi,AutoModelForCausalLM:()=>ci,AutoModelForDepthEstimation:()=>Ci,AutoModelForDocumentQuestionAnswering:()=>yi,AutoModelForImageClassification:()=>hi,AutoModelForImageFeatureExtraction:()=>Pi,AutoModelForImageMatting:()=>ki,AutoModelForImageSegmentation:()=>mi,AutoModelForImageToImage:()=>Fi,AutoModelForMaskGeneration:()=>Mi,AutoModelForMaskedLM:()=>di,AutoModelForObjectDetection:()=>fi,AutoModelForQuestionAnswering:()=>ui,AutoModelForSemanticSegmentation:()=>_i,AutoModelForSeq2SeqLM:()=>si,AutoModelForSequenceClassification:()=>ri,AutoModelForSpeechSeq2Seq:()=>ai,AutoModelForTextToSpectrogram:()=>ii,AutoModelForTextToWaveform:()=>li,AutoModelForTokenClassification:()=>oi,AutoModelForVision2Seq:()=>pi,AutoModelForXVector:()=>Ti,AutoModelForZeroShotObjectDetection:()=>gi,BartForConditionalGeneration:()=>yt,BartForSequenceClassification:()=>kt,BartModel:()=>xt,BartPretrainedModel:()=>Tt,BaseModelOutput:()=>q,BeitForImageClassification:()=>Wr,BeitModel:()=>$r,BeitPreTrainedModel:()=>qr,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>fr,BloomModel:()=>_r,BloomPreTrainedModel:()=>mr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Fn,CLIPSegModel:()=>kn,CLIPSegPreTrainedModel:()=>yn,CLIPTextModelWithProjection:()=>_n,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>Te,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>Ii,CausalLMOutputWithPast:()=>Bi,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>Hs,ClapModel:()=>Xs,ClapPreTrainedModel:()=>Us,ClapTextModelWithProjection:()=>Qs,CodeGenForCausalLM:()=>qn,CodeGenModel:()=>Gn,CodeGenPreTrainedModel:()=>Rn,CohereForCausalLM:()=>Hn,CohereModel:()=>Qn,CoherePreTrainedModel:()=>Xn,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>Eo,ConvNextModel:()=>Ao,ConvNextPreTrainedModel:()=>So,ConvNextV2ForImageClassification:()=>Io,ConvNextV2Model:()=>zo,ConvNextV2PreTrainedModel:()=>Lo,DPTForDepthEstimation:()=>To,DPTModel:()=>bo,DPTPreTrainedModel:()=>wo,DebertaForMaskedLM:()=>ke,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ye,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DeiTForImageClassification:()=>lo,DeiTModel:()=>io,DeiTPreTrainedModel:()=>ao,DepthAnythingForDepthEstimation:()=>yo,DepthAnythingPreTrainedModel:()=>xo,DetrForObjectDetection:()=>Qr,DetrForSegmentation:()=>Hr,DetrModel:()=>Xr,DetrObjectDetectionOutput:()=>Yr,DetrPreTrainedModel:()=>Ur,DetrSegmentationOutput:()=>Kr,Dinov2ForImageClassification:()=>Oo,Dinov2Model:()=>No,Dinov2PreTrainedModel:()=>Bo,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>vo,DonutSwinPreTrainedModel:()=>Po,EfficientNetForImageClassification:()=>ia,EfficientNetModel:()=>aa,EfficientNetPreTrainedModel:()=>sa,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>Ws,FalconModel:()=>$s,FalconPreTrainedModel:()=>qs,FastViTForImageClassification:()=>vr,FastViTModel:()=>Pr,FastViTPreTrainedModel:()=>Cr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>Co,GLPNModel:()=>Fo,GLPNPreTrainedModel:()=>ko,GPT2LMHeadModel:()=>vn,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>jn,GPTBigCodeModel:()=>Vn,GPTBigCodePreTrainedModel:()=>Dn,GPTJForCausalLM:()=>On,GPTJModel:()=>Nn,GPTJPreTrainedModel:()=>Bn,GPTNeoForCausalLM:()=>En,GPTNeoModel:()=>An,GPTNeoPreTrainedModel:()=>Sn,GPTNeoXForCausalLM:()=>In,GPTNeoXModel:()=>zn,GPTNeoXPreTrainedModel:()=>Ln,Gemma2ForCausalLM:()=>tr,Gemma2Model:()=>er,Gemma2PreTrainedModel:()=>Zn,GemmaForCausalLM:()=>Jn,GemmaModel:()=>Kn,GemmaPreTrainedModel:()=>Yn,HubertForCTC:()=>xs,HubertForSequenceClassification:()=>ys,HubertModel:()=>Ts,HubertPreTrainedModel:()=>bs,ImageMattingOutput:()=>Ni,LlamaForCausalLM:()=>Un,LlamaModel:()=>Wn,LlamaPreTrainedModel:()=>$n,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>Yo,M2M100Model:()=>Ho,M2M100PreTrainedModel:()=>Qo,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>vt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>Xo,MarianModel:()=>Uo,MarianPreTrainedModel:()=>Wo,MaskedLMOutput:()=>Li,MistralForCausalLM:()=>Vs,MistralModel:()=>Ds,MistralPreTrainedModel:()=>Os,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>ma,MobileNetV1Model:()=>ha,MobileNetV1PreTrainedModel:()=>pa,MobileNetV2ForImageClassification:()=>ga,MobileNetV2Model:()=>fa,MobileNetV2PreTrainedModel:()=>_a,MobileNetV3ForImageClassification:()=>ba,MobileNetV3Model:()=>wa,MobileNetV3PreTrainedModel:()=>Ma,MobileNetV4ForImageClassification:()=>ya,MobileNetV4Model:()=>xa,MobileNetV4PreTrainedModel:()=>Ta,MobileViTForImageClassification:()=>zr,MobileViTModel:()=>Lr,MobileViTPreTrainedModel:()=>Er,MobileViTV2ForImageClassification:()=>Nr,MobileViTV2Model:()=>Br,MobileViTV2PreTrainedModel:()=>Ir,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>wr,MptModel:()=>Mr,MptPreTrainedModel:()=>gr,MusicgenForCausalLM:()=>da,MusicgenForConditionalGeneration:()=>ua,MusicgenModel:()=>ca,MusicgenPreTrainedModel:()=>la,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>xr,OPTModel:()=>Tr,OPTPreTrainedModel:()=>br,OpenELMForCausalLM:()=>or,OpenELMModel:()=>rr,OpenELMPreTrainedModel:()=>nr,OwlViTForObjectDetection:()=>Vr,OwlViTModel:()=>Dr,OwlViTPreTrainedModel:()=>Or,Owlv2ForObjectDetection:()=>Gr,Owlv2Model:()=>Rr,Owlv2PreTrainedModel:()=>jr,Phi3ForCausalLM:()=>hr,Phi3Model:()=>pr,Phi3PreTrainedModel:()=>ur,PhiForCausalLM:()=>dr,PhiModel:()=>cr,PhiPreTrainedModel:()=>lr,PreTrainedModel:()=>R,PretrainedMixin:()=>ka,PyAnnoteForAudioFrameClassification:()=>os,PyAnnoteModel:()=>rs,PyAnnotePreTrainedModel:()=>ns,QuestionAnsweringModelOutput:()=>zi,Qwen2ForCausalLM:()=>ir,Qwen2Model:()=>ar,Qwen2PreTrainedModel:()=>sr,RTDetrForObjectDetection:()=>eo,RTDetrModel:()=>Zr,RTDetrObjectDetectionOutput:()=>to,RTDetrPreTrainedModel:()=>Jr,ResNetForImageClassification:()=>po,ResNetModel:()=>uo,ResNetPreTrainedModel:()=>co,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>$o,SamModel:()=>qo,SamPreTrainedModel:()=>Go,SegformerForImageClassification:()=>ea,SegformerForSemanticSegmentation:()=>ta,SegformerModel:()=>Zs,SegformerPreTrainedModel:()=>Js,Seq2SeqLMOutput:()=>vi,SequenceClassifierOutput:()=>Si,SiglipModel:()=>Mn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>wn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Ls,SpeechT5ForTextToSpeech:()=>zs,SpeechT5HifiGan:()=>Is,SpeechT5Model:()=>Es,SpeechT5PreTrainedModel:()=>As,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>oa,StableLmModel:()=>ra,StableLmPreTrainedModel:()=>na,Starcoder2ForCausalLM:()=>Gs,Starcoder2Model:()=>Rs,Starcoder2PreTrainedModel:()=>js,Swin2SRForImageSuperResolution:()=>Mo,Swin2SRModel:()=>go,Swin2SRPreTrainedModel:()=>fo,SwinForImageClassification:()=>_o,SwinModel:()=>mo,SwinPreTrainedModel:()=>ho,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>oo,TableTransformerModel:()=>ro,TableTransformerObjectDetectionOutput:()=>so,TableTransformerPreTrainedModel:()=>no,TokenClassifierOutput:()=>Ei,TrOCRForCausalLM:()=>Ns,TrOCRPreTrainedModel:()=>Bs,UniSpeechForCTC:()=>cs,UniSpeechForSequenceClassification:()=>ds,UniSpeechModel:()=>ls,UniSpeechPreTrainedModel:()=>is,UniSpeechSatForAudioFrameClassification:()=>_s,UniSpeechSatForCTC:()=>hs,UniSpeechSatForSequenceClassification:()=>ms,UniSpeechSatModel:()=>ps,UniSpeechSatPreTrainedModel:()=>us,ViTForImageClassification:()=>Fr,ViTModel:()=>kr,ViTPreTrainedModel:()=>yr,VisionEncoderDecoderModel:()=>an,VitMatteForImageMatting:()=>Ar,VitMattePreTrainedModel:()=>Sr,VitsModel:()=>Ks,VitsModelOutput:()=>Oi,VitsPreTrainedModel:()=>Ys,Wav2Vec2BertForCTC:()=>Ms,Wav2Vec2BertForSequenceClassification:()=>ws,Wav2Vec2BertModel:()=>gs,Wav2Vec2BertPreTrainedModel:()=>fs,Wav2Vec2ForAudioFrameClassification:()=>ts,Wav2Vec2ForCTC:()=>Zo,Wav2Vec2ForSequenceClassification:()=>es,Wav2Vec2Model:()=>Jo,Wav2Vec2PreTrainedModel:()=>Ko,WavLMForAudioFrameClassification:()=>Ss,WavLMForCTC:()=>Cs,WavLMForSequenceClassification:()=>Ps,WavLMForXVector:()=>vs,WavLMModel:()=>Fs,WavLMPreTrainedModel:()=>ks,WeSpeakerResNetModel:()=>as,WeSpeakerResNetPreTrainedModel:()=>ss,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>Ai,YolosForObjectDetection:()=>jo,YolosModel:()=>Vo,YolosObjectDetectionOutput:()=>Ro,YolosPreTrainedModel:()=>Do});var r=n(/*! ./configs.js */"./src/configs.js"),o=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,b=2,T=3,x=4,y=5,k=6,F=7,C=new Map,P=new Map,v=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,n){let a=n.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". Using the default device.`),a=null));const i=(0,o.deviceToExecutionProviders)(a);let c=n.dtype;if("string"!=typeof c&&(c&&c.hasOwnProperty(t)?c=c[t]:(c=s.DEFAULT_DEVICE_DTYPE_MAPPING[i[0]],console.warn(`dtype not specified for "${t}". Using the default dtype for this device (${c}).`))),!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(c))throw new Error(`Invalid dtype: ${c}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(c===s.DATA_TYPES.fp16&&"webgpu"===a&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${a}) does not support fp16.`);const d=s.DEFAULT_DTYPE_SUFFIX_MAPPING[c],u=`${n.subfolder??""}/${t}${d}.onnx`,p={...n.session_options}??{};p.executionProviders??=i;const h=(0,l.getModelFile)(e,u,!0,n);let m=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${d}.onnx_data`,o=`${n.subfolder??""}/${r}`;m.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,n);t({path:r,data:a})})))}else void 0!==p.externalData&&(m=p.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(m.length>0&&(p.externalData=await Promise.all(m)),"webgpu"===a){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";p.preferredOutputLocation=t}}return{buffer:await h,session_options:p}}(e,t[a],n);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const s of e.inputNames){const e=t[s];e instanceof u.Tensor?n[s]=(0,o.isONNXProxy)()?e.clone():e:r.push(s)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=E(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:o,...s}=t;if(!n){const r=(0,i.pick)(t,e.sessions.model.inputNames);n=(await B(e,r)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const n=e.sessions.model,r=(0,i.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new u.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await A(n,r)}async function N(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;r.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),r.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let n=BigInt(0);for(let e=0;e<a;++e){const r=t+e;0n===o.data[r]?i[r]=BigInt(1):(i[r]=n,n+=o.data[r])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,r.inputNames);return await A(r,a)}async function O(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:r});({inputs_embeds:s,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:n}))}else if(a&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(a)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:n,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:o}=n;if(o&&o.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=r.dims[1]-(t-o);n.input_ids=r.slice(null,[-s,null]),n.attention_mask=(0,u.ones)([1,t+s])}}return n}function V(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=v.get(this.constructor),r=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case b:case T:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case k:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=C.get(_);let g;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),f===x)g=await Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===b||f===T)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===y)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===k){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const o=new n(r);return"generation_config"in this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ia,Oa,za,va],t=v.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(o+=` Please use the following class instead: ${[...n].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,i.pick)(n,this.forward_params),o=this.main_input_name;if(o in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else r[o]=e;return{inputs_tensor:r[o],model_inputs:r,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=n;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(r??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);a=r}else a=Array.from({length:e},(()=>[r]));return a=L(a),n.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),_=this._get_stopping_criteria(t,r),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();o&&o.put(w);let b=null,T={};for(;;){i=this.prepare_inputs_for_generation(w,i,t);const e=await this.forward(i);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in T||(T[e]=[]),T[e].push(t[e])}const n=h(w,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],o=await g(t);for(const[t,n]of o){const o=BigInt(t);M[e]+=n,w[e].push(o),r.push([o]);break}}o&&o.put(r);if(_(w).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,i.past_key_values,!1));break}i=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...T}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const o in e)if(o.startsWith("present")){const s=o.replace("present","past_key_values");if(t&&o.includes("encoder"))r[s]=t[s];else{if(n&&t){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}r[s]=e[o]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],o=(0,r.getKeyValueShapes)(this.config);for(const r in o)e[r]=new u.Tensor(t,n,o[r])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new Li(await super._call(e))}}class X extends ${async _call(e){return new Si(await super._call(e))}}class Q extends ${async _call(e){return new Ei(await super._call(e))}}class H extends ${async _call(e){return new zi(await super._call(e))}}class Y extends R{}class K extends Y{}class J extends R{}class Z extends J{}class ee extends J{async _call(e){return new Li(await super._call(e))}}class te extends J{async _call(e){return new Si(await super._call(e))}}class ne extends J{async _call(e){return new Ei(await super._call(e))}}class re extends J{async _call(e){return new zi(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new Li(await super._call(e))}}class ie extends oe{async _call(e){return new Si(await super._call(e))}}class le extends oe{async _call(e){return new Ei(await super._call(e))}}class ce extends oe{async _call(e){return new zi(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new Li(await super._call(e))}}class he extends de{async _call(e){return new Si(await super._call(e))}}class me extends de{async _call(e){return new Ei(await super._call(e))}}class _e extends de{async _call(e){return new zi(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new Li(await super._call(e))}}class we extends fe{async _call(e){return new Si(await super._call(e))}}class be extends fe{async _call(e){return new Ei(await super._call(e))}}class Te extends fe{async _call(e){return new zi(await super._call(e))}}class xe extends R{}class ye extends xe{}class ke extends xe{async _call(e){return new Li(await super._call(e))}}class Fe extends xe{async _call(e){return new Si(await super._call(e))}}class Ce extends xe{async _call(e){return new Ei(await super._call(e))}}class Pe extends xe{async _call(e){return new zi(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new Li(await super._call(e))}}class Ee extends ve{async _call(e){return new Si(await super._call(e))}}class Le extends ve{async _call(e){return new Ei(await super._call(e))}}class ze extends ve{async _call(e){return new zi(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Si(await super._call(e))}}class Oe extends Ie{async _call(e){return new Ei(await super._call(e))}}class De extends Ie{async _call(e){return new zi(await super._call(e))}}class Ve extends Ie{async _call(e){return new Li(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new Li(await super._call(e))}}class qe extends je{async _call(e){return new Si(await super._call(e))}}class $e extends je{async _call(e){return new Ei(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new Li(await super._call(e))}}class Qe extends We{async _call(e){return new Si(await super._call(e))}}class He extends We{async _call(e){return new zi(await super._call(e))}}class Ye extends R{}class Ke extends Ye{}class Je extends Ye{async _call(e){return new Li(await super._call(e))}}class Ze extends Ye{async _call(e){return new Si(await super._call(e))}}class et extends Ye{async _call(e){return new Ei(await super._call(e))}}class tt extends Ye{async _call(e){return new zi(await super._call(e))}}class nt extends R{}class rt extends nt{}class ot extends nt{async _call(e){return new Li(await super._call(e))}}class st extends nt{async _call(e){return new Si(await super._call(e))}}class at extends nt{async _call(e){return new zi(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new Si(await super._call(e))}}class dt extends it{async _call(e){return new zi(await super._call(e))}}class ut extends it{async _call(e){return new Li(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class wt extends Mt{}class bt extends Mt{}class Tt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends Tt{}class yt extends Tt{}class kt extends Tt{async _call(e){return new Si(await super._call(e))}}class Ft extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends Ft{}class Pt extends Ft{}class vt extends Ft{async _call(e){return new Si(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Bt extends zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new Li(await super._call(e))}}class Vt extends Nt{async _call(e){return new Si(await super._call(e))}}class jt extends Nt{async _call(e){return new Ei(await super._call(e))}}class Rt extends Nt{async _call(e){return new zi(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new Li(await super._call(e))}}class Wt extends Gt{async _call(e){return new Si(await super._call(e))}}class Ut extends Gt{async _call(e){return new Ei(await super._call(e))}}class Xt extends Gt{async _call(e){return new zi(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new Li(await super._call(e))}}class Kt extends Qt{async _call(e){return new Si(await super._call(e))}}class Jt extends Qt{async _call(e){return new Ei(await super._call(e))}}class Zt extends Qt{async _call(e){return new zi(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class rn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class on extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const o=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return n?a[e].slice(null,t,null,[0,n]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],s=c[e][n][0].data,a=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[n,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*r);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const o=this.config.image_token_index,s=n.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:r};const l=[],c=[];for(let n=0;n<s.length;++n){const o=s[n],a=e[n],i=t[n],d=r[n];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:r})):n=o||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:n})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:a};return await N(this,c,!0)}}class hn extends R{}class mn extends hn{}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class Mn extends gn{}class wn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends R{}class xn extends Tn{}class yn extends R{}class kn extends yn{}class Fn extends yn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class vn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class Ln extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Ln{}class In extends Ln{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class Dn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Dn{}class jn extends Dn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class qn extends Rn{}class $n extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Wn extends $n{}class Un extends $n{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Kn extends Yn{}class Jn extends Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class or extends nr{}class sr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends sr{}class ir extends sr{}class lr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends lr{}class dr extends lr{}class ur extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends ur{}class hr extends ur{}class mr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends mr{}class fr extends mr{}class gr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends gr{}class wr extends gr{}class br extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends br{}class xr extends br{}class yr extends R{}class kr extends yr{}class Fr extends yr{async _call(e){return new Si(await super._call(e))}}class Cr extends R{}class Pr extends Cr{}class vr extends Cr{async _call(e){return new Si(await super._call(e))}}class Sr extends R{}class Ar extends Sr{async _call(e){return new Ni(await super._call(e))}}class Er extends R{}class Lr extends Er{}class zr extends Er{async _call(e){return new Si(await super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Si(await super._call(e))}}class Or extends R{}class Dr extends Or{}class Vr extends Or{}class jr extends R{}class Rr extends jr{}class Gr extends jr{}class qr extends R{}class $r extends qr{}class Wr extends qr{async _call(e){return new Si(await super._call(e))}}class Ur extends R{}class Xr extends Ur{}class Qr extends Ur{async _call(e){return new Yr(await super._call(e))}}class Hr extends Ur{async _call(e){return new Kr(await super._call(e))}}class Yr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Kr extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Jr extends R{}class Zr extends Jr{}class eo extends Jr{async _call(e){return new to(await super._call(e))}}class to extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class no extends R{}class ro extends no{}class oo extends no{async _call(e){return new so(await super._call(e))}}class so extends Yr{}class ao extends R{}class io extends ao{}class lo extends ao{async _call(e){return new Si(await super._call(e))}}class co extends R{}class uo extends co{}class po extends co{async _call(e){return new Si(await super._call(e))}}class ho extends R{}class mo extends ho{}class _o extends ho{async _call(e){return new Si(await super._call(e))}}class fo extends R{}class go extends fo{}class Mo extends fo{}class wo extends R{}class bo extends wo{}class To extends wo{}class xo extends R{}class yo extends xo{}class ko extends R{}class Fo extends ko{}class Co extends ko{}class Po extends R{}class vo extends Po{}class So extends R{}class Ao extends So{}class Eo extends So{async _call(e){return new Si(await super._call(e))}}class Lo extends R{}class zo extends Lo{}class Io extends Lo{async _call(e){return new Si(await super._call(e))}}class Bo extends R{}class No extends Bo{}class Oo extends Bo{async _call(e){return new Si(await super._call(e))}}class Do extends R{}class Vo extends Do{}class jo extends Do{async _call(e){return new Ro(await super._call(e))}}class Ro extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Go extends R{}class qo extends Go{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new $o(await super._call(e))}}class $o extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Wo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Uo extends Wo{}class Xo extends Wo{}class Qo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ho extends Qo{}class Yo extends Qo{}class Ko extends R{}class Jo extends Ko{}class Zo extends Ko{async _call(e){return new Ii(await super._call(e))}}class es extends Ko{async _call(e){return new Si(await super._call(e))}}class ts extends Ko{async _call(e){return new Ei(await super._call(e))}}class ns extends R{}class rs extends ns{}class os extends ns{async _call(e){return new Ei(await super._call(e))}}class ss extends R{}class as extends ss{}class is extends R{}class ls extends is{}class cs extends is{async _call(e){return new Ii(await super._call(e))}}class ds extends is{async _call(e){return new Si(await super._call(e))}}class us extends R{}class ps extends us{}class hs extends us{async _call(e){return new Ii(await super._call(e))}}class ms extends us{async _call(e){return new Si(await super._call(e))}}class _s extends us{async _call(e){return new Ei(await super._call(e))}}class fs extends R{}class gs extends fs{}class Ms extends fs{async _call(e){return new Ii(await super._call(e))}}class ws extends fs{async _call(e){return new Si(await super._call(e))}}class bs extends R{}class Ts extends Ko{}class xs extends Ko{async _call(e){return new Ii(await super._call(e))}}class ys extends Ko{async _call(e){return new Si(await super._call(e))}}class ks extends R{}class Fs extends ks{}class Cs extends ks{async _call(e){return new Ii(await super._call(e))}}class Ps extends ks{async _call(e){return new Si(await super._call(e))}}class vs extends ks{async _call(e){return new Ai(await super._call(e))}}class Ss extends ks{async _call(e){return new Ei(await super._call(e))}}class As extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Es extends As{}class Ls extends As{}class zs extends As{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*r),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let r;r=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=n)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class Is extends R{main_input_name="spectrogram"}class Bs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ns extends Bs{}class Os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ds extends Os{}class Vs extends Os{}class js extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Rs extends js{}class Gs extends js{}class qs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class $s extends qs{}class Ws extends qs{}class Us extends R{}class Xs extends Us{}class Qs extends Us{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Hs extends Us{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ys extends R{}class Ks extends Ys{async _call(e){return new Oi(await super._call(e))}}class Js extends R{}class Zs extends Js{}class ea extends Js{}class ta extends Js{}class na extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ra extends na{}class oa extends na{}class sa extends R{}class aa extends sa{}class ia extends sa{async _call(e){return new Si(await super._call(e))}}class la extends R{}class ca extends la{}class da extends la{}class ua extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,o=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const a=t%n-Math.floor(t/n)%r;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/r),i=s/(a*r);return new u.Tensor(e.type,e.data.slice(0,s),[a,r,i])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class pa extends R{}class ha extends pa{}class ma extends pa{async _call(e){return new Si(await super._call(e))}}class _a extends R{}class fa extends _a{}class ga extends _a{async _call(e){return new Si(await super._call(e))}}class Ma extends R{}class wa extends Ma{}class ba extends Ma{async _call(e){return new Si(await super._call(e))}}class Ta extends R{}class xa extends Ta{}class ya extends Ta{async _call(e){return new Si(await super._call(e))}}class ka{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Fa=new Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ye]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Xs]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",kn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",Mn]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",Jo]],["wav2vec2-bert",["Wav2Vec2BertModel",gs]],["unispeech",["UniSpeechModel",ls]],["unispeech-sat",["UniSpeechSatModel",ps]],["hubert",["HubertModel",Ts]],["wavlm",["WavLMModel",Fs]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",Ks]],["pyannote",["PyAnnoteModel",rs]],["wespeaker-resnet",["WeSpeakerResNetModel",as]],["detr",["DetrModel",Xr]],["rt_detr",["RTDetrModel",Zr]],["table-transformer",["TableTransformerModel",ro]],["vit",["ViTModel",kr]],["fastvit",["FastViTModel",Pr]],["mobilevit",["MobileViTModel",Lr]],["mobilevitv2",["MobileViTV2Model",Br]],["owlvit",["OwlViTModel",Dr]],["owlv2",["Owlv2Model",Rr]],["beit",["BeitModel",$r]],["deit",["DeiTModel",io]],["convnext",["ConvNextModel",Ao]],["convnextv2",["ConvNextV2Model",zo]],["dinov2",["Dinov2Model",No]],["resnet",["ResNetModel",uo]],["swin",["SwinModel",mo]],["swin2sr",["Swin2SRModel",go]],["donut-swin",["DonutSwinModel",vo]],["yolos",["YolosModel",Vo]],["dpt",["DPTModel",bo]],["glpn",["GLPNModel",Fo]],["hifigan",["SpeechT5HifiGan",Is]],["efficientnet",["EfficientNetModel",aa]],["mobilenet_v1",["MobileNetV1Model",ha]],["mobilenet_v2",["MobileNetV2Model",fa]],["mobilenet_v3",["MobileNetV3Model",wa]],["mobilenet_v4",["MobileNetV4Model",xa]]]),Ca=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",Uo]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",Ho]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Pa=new Map([["bloom",["BloomModel",_r]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Nn]],["gpt_bigcode",["GPTBigCodeModel",Vn]],["gpt_neo",["GPTNeoModel",An]],["gpt_neox",["GPTNeoXModel",zn]],["codegen",["CodeGenModel",Gn]],["llama",["LlamaModel",Wn]],["cohere",["CohereModel",Qn]],["gemma",["GemmaModel",Kn]],["gemma2",["Gemma2Model",er]],["openelm",["OpenELMModel",rr]],["qwen2",["Qwen2Model",ar]],["phi",["PhiModel",cr]],["phi3",["Phi3Model",pr]],["mpt",["MptModel",Mr]],["opt",["OPTModel",Tr]],["mistral",["MistralModel",Ds]],["starcoder2",["Starcoder2Model",Rs]],["falcon",["FalconModel",$s]],["stablelm",["StableLmModel",ra]]]),va=new Map([["speecht5",["SpeechT5ForSpeechToText",Ls]],["whisper",["WhisperForConditionalGeneration",sn]]]),Sa=new Map([["speecht5",["SpeechT5ForTextToSpeech",zs]]]),Aa=new Map([["vits",["VitsModel",Ks]],["musicgen",["MusicgenForConditionalGeneration",ua]]]),Ea=new Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",kt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),La=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),za=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",yt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",Xo]],["m2m_100",["M2M100ForConditionalGeneration",Yo]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),Ia=new Map([["bloom",["BloomForCausalLM",fr]],["gpt2",["GPT2LMHeadModel",vn]],["gptj",["GPTJForCausalLM",On]],["gpt_bigcode",["GPTBigCodeForCausalLM",jn]],["gpt_neo",["GPTNeoForCausalLM",En]],["gpt_neox",["GPTNeoXForCausalLM",In]],["codegen",["CodeGenForCausalLM",qn]],["llama",["LlamaForCausalLM",Un]],["cohere",["CohereForCausalLM",Hn]],["gemma",["GemmaForCausalLM",Jn]],["gemma2",["Gemma2ForCausalLM",tr]],["openelm",["OpenELMForCausalLM",or]],["qwen2",["Qwen2ForCausalLM",ir]],["phi",["PhiForCausalLM",dr]],["phi3",["Phi3ForCausalLM",hr]],["mpt",["MptForCausalLM",wr]],["opt",["OPTForCausalLM",xr]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Vs]],["starcoder2",["Starcoder2ForCausalLM",Gs]],["falcon",["FalconForCausalLM",Ws]],["trocr",["TrOCRForCausalLM",Ns]],["stablelm",["StableLmForCausalLM",oa]]]),Ba=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ke]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),Na=new Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Te]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),Oa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Da=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Va=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),ja=new Map([["vit",["ViTForImageClassification",Fr]],["fastvit",["FastViTForImageClassification",vr]],["mobilevit",["MobileViTForImageClassification",zr]],["mobilevitv2",["MobileViTV2ForImageClassification",Nr]],["beit",["BeitForImageClassification",Wr]],["deit",["DeiTForImageClassification",lo]],["convnext",["ConvNextForImageClassification",Eo]],["convnextv2",["ConvNextV2ForImageClassification",Io]],["dinov2",["Dinov2ForImageClassification",Oo]],["resnet",["ResNetForImageClassification",po]],["swin",["SwinForImageClassification",_o]],["segformer",["SegformerForImageClassification",ea]],["efficientnet",["EfficientNetForImageClassification",ia]],["mobilenet_v1",["MobileNetV1ForImageClassification",ma]],["mobilenet_v2",["MobileNetV2ForImageClassification",ga]],["mobilenet_v3",["MobileNetV3ForImageClassification",ba]],["mobilenet_v4",["MobileNetV4ForImageClassification",ya]]]),Ra=new Map([["detr",["DetrForObjectDetection",Qr]],["rt_detr",["RTDetrForObjectDetection",eo]],["table-transformer",["TableTransformerForObjectDetection",oo]],["yolos",["YolosForObjectDetection",jo]]]),Ga=new Map([["owlvit",["OwlViTForObjectDetection",Vr]],["owlv2",["Owlv2ForObjectDetection",Gr]]]),qa=new Map([["detr",["DetrForSegmentation",Hr]],["clipseg",["CLIPSegForImageSegmentation",Fn]]]),$a=new Map([["segformer",["SegformerForSemanticSegmentation",ta]]]),Wa=new Map([["sam",["SamModel",qo]]]),Ua=new Map([["wav2vec2",["Wav2Vec2ForCTC",Zo]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ms]],["unispeech",["UniSpeechForCTC",cs]],["unispeech-sat",["UniSpeechSatForCTC",hs]],["wavlm",["WavLMForCTC",Cs]],["hubert",["HubertForCTC",xs]]]),Xa=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",es]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",ws]],["unispeech",["UniSpeechForSequenceClassification",ds]],["unispeech-sat",["UniSpeechSatForSequenceClassification",ms]],["wavlm",["WavLMForSequenceClassification",Ps]],["hubert",["HubertForSequenceClassification",ys]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),Qa=new Map([["wavlm",["WavLMForXVector",vs]]]),Ha=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",_s]],["wavlm",["WavLMForAudioFrameClassification",Ss]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ts]],["pyannote",["PyAnnoteForAudioFrameClassification",os]]]),Ya=new Map([["vitmatte",["VitMatteForImageMatting",Ar]]]),Ka=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Mo]]]),Ja=new Map([["dpt",["DPTForDepthEstimation",To]],["depth_anything",["DepthAnythingForDepthEstimation",yo]],["glpn",["GLPNForDepthEstimation",Co]]]),Za=new Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),ei=[[Fa,M],[Ca,w],[Pa,x],[Ea,M],[La,M],[za,b],[va,b],[Ia,x],[Ba,M],[Na,M],[Oa,T],[Da,k],[ja,M],[qa,M],[$a,M],[Ya,M],[Ka,M],[Ja,M],[Ra,M],[Ga,M],[Wa,y],[Ua,M],[Xa,M],[Sa,b],[Aa,M],[Qa,M],[Ha,M],[Za,M]];for(const[e,t]of ei)for(const[n,r]of e.values())C.set(n,t),v.set(r,n),P.set(n,r);const ti=[["MusicgenForConditionalGeneration",ua,F],["CLIPTextModelWithProjection",_n,M],["SiglipTextModel",wn,M],["ClapTextModelWithProjection",Qs,M],["ClapAudioModelWithProjection",Hs,M]];for(const[e,t,n]of ti)C.set(e,n),v.set(t,e),P.set(e,t);class ni extends ka{static MODEL_CLASS_MAPPINGS=ei.map((e=>e[0]));static BASE_IF_FAIL=!0}class ri extends ka{static MODEL_CLASS_MAPPINGS=[Ea]}class oi extends ka{static MODEL_CLASS_MAPPINGS=[La]}class si extends ka{static MODEL_CLASS_MAPPINGS=[za]}class ai extends ka{static MODEL_CLASS_MAPPINGS=[va]}class ii extends ka{static MODEL_CLASS_MAPPINGS=[Sa]}class li extends ka{static MODEL_CLASS_MAPPINGS=[Aa]}class ci extends ka{static MODEL_CLASS_MAPPINGS=[Ia]}class di extends ka{static MODEL_CLASS_MAPPINGS=[Ba]}class ui extends ka{static MODEL_CLASS_MAPPINGS=[Na]}class pi extends ka{static MODEL_CLASS_MAPPINGS=[Oa]}class hi extends ka{static MODEL_CLASS_MAPPINGS=[ja]}class mi extends ka{static MODEL_CLASS_MAPPINGS=[qa]}class _i extends ka{static MODEL_CLASS_MAPPINGS=[$a]}class fi extends ka{static MODEL_CLASS_MAPPINGS=[Ra]}class gi extends ka{static MODEL_CLASS_MAPPINGS=[Ga]}class Mi extends ka{static MODEL_CLASS_MAPPINGS=[Wa]}class wi extends ka{static MODEL_CLASS_MAPPINGS=[Ua]}class bi extends ka{static MODEL_CLASS_MAPPINGS=[Xa]}class Ti extends ka{static MODEL_CLASS_MAPPINGS=[Qa]}class xi extends ka{static MODEL_CLASS_MAPPINGS=[Ha]}class yi extends ka{static MODEL_CLASS_MAPPINGS=[Va]}class ki extends ka{static MODEL_CLASS_MAPPINGS=[Ya]}class Fi extends ka{static MODEL_CLASS_MAPPINGS=[Ka]}class Ci extends ka{static MODEL_CLASS_MAPPINGS=[Ja]}class Pi extends ka{static MODEL_CLASS_MAPPINGS=[Za]}class vi extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=o}}class Si extends G{constructor({logits:e}){super(),this.logits=e}}class Ai extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Ei extends G{constructor({logits:e}){super(),this.logits=e}}class Li extends G{constructor({logits:e}){super(),this.logits=e}}class zi extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Ii extends G{constructor({logits:e}){super(),this.logits=e}}class Bi extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Ni extends G{constructor({alphas:e}){super(),this.alphas=e}}class Oi extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
+  \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>ni,AutoModelForAudioClassification:()=>bi,AutoModelForAudioFrameClassification:()=>xi,AutoModelForCTC:()=>wi,AutoModelForCausalLM:()=>ci,AutoModelForDepthEstimation:()=>Ci,AutoModelForDocumentQuestionAnswering:()=>yi,AutoModelForImageClassification:()=>hi,AutoModelForImageFeatureExtraction:()=>Pi,AutoModelForImageMatting:()=>ki,AutoModelForImageSegmentation:()=>mi,AutoModelForImageToImage:()=>Fi,AutoModelForMaskGeneration:()=>Mi,AutoModelForMaskedLM:()=>di,AutoModelForObjectDetection:()=>fi,AutoModelForQuestionAnswering:()=>ui,AutoModelForSemanticSegmentation:()=>_i,AutoModelForSeq2SeqLM:()=>si,AutoModelForSequenceClassification:()=>ri,AutoModelForSpeechSeq2Seq:()=>ai,AutoModelForTextToSpectrogram:()=>ii,AutoModelForTextToWaveform:()=>li,AutoModelForTokenClassification:()=>oi,AutoModelForVision2Seq:()=>pi,AutoModelForXVector:()=>Ti,AutoModelForZeroShotObjectDetection:()=>gi,BartForConditionalGeneration:()=>yt,BartForSequenceClassification:()=>kt,BartModel:()=>xt,BartPretrainedModel:()=>Tt,BaseModelOutput:()=>q,BeitForImageClassification:()=>Wr,BeitModel:()=>$r,BeitPreTrainedModel:()=>qr,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>fr,BloomModel:()=>_r,BloomPreTrainedModel:()=>mr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Fn,CLIPSegModel:()=>kn,CLIPSegPreTrainedModel:()=>yn,CLIPTextModelWithProjection:()=>_n,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>Te,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>Ii,CausalLMOutputWithPast:()=>Bi,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>Hs,ClapModel:()=>Xs,ClapPreTrainedModel:()=>Us,ClapTextModelWithProjection:()=>Qs,CodeGenForCausalLM:()=>qn,CodeGenModel:()=>Gn,CodeGenPreTrainedModel:()=>Rn,CohereForCausalLM:()=>Hn,CohereModel:()=>Qn,CoherePreTrainedModel:()=>Xn,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>Eo,ConvNextModel:()=>Ao,ConvNextPreTrainedModel:()=>So,ConvNextV2ForImageClassification:()=>Io,ConvNextV2Model:()=>zo,ConvNextV2PreTrainedModel:()=>Lo,DPTForDepthEstimation:()=>To,DPTModel:()=>bo,DPTPreTrainedModel:()=>wo,DebertaForMaskedLM:()=>ke,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ye,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DeiTForImageClassification:()=>lo,DeiTModel:()=>io,DeiTPreTrainedModel:()=>ao,DepthAnythingForDepthEstimation:()=>yo,DepthAnythingPreTrainedModel:()=>xo,DetrForObjectDetection:()=>Qr,DetrForSegmentation:()=>Hr,DetrModel:()=>Xr,DetrObjectDetectionOutput:()=>Yr,DetrPreTrainedModel:()=>Ur,DetrSegmentationOutput:()=>Kr,Dinov2ForImageClassification:()=>Oo,Dinov2Model:()=>No,Dinov2PreTrainedModel:()=>Bo,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>vo,DonutSwinPreTrainedModel:()=>Po,EfficientNetForImageClassification:()=>ia,EfficientNetModel:()=>aa,EfficientNetPreTrainedModel:()=>sa,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>Ws,FalconModel:()=>$s,FalconPreTrainedModel:()=>qs,FastViTForImageClassification:()=>vr,FastViTModel:()=>Pr,FastViTPreTrainedModel:()=>Cr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>Co,GLPNModel:()=>Fo,GLPNPreTrainedModel:()=>ko,GPT2LMHeadModel:()=>vn,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>jn,GPTBigCodeModel:()=>Vn,GPTBigCodePreTrainedModel:()=>Dn,GPTJForCausalLM:()=>On,GPTJModel:()=>Nn,GPTJPreTrainedModel:()=>Bn,GPTNeoForCausalLM:()=>En,GPTNeoModel:()=>An,GPTNeoPreTrainedModel:()=>Sn,GPTNeoXForCausalLM:()=>In,GPTNeoXModel:()=>zn,GPTNeoXPreTrainedModel:()=>Ln,Gemma2ForCausalLM:()=>tr,Gemma2Model:()=>er,Gemma2PreTrainedModel:()=>Zn,GemmaForCausalLM:()=>Jn,GemmaModel:()=>Kn,GemmaPreTrainedModel:()=>Yn,HubertForCTC:()=>xs,HubertForSequenceClassification:()=>ys,HubertModel:()=>Ts,HubertPreTrainedModel:()=>bs,ImageMattingOutput:()=>Ni,LlamaForCausalLM:()=>Un,LlamaModel:()=>Wn,LlamaPreTrainedModel:()=>$n,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>Yo,M2M100Model:()=>Ho,M2M100PreTrainedModel:()=>Qo,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>vt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>Xo,MarianModel:()=>Uo,MarianPreTrainedModel:()=>Wo,MaskedLMOutput:()=>Li,MistralForCausalLM:()=>Vs,MistralModel:()=>Ds,MistralPreTrainedModel:()=>Os,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>ma,MobileNetV1Model:()=>ha,MobileNetV1PreTrainedModel:()=>pa,MobileNetV2ForImageClassification:()=>ga,MobileNetV2Model:()=>fa,MobileNetV2PreTrainedModel:()=>_a,MobileNetV3ForImageClassification:()=>ba,MobileNetV3Model:()=>wa,MobileNetV3PreTrainedModel:()=>Ma,MobileNetV4ForImageClassification:()=>ya,MobileNetV4Model:()=>xa,MobileNetV4PreTrainedModel:()=>Ta,MobileViTForImageClassification:()=>zr,MobileViTModel:()=>Lr,MobileViTPreTrainedModel:()=>Er,MobileViTV2ForImageClassification:()=>Nr,MobileViTV2Model:()=>Br,MobileViTV2PreTrainedModel:()=>Ir,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>wr,MptModel:()=>Mr,MptPreTrainedModel:()=>gr,MusicgenForCausalLM:()=>da,MusicgenForConditionalGeneration:()=>ua,MusicgenModel:()=>ca,MusicgenPreTrainedModel:()=>la,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>xr,OPTModel:()=>Tr,OPTPreTrainedModel:()=>br,OpenELMForCausalLM:()=>or,OpenELMModel:()=>rr,OpenELMPreTrainedModel:()=>nr,OwlViTForObjectDetection:()=>Vr,OwlViTModel:()=>Dr,OwlViTPreTrainedModel:()=>Or,Owlv2ForObjectDetection:()=>Gr,Owlv2Model:()=>Rr,Owlv2PreTrainedModel:()=>jr,Phi3ForCausalLM:()=>hr,Phi3Model:()=>pr,Phi3PreTrainedModel:()=>ur,PhiForCausalLM:()=>dr,PhiModel:()=>cr,PhiPreTrainedModel:()=>lr,PreTrainedModel:()=>R,PretrainedMixin:()=>ka,PyAnnoteForAudioFrameClassification:()=>os,PyAnnoteModel:()=>rs,PyAnnotePreTrainedModel:()=>ns,QuestionAnsweringModelOutput:()=>zi,Qwen2ForCausalLM:()=>ir,Qwen2Model:()=>ar,Qwen2PreTrainedModel:()=>sr,RTDetrForObjectDetection:()=>eo,RTDetrModel:()=>Zr,RTDetrObjectDetectionOutput:()=>to,RTDetrPreTrainedModel:()=>Jr,ResNetForImageClassification:()=>po,ResNetModel:()=>uo,ResNetPreTrainedModel:()=>co,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>$o,SamModel:()=>qo,SamPreTrainedModel:()=>Go,SegformerForImageClassification:()=>ea,SegformerForSemanticSegmentation:()=>ta,SegformerModel:()=>Zs,SegformerPreTrainedModel:()=>Js,Seq2SeqLMOutput:()=>vi,SequenceClassifierOutput:()=>Si,SiglipModel:()=>Mn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>wn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Ls,SpeechT5ForTextToSpeech:()=>zs,SpeechT5HifiGan:()=>Is,SpeechT5Model:()=>Es,SpeechT5PreTrainedModel:()=>As,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>oa,StableLmModel:()=>ra,StableLmPreTrainedModel:()=>na,Starcoder2ForCausalLM:()=>Gs,Starcoder2Model:()=>Rs,Starcoder2PreTrainedModel:()=>js,Swin2SRForImageSuperResolution:()=>Mo,Swin2SRModel:()=>go,Swin2SRPreTrainedModel:()=>fo,SwinForImageClassification:()=>_o,SwinModel:()=>mo,SwinPreTrainedModel:()=>ho,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>oo,TableTransformerModel:()=>ro,TableTransformerObjectDetectionOutput:()=>so,TableTransformerPreTrainedModel:()=>no,TokenClassifierOutput:()=>Ei,TrOCRForCausalLM:()=>Ns,TrOCRPreTrainedModel:()=>Bs,UniSpeechForCTC:()=>cs,UniSpeechForSequenceClassification:()=>ds,UniSpeechModel:()=>ls,UniSpeechPreTrainedModel:()=>is,UniSpeechSatForAudioFrameClassification:()=>_s,UniSpeechSatForCTC:()=>hs,UniSpeechSatForSequenceClassification:()=>ms,UniSpeechSatModel:()=>ps,UniSpeechSatPreTrainedModel:()=>us,ViTForImageClassification:()=>Fr,ViTModel:()=>kr,ViTPreTrainedModel:()=>yr,VisionEncoderDecoderModel:()=>an,VitMatteForImageMatting:()=>Ar,VitMattePreTrainedModel:()=>Sr,VitsModel:()=>Ks,VitsModelOutput:()=>Oi,VitsPreTrainedModel:()=>Ys,Wav2Vec2BertForCTC:()=>Ms,Wav2Vec2BertForSequenceClassification:()=>ws,Wav2Vec2BertModel:()=>gs,Wav2Vec2BertPreTrainedModel:()=>fs,Wav2Vec2ForAudioFrameClassification:()=>ts,Wav2Vec2ForCTC:()=>Zo,Wav2Vec2ForSequenceClassification:()=>es,Wav2Vec2Model:()=>Jo,Wav2Vec2PreTrainedModel:()=>Ko,WavLMForAudioFrameClassification:()=>Ss,WavLMForCTC:()=>Cs,WavLMForSequenceClassification:()=>Ps,WavLMForXVector:()=>vs,WavLMModel:()=>Fs,WavLMPreTrainedModel:()=>ks,WeSpeakerResNetModel:()=>as,WeSpeakerResNetPreTrainedModel:()=>ss,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>Ai,YolosForObjectDetection:()=>jo,YolosModel:()=>Vo,YolosObjectDetectionOutput:()=>Ro,YolosPreTrainedModel:()=>Do});var r=n(/*! ./configs.js */"./src/configs.js"),o=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,b=2,T=3,x=4,y=5,k=6,F=7,C=new Map,P=new Map,v=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,n){let a=n.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". Using the default device.`),a=null));const i=a??(_.apis.IS_NODE_ENV?"cpu":"wasm"),c=(0,o.deviceToExecutionProviders)(i);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=s.DEFAULT_DEVICE_DTYPE_MAPPING[i]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${i}).`)));const u=d;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(u))throw new Error(`Invalid dtype: ${u}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(u===s.DATA_TYPES.fp16&&"webgpu"===i&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${i}) does not support fp16.`);const p=s.DEFAULT_DTYPE_SUFFIX_MAPPING[u],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=c;const f=(0,l.getModelFile)(e,h,!0,n);let g=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,o=`${n.subfolder??""}/${r}`;g.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,n);t({path:r,data:a})})))}else void 0!==m.externalData&&(g=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(g.length>0&&(m.externalData=await Promise.all(g)),"webgpu"===i){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await f,session_options:m}}(e,t[a],n);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const s of e.inputNames){const e=t[s];e instanceof u.Tensor?n[s]=(0,o.isONNXProxy)()?e.clone():e:r.push(s)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=E(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:o,...s}=t;if(!n){const r=(0,i.pick)(t,e.sessions.model.inputNames);n=(await B(e,r)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const n=e.sessions.model,r=(0,i.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new u.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await A(n,r)}async function N(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;r.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),r.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let n=BigInt(0);for(let e=0;e<a;++e){const r=t+e;0n===o.data[r]?i[r]=BigInt(1):(i[r]=n,n+=o.data[r])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,r.inputNames);return await A(r,a)}async function O(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:r});({inputs_embeds:s,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:n}))}else if(a&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(a)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:n,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:o}=n;if(o&&o.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=r.dims[1]-(t-o);n.input_ids=r.slice(null,[-s,null]),n.attention_mask=(0,u.ones)([1,t+s])}}return n}function V(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=v.get(this.constructor),r=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case b:case T:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case k:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=C.get(_);let g;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),f===x)g=await Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===b||f===T)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===y)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===k){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const o=new n(r);return"generation_config"in this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ia,Oa,za,va],t=v.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(o+=` Please use the following class instead: ${[...n].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,i.pick)(n,this.forward_params),o=this.main_input_name;if(o in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else r[o]=e;return{inputs_tensor:r[o],model_inputs:r,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=n;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(r??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);a=r}else a=Array.from({length:e},(()=>[r]));return a=L(a),n.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),_=this._get_stopping_criteria(t,r),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();o&&o.put(w);let b=null,T={};for(;;){i=this.prepare_inputs_for_generation(w,i,t);const e=await this.forward(i);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in T||(T[e]=[]),T[e].push(t[e])}const n=h(w,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],o=await g(t);for(const[t,n]of o){const o=BigInt(t);M[e]+=n,w[e].push(o),r.push([o]);break}}o&&o.put(r);if(_(w).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,i.past_key_values,!1));break}i=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...T}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const o in e)if(o.startsWith("present")){const s=o.replace("present","past_key_values");if(t&&o.includes("encoder"))r[s]=t[s];else{if(n&&t){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}r[s]=e[o]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],o=(0,r.getKeyValueShapes)(this.config);for(const r in o)e[r]=new u.Tensor(t,n,o[r])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new Li(await super._call(e))}}class X extends ${async _call(e){return new Si(await super._call(e))}}class Q extends ${async _call(e){return new Ei(await super._call(e))}}class H extends ${async _call(e){return new zi(await super._call(e))}}class Y extends R{}class K extends Y{}class J extends R{}class Z extends J{}class ee extends J{async _call(e){return new Li(await super._call(e))}}class te extends J{async _call(e){return new Si(await super._call(e))}}class ne extends J{async _call(e){return new Ei(await super._call(e))}}class re extends J{async _call(e){return new zi(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new Li(await super._call(e))}}class ie extends oe{async _call(e){return new Si(await super._call(e))}}class le extends oe{async _call(e){return new Ei(await super._call(e))}}class ce extends oe{async _call(e){return new zi(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new Li(await super._call(e))}}class he extends de{async _call(e){return new Si(await super._call(e))}}class me extends de{async _call(e){return new Ei(await super._call(e))}}class _e extends de{async _call(e){return new zi(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new Li(await super._call(e))}}class we extends fe{async _call(e){return new Si(await super._call(e))}}class be extends fe{async _call(e){return new Ei(await super._call(e))}}class Te extends fe{async _call(e){return new zi(await super._call(e))}}class xe extends R{}class ye extends xe{}class ke extends xe{async _call(e){return new Li(await super._call(e))}}class Fe extends xe{async _call(e){return new Si(await super._call(e))}}class Ce extends xe{async _call(e){return new Ei(await super._call(e))}}class Pe extends xe{async _call(e){return new zi(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new Li(await super._call(e))}}class Ee extends ve{async _call(e){return new Si(await super._call(e))}}class Le extends ve{async _call(e){return new Ei(await super._call(e))}}class ze extends ve{async _call(e){return new zi(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Si(await super._call(e))}}class Oe extends Ie{async _call(e){return new Ei(await super._call(e))}}class De extends Ie{async _call(e){return new zi(await super._call(e))}}class Ve extends Ie{async _call(e){return new Li(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new Li(await super._call(e))}}class qe extends je{async _call(e){return new Si(await super._call(e))}}class $e extends je{async _call(e){return new Ei(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new Li(await super._call(e))}}class Qe extends We{async _call(e){return new Si(await super._call(e))}}class He extends We{async _call(e){return new zi(await super._call(e))}}class Ye extends R{}class Ke extends Ye{}class Je extends Ye{async _call(e){return new Li(await super._call(e))}}class Ze extends Ye{async _call(e){return new Si(await super._call(e))}}class et extends Ye{async _call(e){return new Ei(await super._call(e))}}class tt extends Ye{async _call(e){return new zi(await super._call(e))}}class nt extends R{}class rt extends nt{}class ot extends nt{async _call(e){return new Li(await super._call(e))}}class st extends nt{async _call(e){return new Si(await super._call(e))}}class at extends nt{async _call(e){return new zi(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new Si(await super._call(e))}}class dt extends it{async _call(e){return new zi(await super._call(e))}}class ut extends it{async _call(e){return new Li(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class wt extends Mt{}class bt extends Mt{}class Tt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends Tt{}class yt extends Tt{}class kt extends Tt{async _call(e){return new Si(await super._call(e))}}class Ft extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends Ft{}class Pt extends Ft{}class vt extends Ft{async _call(e){return new Si(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Bt extends zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new Li(await super._call(e))}}class Vt extends Nt{async _call(e){return new Si(await super._call(e))}}class jt extends Nt{async _call(e){return new Ei(await super._call(e))}}class Rt extends Nt{async _call(e){return new zi(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new Li(await super._call(e))}}class Wt extends Gt{async _call(e){return new Si(await super._call(e))}}class Ut extends Gt{async _call(e){return new Ei(await super._call(e))}}class Xt extends Gt{async _call(e){return new zi(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new Li(await super._call(e))}}class Kt extends Qt{async _call(e){return new Si(await super._call(e))}}class Jt extends Qt{async _call(e){return new Ei(await super._call(e))}}class Zt extends Qt{async _call(e){return new zi(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class rn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class on extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const o=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return n?a[e].slice(null,t,null,[0,n]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],s=c[e][n][0].data,a=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[n,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*r);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const o=this.config.image_token_index,s=n.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:r};const l=[],c=[];for(let n=0;n<s.length;++n){const o=s[n],a=e[n],i=t[n],d=r[n];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:r})):n=o||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:n})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:a};return await N(this,c,!0)}}class hn extends R{}class mn extends hn{}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class Mn extends gn{}class wn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends R{}class xn extends Tn{}class yn extends R{}class kn extends yn{}class Fn extends yn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class vn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class Ln extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Ln{}class In extends Ln{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class Dn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Dn{}class jn extends Dn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class qn extends Rn{}class $n extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Wn extends $n{}class Un extends $n{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Kn extends Yn{}class Jn extends Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class or extends nr{}class sr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends sr{}class ir extends sr{}class lr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends lr{}class dr extends lr{}class ur extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends ur{}class hr extends ur{}class mr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends mr{}class fr extends mr{}class gr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends gr{}class wr extends gr{}class br extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends br{}class xr extends br{}class yr extends R{}class kr extends yr{}class Fr extends yr{async _call(e){return new Si(await super._call(e))}}class Cr extends R{}class Pr extends Cr{}class vr extends Cr{async _call(e){return new Si(await super._call(e))}}class Sr extends R{}class Ar extends Sr{async _call(e){return new Ni(await super._call(e))}}class Er extends R{}class Lr extends Er{}class zr extends Er{async _call(e){return new Si(await super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Si(await super._call(e))}}class Or extends R{}class Dr extends Or{}class Vr extends Or{}class jr extends R{}class Rr extends jr{}class Gr extends jr{}class qr extends R{}class $r extends qr{}class Wr extends qr{async _call(e){return new Si(await super._call(e))}}class Ur extends R{}class Xr extends Ur{}class Qr extends Ur{async _call(e){return new Yr(await super._call(e))}}class Hr extends Ur{async _call(e){return new Kr(await super._call(e))}}class Yr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Kr extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Jr extends R{}class Zr extends Jr{}class eo extends Jr{async _call(e){return new to(await super._call(e))}}class to extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class no extends R{}class ro extends no{}class oo extends no{async _call(e){return new so(await super._call(e))}}class so extends Yr{}class ao extends R{}class io extends ao{}class lo extends ao{async _call(e){return new Si(await super._call(e))}}class co extends R{}class uo extends co{}class po extends co{async _call(e){return new Si(await super._call(e))}}class ho extends R{}class mo extends ho{}class _o extends ho{async _call(e){return new Si(await super._call(e))}}class fo extends R{}class go extends fo{}class Mo extends fo{}class wo extends R{}class bo extends wo{}class To extends wo{}class xo extends R{}class yo extends xo{}class ko extends R{}class Fo extends ko{}class Co extends ko{}class Po extends R{}class vo extends Po{}class So extends R{}class Ao extends So{}class Eo extends So{async _call(e){return new Si(await super._call(e))}}class Lo extends R{}class zo extends Lo{}class Io extends Lo{async _call(e){return new Si(await super._call(e))}}class Bo extends R{}class No extends Bo{}class Oo extends Bo{async _call(e){return new Si(await super._call(e))}}class Do extends R{}class Vo extends Do{}class jo extends Do{async _call(e){return new Ro(await super._call(e))}}class Ro extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Go extends R{}class qo extends Go{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new $o(await super._call(e))}}class $o extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Wo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Uo extends Wo{}class Xo extends Wo{}class Qo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ho extends Qo{}class Yo extends Qo{}class Ko extends R{}class Jo extends Ko{}class Zo extends Ko{async _call(e){return new Ii(await super._call(e))}}class es extends Ko{async _call(e){return new Si(await super._call(e))}}class ts extends Ko{async _call(e){return new Ei(await super._call(e))}}class ns extends R{}class rs extends ns{}class os extends ns{async _call(e){return new Ei(await super._call(e))}}class ss extends R{}class as extends ss{}class is extends R{}class ls extends is{}class cs extends is{async _call(e){return new Ii(await super._call(e))}}class ds extends is{async _call(e){return new Si(await super._call(e))}}class us extends R{}class ps extends us{}class hs extends us{async _call(e){return new Ii(await super._call(e))}}class ms extends us{async _call(e){return new Si(await super._call(e))}}class _s extends us{async _call(e){return new Ei(await super._call(e))}}class fs extends R{}class gs extends fs{}class Ms extends fs{async _call(e){return new Ii(await super._call(e))}}class ws extends fs{async _call(e){return new Si(await super._call(e))}}class bs extends R{}class Ts extends Ko{}class xs extends Ko{async _call(e){return new Ii(await super._call(e))}}class ys extends Ko{async _call(e){return new Si(await super._call(e))}}class ks extends R{}class Fs extends ks{}class Cs extends ks{async _call(e){return new Ii(await super._call(e))}}class Ps extends ks{async _call(e){return new Si(await super._call(e))}}class vs extends ks{async _call(e){return new Ai(await super._call(e))}}class Ss extends ks{async _call(e){return new Ei(await super._call(e))}}class As extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Es extends As{}class Ls extends As{}class zs extends As{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*r),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let r;r=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=n)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class Is extends R{main_input_name="spectrogram"}class Bs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ns extends Bs{}class Os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ds extends Os{}class Vs extends Os{}class js extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Rs extends js{}class Gs extends js{}class qs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class $s extends qs{}class Ws extends qs{}class Us extends R{}class Xs extends Us{}class Qs extends Us{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Hs extends Us{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ys extends R{}class Ks extends Ys{async _call(e){return new Oi(await super._call(e))}}class Js extends R{}class Zs extends Js{}class ea extends Js{}class ta extends Js{}class na extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ra extends na{}class oa extends na{}class sa extends R{}class aa extends sa{}class ia extends sa{async _call(e){return new Si(await super._call(e))}}class la extends R{}class ca extends la{}class da extends la{}class ua extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,o=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const a=t%n-Math.floor(t/n)%r;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/r),i=s/(a*r);return new u.Tensor(e.type,e.data.slice(0,s),[a,r,i])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class pa extends R{}class ha extends pa{}class ma extends pa{async _call(e){return new Si(await super._call(e))}}class _a extends R{}class fa extends _a{}class ga extends _a{async _call(e){return new Si(await super._call(e))}}class Ma extends R{}class wa extends Ma{}class ba extends Ma{async _call(e){return new Si(await super._call(e))}}class Ta extends R{}class xa extends Ta{}class ya extends Ta{async _call(e){return new Si(await super._call(e))}}class ka{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Fa=new Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ye]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Xs]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",kn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",Mn]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",Jo]],["wav2vec2-bert",["Wav2Vec2BertModel",gs]],["unispeech",["UniSpeechModel",ls]],["unispeech-sat",["UniSpeechSatModel",ps]],["hubert",["HubertModel",Ts]],["wavlm",["WavLMModel",Fs]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",Ks]],["pyannote",["PyAnnoteModel",rs]],["wespeaker-resnet",["WeSpeakerResNetModel",as]],["detr",["DetrModel",Xr]],["rt_detr",["RTDetrModel",Zr]],["table-transformer",["TableTransformerModel",ro]],["vit",["ViTModel",kr]],["fastvit",["FastViTModel",Pr]],["mobilevit",["MobileViTModel",Lr]],["mobilevitv2",["MobileViTV2Model",Br]],["owlvit",["OwlViTModel",Dr]],["owlv2",["Owlv2Model",Rr]],["beit",["BeitModel",$r]],["deit",["DeiTModel",io]],["convnext",["ConvNextModel",Ao]],["convnextv2",["ConvNextV2Model",zo]],["dinov2",["Dinov2Model",No]],["resnet",["ResNetModel",uo]],["swin",["SwinModel",mo]],["swin2sr",["Swin2SRModel",go]],["donut-swin",["DonutSwinModel",vo]],["yolos",["YolosModel",Vo]],["dpt",["DPTModel",bo]],["glpn",["GLPNModel",Fo]],["hifigan",["SpeechT5HifiGan",Is]],["efficientnet",["EfficientNetModel",aa]],["mobilenet_v1",["MobileNetV1Model",ha]],["mobilenet_v2",["MobileNetV2Model",fa]],["mobilenet_v3",["MobileNetV3Model",wa]],["mobilenet_v4",["MobileNetV4Model",xa]]]),Ca=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",Uo]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",Ho]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Pa=new Map([["bloom",["BloomModel",_r]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Nn]],["gpt_bigcode",["GPTBigCodeModel",Vn]],["gpt_neo",["GPTNeoModel",An]],["gpt_neox",["GPTNeoXModel",zn]],["codegen",["CodeGenModel",Gn]],["llama",["LlamaModel",Wn]],["cohere",["CohereModel",Qn]],["gemma",["GemmaModel",Kn]],["gemma2",["Gemma2Model",er]],["openelm",["OpenELMModel",rr]],["qwen2",["Qwen2Model",ar]],["phi",["PhiModel",cr]],["phi3",["Phi3Model",pr]],["mpt",["MptModel",Mr]],["opt",["OPTModel",Tr]],["mistral",["MistralModel",Ds]],["starcoder2",["Starcoder2Model",Rs]],["falcon",["FalconModel",$s]],["stablelm",["StableLmModel",ra]]]),va=new Map([["speecht5",["SpeechT5ForSpeechToText",Ls]],["whisper",["WhisperForConditionalGeneration",sn]]]),Sa=new Map([["speecht5",["SpeechT5ForTextToSpeech",zs]]]),Aa=new Map([["vits",["VitsModel",Ks]],["musicgen",["MusicgenForConditionalGeneration",ua]]]),Ea=new Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",kt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),La=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),za=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",yt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",Xo]],["m2m_100",["M2M100ForConditionalGeneration",Yo]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),Ia=new Map([["bloom",["BloomForCausalLM",fr]],["gpt2",["GPT2LMHeadModel",vn]],["gptj",["GPTJForCausalLM",On]],["gpt_bigcode",["GPTBigCodeForCausalLM",jn]],["gpt_neo",["GPTNeoForCausalLM",En]],["gpt_neox",["GPTNeoXForCausalLM",In]],["codegen",["CodeGenForCausalLM",qn]],["llama",["LlamaForCausalLM",Un]],["cohere",["CohereForCausalLM",Hn]],["gemma",["GemmaForCausalLM",Jn]],["gemma2",["Gemma2ForCausalLM",tr]],["openelm",["OpenELMForCausalLM",or]],["qwen2",["Qwen2ForCausalLM",ir]],["phi",["PhiForCausalLM",dr]],["phi3",["Phi3ForCausalLM",hr]],["mpt",["MptForCausalLM",wr]],["opt",["OPTForCausalLM",xr]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Vs]],["starcoder2",["Starcoder2ForCausalLM",Gs]],["falcon",["FalconForCausalLM",Ws]],["trocr",["TrOCRForCausalLM",Ns]],["stablelm",["StableLmForCausalLM",oa]]]),Ba=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ke]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),Na=new Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Te]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),Oa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Da=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Va=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),ja=new Map([["vit",["ViTForImageClassification",Fr]],["fastvit",["FastViTForImageClassification",vr]],["mobilevit",["MobileViTForImageClassification",zr]],["mobilevitv2",["MobileViTV2ForImageClassification",Nr]],["beit",["BeitForImageClassification",Wr]],["deit",["DeiTForImageClassification",lo]],["convnext",["ConvNextForImageClassification",Eo]],["convnextv2",["ConvNextV2ForImageClassification",Io]],["dinov2",["Dinov2ForImageClassification",Oo]],["resnet",["ResNetForImageClassification",po]],["swin",["SwinForImageClassification",_o]],["segformer",["SegformerForImageClassification",ea]],["efficientnet",["EfficientNetForImageClassification",ia]],["mobilenet_v1",["MobileNetV1ForImageClassification",ma]],["mobilenet_v2",["MobileNetV2ForImageClassification",ga]],["mobilenet_v3",["MobileNetV3ForImageClassification",ba]],["mobilenet_v4",["MobileNetV4ForImageClassification",ya]]]),Ra=new Map([["detr",["DetrForObjectDetection",Qr]],["rt_detr",["RTDetrForObjectDetection",eo]],["table-transformer",["TableTransformerForObjectDetection",oo]],["yolos",["YolosForObjectDetection",jo]]]),Ga=new Map([["owlvit",["OwlViTForObjectDetection",Vr]],["owlv2",["Owlv2ForObjectDetection",Gr]]]),qa=new Map([["detr",["DetrForSegmentation",Hr]],["clipseg",["CLIPSegForImageSegmentation",Fn]]]),$a=new Map([["segformer",["SegformerForSemanticSegmentation",ta]]]),Wa=new Map([["sam",["SamModel",qo]]]),Ua=new Map([["wav2vec2",["Wav2Vec2ForCTC",Zo]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ms]],["unispeech",["UniSpeechForCTC",cs]],["unispeech-sat",["UniSpeechSatForCTC",hs]],["wavlm",["WavLMForCTC",Cs]],["hubert",["HubertForCTC",xs]]]),Xa=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",es]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",ws]],["unispeech",["UniSpeechForSequenceClassification",ds]],["unispeech-sat",["UniSpeechSatForSequenceClassification",ms]],["wavlm",["WavLMForSequenceClassification",Ps]],["hubert",["HubertForSequenceClassification",ys]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),Qa=new Map([["wavlm",["WavLMForXVector",vs]]]),Ha=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",_s]],["wavlm",["WavLMForAudioFrameClassification",Ss]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ts]],["pyannote",["PyAnnoteForAudioFrameClassification",os]]]),Ya=new Map([["vitmatte",["VitMatteForImageMatting",Ar]]]),Ka=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Mo]]]),Ja=new Map([["dpt",["DPTForDepthEstimation",To]],["depth_anything",["DepthAnythingForDepthEstimation",yo]],["glpn",["GLPNForDepthEstimation",Co]]]),Za=new Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),ei=[[Fa,M],[Ca,w],[Pa,x],[Ea,M],[La,M],[za,b],[va,b],[Ia,x],[Ba,M],[Na,M],[Oa,T],[Da,k],[ja,M],[qa,M],[$a,M],[Ya,M],[Ka,M],[Ja,M],[Ra,M],[Ga,M],[Wa,y],[Ua,M],[Xa,M],[Sa,b],[Aa,M],[Qa,M],[Ha,M],[Za,M]];for(const[e,t]of ei)for(const[n,r]of e.values())C.set(n,t),v.set(r,n),P.set(n,r);const ti=[["MusicgenForConditionalGeneration",ua,F],["CLIPTextModelWithProjection",_n,M],["SiglipTextModel",wn,M],["ClapTextModelWithProjection",Qs,M],["ClapAudioModelWithProjection",Hs,M]];for(const[e,t,n]of ti)C.set(e,n),v.set(t,e),P.set(e,t);class ni extends ka{static MODEL_CLASS_MAPPINGS=ei.map((e=>e[0]));static BASE_IF_FAIL=!0}class ri extends ka{static MODEL_CLASS_MAPPINGS=[Ea]}class oi extends ka{static MODEL_CLASS_MAPPINGS=[La]}class si extends ka{static MODEL_CLASS_MAPPINGS=[za]}class ai extends ka{static MODEL_CLASS_MAPPINGS=[va]}class ii extends ka{static MODEL_CLASS_MAPPINGS=[Sa]}class li extends ka{static MODEL_CLASS_MAPPINGS=[Aa]}class ci extends ka{static MODEL_CLASS_MAPPINGS=[Ia]}class di extends ka{static MODEL_CLASS_MAPPINGS=[Ba]}class ui extends ka{static MODEL_CLASS_MAPPINGS=[Na]}class pi extends ka{static MODEL_CLASS_MAPPINGS=[Oa]}class hi extends ka{static MODEL_CLASS_MAPPINGS=[ja]}class mi extends ka{static MODEL_CLASS_MAPPINGS=[qa]}class _i extends ka{static MODEL_CLASS_MAPPINGS=[$a]}class fi extends ka{static MODEL_CLASS_MAPPINGS=[Ra]}class gi extends ka{static MODEL_CLASS_MAPPINGS=[Ga]}class Mi extends ka{static MODEL_CLASS_MAPPINGS=[Wa]}class wi extends ka{static MODEL_CLASS_MAPPINGS=[Ua]}class bi extends ka{static MODEL_CLASS_MAPPINGS=[Xa]}class Ti extends ka{static MODEL_CLASS_MAPPINGS=[Qa]}class xi extends ka{static MODEL_CLASS_MAPPINGS=[Ha]}class yi extends ka{static MODEL_CLASS_MAPPINGS=[Va]}class ki extends ka{static MODEL_CLASS_MAPPINGS=[Ya]}class Fi extends ka{static MODEL_CLASS_MAPPINGS=[Ka]}class Ci extends ka{static MODEL_CLASS_MAPPINGS=[Ja]}class Pi extends ka{static MODEL_CLASS_MAPPINGS=[Za]}class vi extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=o}}class Si extends G{constructor({logits:e}){super(),this.logits=e}}class Ai extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Ei extends G{constructor({logits:e}){super(),this.logits=e}}class Li extends G{constructor({logits:e}){super(),this.logits=e}}class zi extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Ii extends G{constructor({logits:e}){super(),this.logits=e}}class Bi extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Ni extends G{constructor({alphas:e}){super(),this.alphas=e}}class Oi extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
 /*!**********************************************!*\
   !*** ./src/models/whisper/common_whisper.js ***!
   \**********************************************/(e,t,n)=>{n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>o,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>s,whisper_language_to_code:()=>a});const r=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],o=new Map(r),s=new Map([...r.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function a(e){e=e.toLowerCase();let t=s.get(e);if(void 0===t){if(!o.has(e)){const t=2===e.length?o.keys():o.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -142,10 +142,10 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
   \**************************************/(e,t,n)=>{n.r(t),n.d(t,{CharTrie:()=>o,PriorityQueue:()=>r,TokenLattice:()=>a});class r{constructor(e=((e,t)=>e>t),t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const n=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=n}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class o{constructor(){this.root=s.default()}extend(e){for(let t of e)this.push(t)}push(e){let t=this.root;for(let n of e){let e=t.children.get(n);void 0===e&&(e=s.default(),t.children.set(n,e)),t=e}t.isLeaf=!0}*commonPrefixSearch(e){let t=this.root,n="";for(let r=0;r<e.length&&void 0!==t;++r){const o=e[r];n+=o,t=t.children.get(o),void 0!==t&&t.isLeaf&&(yield n)}}}class s{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new s(!1,new Map)}}class a{constructor(e,t,n){this.sentence=e,this.len=e.length,this.bosTokenId=t,this.eosTokenId=n,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const r=new i(this.bosTokenId,0,0,0,0),o=new i(this.eosTokenId,1,this.len,0,0);this.nodes.push(r.clone()),this.nodes.push(o.clone()),this.beginNodes[this.len].push(o),this.endNodes[0].push(r)}insert(e,t,n,r){const o=this.nodes.length,s=new i(r,o,e,t,n);this.beginNodes[e].push(s),this.endNodes[e+t].push(s),this.nodes.push(s)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let n=0,r=null;for(let o of this.endNodes[t]){const t=o.backtraceScore+e.score;(null===r||t>n)&&(r=o.clone(),n=t)}if(null===r)return[];e.prev=r,e.backtraceScore=n}++t}const n=[],r=this.beginNodes[e][0].prev;if(null===r)return[];let o=r.clone();for(;null!==o.prev;){n.push(o.clone());const e=o.clone();o=e.prev.clone()}return n.reverse(),n}piece(e){return this.sentence.slice(e.pos,e.pos+e.length)}tokens(){return this.viterbi().map((e=>this.piece(e)))}tokenIds(){return this.viterbi().map((e=>e.tokenId))}}class i{constructor(e,t,n,r,o){this.tokenId=e,this.nodeId=t,this.pos=n,this.length=r,this.score=o,this.prev=null,this.backtraceScore=0}clone(){const e=new i(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
 /*!******************************!*\
   !*** ./src/utils/devices.js ***!
-  \******************************/(e,t,n)=>{n.r(t),n.d(t,{DEVICE_TYPES:()=>r});const r=Object.freeze({cpu:"cpu",gpu:"gpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml"})},"./src/utils/dtypes.js":
+  \******************************/(e,t,n)=>{n.r(t),n.d(t,{DEVICE_TYPES:()=>r});const r=Object.freeze({auto:"auto",gpu:"gpu",cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:"webnn","webnn-npu":"webnn-npu","webnn-gpu":"webnn-gpu","webnn-cpu":"webnn-cpu"})},"./src/utils/dtypes.js":
 /*!*****************************!*\
   !*** ./src/utils/dtypes.js ***!
-  \*****************************/(e,t,n)=>{n.r(t),n.d(t,{DATA_TYPES:()=>a,DEFAULT_DEVICE_DTYPE_MAPPING:()=>i,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>s});var r=n(/*! ../env.js */"./src/env.js"),o=n(/*! ./devices.js */"./src/utils/devices.js");const s=function(){let e;return async function(){if(void 0===e)if(r.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),a=Object.freeze({fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),i=Object.freeze({[o.DEVICE_TYPES.cpu]:a.q8,[o.DEVICE_TYPES.gpu]:a.fp32,[o.DEVICE_TYPES.wasm]:a.q8,[o.DEVICE_TYPES.webgpu]:a.fp32,[o.DEVICE_TYPES.cuda]:a.fp32,[o.DEVICE_TYPES.dml]:a.fp32}),l=Object.freeze({[a.fp32]:"",[a.fp16]:"_fp16",[a.int8]:"_int8",[a.uint8]:"_uint8",[a.q8]:"_quantized",[a.q4]:"_q4",[a.q4f16]:"_q4f16",[a.bnb4]:"_bnb4"})},"./src/utils/generic.js":
+  \*****************************/(e,t,n)=>{n.r(t),n.d(t,{DATA_TYPES:()=>a,DEFAULT_DEVICE_DTYPE_MAPPING:()=>i,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>s});var r=n(/*! ../env.js */"./src/env.js"),o=n(/*! ./devices.js */"./src/utils/devices.js");const s=function(){let e;return async function(){if(void 0===e)if(r.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),a=Object.freeze({fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),i=Object.freeze({[o.DEVICE_TYPES.wasm]:a.q8}),l=Object.freeze({[a.fp32]:"",[a.fp16]:"_fp16",[a.int8]:"_int8",[a.uint8]:"_uint8",[a.q8]:"_quantized",[a.q4]:"_q4",[a.q4f16]:"_q4f16",[a.bnb4]:"_bnb4"})},"./src/utils/generic.js":
 /*!******************************!*\
   !*** ./src/utils/generic.js ***!
   \******************************/(e,t,n)=>{n.r(t),n.d(t,{Callable:()=>r});const r=class{constructor(){let e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}_call(...e){throw Error("Must implement _call method in subclass")}}},"./src/utils/hub.js":