@huggingface/transformers 3.0.0-alpha.21 → 3.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,13 +85,13 @@
85
85
  \*************************************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{version:()=>s});const s="1.19.2"},"./src/backends/onnx.js":
86
86
  /*!******************************!*\
87
87
  !*** ./src/backends/onnx.js ***!
88
- \******************************/(e,t,n)=>{"use strict";var s,r;n.r(t),n.d(t,{Tensor:()=>l.Tensor,createInferenceSession:()=>g,deviceToExecutionProviders:()=>m,isONNXProxy:()=>b,isONNXTensor:()=>w});var o=n(/*! ../env.js */"./src/env.js"),i=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! #onnxruntime-webgpu */"?cb4d"),l=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const c=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),d=[];let u,p;const h=Symbol.for("onnxruntime");if(h in globalThis)p=globalThis[h];else if(o.apis.IS_NODE_ENV){switch(p=i??(s||(s=n.t(i,2))),process.platform){case"win32":d.push("dml");break;case"linux":"x64"===process.arch&&d.push("cuda")}d.push("cpu"),u=["cpu"]}else p=r||(r=n.t(a,2)),o.apis.IS_WEBNN_AVAILABLE&&d.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),o.apis.IS_WEBGPU_AVAILABLE&&d.push("webgpu"),d.push("wasm"),u=["wasm"];const _=p.InferenceSession;function m(e=null){if(!e)return u;switch(e){case"auto":return d;case"gpu":return d.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(d.includes(e))return[c[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${d.join(", ")}.`)}let f=null;async function g(e,t){f&&await f;const n=_.create(e,t);return f??=n,await n}function w(e){return e instanceof p.Tensor}const M=p?.env;function b(){return M?.wasm?.proxy}M?.wasm&&(M.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,M.wasm.proxy=!1,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(M.wasm.numThreads=1)),M?.webgpu&&(M.webgpu.powerPreference="high-performance"),o.env.backends.onnx=M},"./src/configs.js":
88
+ \******************************/(e,t,n)=>{"use strict";var s,r;n.r(t),n.d(t,{Tensor:()=>l.Tensor,createInferenceSession:()=>g,deviceToExecutionProviders:()=>m,isONNXProxy:()=>b,isONNXTensor:()=>w});var o=n(/*! ../env.js */"./src/env.js"),i=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! #onnxruntime-webgpu */"?cb4d"),l=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const c=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),d=[];let u,p;const h=Symbol.for("onnxruntime");if(h in globalThis)p=globalThis[h];else if(o.apis.IS_NODE_ENV){switch(p=i??(s||(s=n.t(i,2))),process.platform){case"win32":d.push("dml");break;case"linux":"x64"===process.arch&&d.push("cuda")}d.push("cpu"),u=["cpu"]}else p=r||(r=n.t(a,2)),o.apis.IS_WEBNN_AVAILABLE&&d.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),o.apis.IS_WEBGPU_AVAILABLE&&d.push("webgpu"),d.push("wasm"),u=["wasm"];const _=p.InferenceSession;function m(e=null){if(!e)return u;switch(e){case"auto":return d;case"gpu":return d.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(d.includes(e))return[c[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${d.join(", ")}.`)}let f=null;async function g(e,t,n){f&&await f;const s=_.create(e,t);f??=s;const r=await s;return r.config=n,r}function w(e){return e instanceof p.Tensor}const M=p?.env;function b(){return M?.wasm?.proxy}M?.wasm&&(M.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,M.wasm.proxy=!1,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(M.wasm.numThreads=1)),M?.webgpu&&(M.webgpu.powerPreference="high-performance"),o.env.backends.onnx=M},"./src/configs.js":
89
89
  /*!************************!*\
90
90
  !*** ./src/configs.js ***!
91
91
  \************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>a,getKeyValueShapes:()=>i});var s=n(/*! ./utils/core.js */"./src/utils/core.js"),r=n(/*! ./utils/hub.js */"./src/utils/hub.js");function o(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=o(e.text_config);break;case"moondream1":n=o(e.phi_config);break;case"musicgen":n=o(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"granite":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const r=o(e.decoder),i="num_decoder_layers"in r,a=(0,s.pick)(e,["model_type","is_encoder_decoder"]);return i?(a.num_decoder_layers=r.num_decoder_layers,a.num_decoder_heads=r.num_decoder_heads,a.decoder_hidden_size=r.decoder_hidden_size,a.num_encoder_layers=r.num_encoder_layers,a.num_encoder_heads=r.num_encoder_heads,a.encoder_hidden_size=r.encoder_hidden_size):(a.num_layers=r.num_layers,a.num_heads=r.num_heads,a.hidden_size=r.hidden_size),a}const r={...n,...(0,s.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)r[n]=e[t[n]];return r}function i(e,{prefix:t="past_key_values"}={}){const n={},s=e.normalized_config;if(s.is_encoder_decoder&&"num_encoder_heads"in s&&"num_decoder_heads"in s){const 
e=s.encoder_dim_kv??s.encoder_hidden_size/s.num_encoder_heads,r=s.decoder_dim_kv??s.decoder_hidden_size/s.num_decoder_heads,o=[1,s.num_encoder_heads,0,e],i=[1,s.num_decoder_heads,0,r];for(let e=0;e<s.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=o,n[`${t}.${e}.encoder.value`]=o,n[`${t}.${e}.decoder.key`]=i,n[`${t}.${e}.decoder.value`]=i}else{const e=s.num_heads,r=s.num_layers,o=s.dim_kv??s.hidden_size/(s.num_attention_heads??e);if("falcon"===s.model_type){const s=[1*e,0,o];for(let e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=s}else if(s.multi_query){const s=[1*e,0,2*o];for(let e=0;e<r;++e)n[`${t}.${e}.key_value`]=s}else if("bloom"===s.model_type){const s=[1*e,o,0],i=[1*e,0,o];for(let e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=i}else if("openelm"===s.model_type)for(let s=0;s<r;++s){const r=[1,e[s],0,o];n[`${t}.${s}.key`]=r,n[`${t}.${s}.value`]=r}else{const s=[1,e,0,o];for(let e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=s}}return n}class a{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=o(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main"}={}){!n||n instanceof a||(n=new a(n));const l=n??await async function(e,t){return await(0,r.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return a.from_pretrained(...e)}}},"./src/env.js":
92
92
  /*!********************!*\
93
93
  !*** ./src/env.js ***!
94
- \********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{apis:()=>m,env:()=>b});var s=n(/*! fs */"fs"),r=n(/*! path */"path"),o=n(/*! url */"url");const i="undefined"!=typeof self,a=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!y(s),_=!y(r),m=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:a,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:_}),f=h&&_,g=f?r.dirname(r.dirname(o.fileURLToPath("file:///workspaces/transformers.js/src/env.js"))):"./",w=f?r.join(g,"/.cache/"):null,M="/models/",b={version:"3.0.0-alpha.21",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:f?r.join(g,M):M,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function y(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
94
+ \********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{apis:()=>m,env:()=>b});var s=n(/*! fs */"fs"),r=n(/*! path */"path"),o=n(/*! url */"url");const i="undefined"!=typeof self,a=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!y(s),_=!y(r),m=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:a,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:_}),f=h&&_,g=f?r.dirname(r.dirname(o.fileURLToPath("file:///workspaces/transformers.js/src/env.js"))):"./",w=f?r.join(g,"/.cache/"):null,M="/models/",b={version:"3.0.0-alpha.22",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:f?r.join(g,M):M,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function y(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
95
95
  /*!***********************************************!*\
96
96
  !*** ./src/generation/configuration_utils.js ***!
97
97
  \***********************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{GenerationConfig:()=>r});var s=n(/*! ../utils/core.js */"./src/utils/core.js");class r{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,s.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
@@ -109,7 +109,7 @@
109
109
  \*************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{BaseStreamer:()=>i,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var s=n(/*! ../utils/core.js */"./src/utils/core.js"),r=n(/*! ../tokenizers.js */"./src/tokenizers.js"),o=n(/*! ../env.js */"./src/env.js");class i{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const a=o.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends i{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,decode_kwargs:r={},...o}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??a,this.token_callback_function=s,this.decode_kwargs={...r,...o},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,s.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let o;n.endsWith("\n")?(o=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,r.is_chinese_char)(n.charCodeAt(n.length-1))?(o=n.slice(this.print_len),this.print_len+=o.length):(o=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=o.length),this.on_finalized_text(o,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===a&&o.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,on_chunk_start:r=null,on_chunk_end:o=null,on_finalize:i=null,time_precision:a=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:s,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=r,this.on_chunk_end=o,this.on_finalize=i,this.time_precision=a,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
110
110
  /*!***********************!*\
111
111
  !*** ./src/models.js ***!
112
- \***********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{ASTForAudioClassification:()=>sn,ASTModel:()=>nn,ASTPreTrainedModel:()=>tn,AlbertForMaskedLM:()=>pt,AlbertForQuestionAnswering:()=>ut,AlbertForSequenceClassification:()=>dt,AlbertModel:()=>ct,AlbertPreTrainedModel:()=>lt,AutoModel:()=>Oa,AutoModelForAudioClassification:()=>nl,AutoModelForAudioFrameClassification:()=>rl,AutoModelForCTC:()=>tl,AutoModelForCausalLM:()=>qa,AutoModelForDepthEstimation:()=>ll,AutoModelForDocumentQuestionAnswering:()=>ol,AutoModelForImageClassification:()=>Qa,AutoModelForImageFeatureExtraction:()=>dl,AutoModelForImageMatting:()=>il,AutoModelForImageSegmentation:()=>Ha,AutoModelForImageToImage:()=>al,AutoModelForMaskGeneration:()=>el,AutoModelForMaskedLM:()=>Ua,AutoModelForNormalEstimation:()=>cl,AutoModelForObjectDetection:()=>Ka,AutoModelForQuestionAnswering:()=>Wa,AutoModelForSemanticSegmentation:()=>Ya,AutoModelForSeq2SeqLM:()=>Va,AutoModelForSequenceClassification:()=>ja,AutoModelForSpeechSeq2Seq:()=>Ra,AutoModelForTextToSpectrogram:()=>Ga,AutoModelForTextToWaveform:()=>$a,AutoModelForTokenClassification:()=>Da,AutoModelForUniversalSegmentation:()=>Ja,AutoModelForVision2Seq:()=>Xa,AutoModelForXVector:()=>sl,AutoModelForZeroShotObjectDetection:()=>Za,BartForConditionalGeneration:()=>Tt,BartForSequenceClassification:()=>vt,BartModel:()=>kt,BartPretrainedModel:()=>xt,BaseModelOutput:()=>q,BeitForImageClassification:()=>dr,BeitModel:()=>cr,BeitPreTrainedModel:()=>lr,BertForMaskedLM:()=>X,BertForQuestionAnswering:()=>Y,BertForSequenceClassification:()=>Q,BertForTokenClassification:()=>H,BertModel:()=>W,BertPreTrainedModel:()=>U,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>Lt,BlenderbotPreTrainedModel:()=>Et,BlenderbotSmallForConditionalGeneration:()=>Nt,BlenderbotSmallModel:()=>Bt,BlenderbotSmallPreTrainedModel:()=>It,BloomForCausalLM:()=>vs,BloomModel:()=>Ts,BloomPreTrainedModel:()=>ks,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>_n,CLIPSegForImageSegmentation:()=>Pn,CLIPSegModel:()=>Cn,CLIPSegPreTrainedModel:()=>Fn,CLIPTextModel:()=>fn,CLIPTextModelWithProjection:()=>gn,CLIPVisionModel:()=>wn,CLIPVisionModelWithProjection:()=>Mn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>xe,CamembertForSequenceClassification:()=>be,CamembertForTokenClassification:()=>ye,CamembertModel:()=>we,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>gl,CausalLMOutputWithPast:()=>wl,ChineseCLIPModel:()=>vn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>Fi,ClapModel:()=>Ti,ClapPreTrainedModel:()=>ki,ClapTextModelWithProjection:()=>vi,CodeGenForCausalLM:()=>Hn,CodeGenModel:()=>Qn,CodeGenPreTrainedModel:()=>Xn,CohereForCausalLM:()=>rs,CohereModel:()=>ss,CoherePreTrainedModel:()=>ns,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>de,ConvBertForSequenceClassification:()=>le,ConvBertForTokenClassification:()=>ce,ConvBertModel:()=>ie,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>ao,ConvNextModel:()=>io,ConvNextPreTrainedModel:()=>oo,ConvNextV2ForImageClassification:()=>uo,ConvNextV2Model:()=>co,ConvNextV2PreTrainedModel:()=>lo,DPTForDepthEstimation:()=>Gr,DPTModel:()=>Rr,DPTPreTrainedModel:()=>Vr,DebertaForMaskedLM:()=>ve,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>Te,DebertaPreTrainedModel:()=>ke,DebertaV2ForMaskedLM:()=>Ee,DebertaV2ForQuestionAnswering:()=>Ie,DebertaV2ForSequenceClassification:()=>Le,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Ae,Debe
rtaV2PreTrainedModel:()=>Se,DecisionTransformerModel:()=>na,DecisionTransformerPreTrainedModel:()=>ta,DeiTForImageClassification:()=>Cr,DeiTModel:()=>Fr,DeiTPreTrainedModel:()=>vr,DepthAnythingForDepthEstimation:()=>qr,DepthAnythingPreTrainedModel:()=>$r,DepthProForDepthEstimation:()=>Yr,DepthProPreTrainedModel:()=>Hr,DetrForObjectDetection:()=>hr,DetrForSegmentation:()=>_r,DetrModel:()=>pr,DetrObjectDetectionOutput:()=>mr,DetrPreTrainedModel:()=>ur,DetrSegmentationOutput:()=>fr,Dinov2ForImageClassification:()=>_o,Dinov2Model:()=>ho,Dinov2PreTrainedModel:()=>po,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Oe,DistilBertForTokenClassification:()=>je,DistilBertModel:()=>Ne,DistilBertPreTrainedModel:()=>Be,DonutSwinModel:()=>ro,DonutSwinPreTrainedModel:()=>so,EfficientNetForImageClassification:()=>ji,EfficientNetModel:()=>Oi,EfficientNetPreTrainedModel:()=>Ni,ElectraForMaskedLM:()=>he,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>_e,ElectraForTokenClassification:()=>me,ElectraModel:()=>pe,ElectraPreTrainedModel:()=>ue,EsmForMaskedLM:()=>$e,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>Ue,EsmModel:()=>Ge,EsmPreTrainedModel:()=>Re,FalconForCausalLM:()=>xi,FalconModel:()=>yi,FalconPreTrainedModel:()=>bi,FastViTForImageClassification:()=>Xs,FastViTModel:()=>Ws,FastViTPreTrainedModel:()=>Us,Florence2ForConditionalGeneration:()=>hn,Florence2PreTrainedModel:()=>pn,GLPNForDepthEstimation:()=>no,GLPNModel:()=>to,GLPNPreTrainedModel:()=>eo,GPT2LMHeadModel:()=>En,GPT2Model:()=>An,GPT2PreTrainedModel:()=>Sn,GPTBigCodeForCausalLM:()=>Wn,GPTBigCodeModel:()=>Un,GPTBigCodePreTrainedModel:()=>qn,GPTJForCausalLM:()=>$n,GPTJModel:()=>Gn,GPTJPreTrainedModel:()=>Rn,GPTNeoForCausalLM:()=>On,GPTNeoModel:()=>Nn,GPTNeoPreTrainedModel:()=>Bn,GPTNeoXForCausalLM:()=>Vn,GPTNeoXModel:()=>Dn,GPTNeoXPreTrainedModel:()=>jn,Gemma2ForCausalLM:()=>ds,Gemma2Model:()=>cs,Gemma2PreTrainedModel:()=>ls,GemmaForCausalLM:()=>as,GemmaModel:()=>is,GemmaPreTrainedModel:()=>os,GraniteForCausalLM:()=>ts,GraniteModel:()=>es,GranitePreTrainedModel:()=>Zn,GroupViTModel:()=>qs,GroupViTPreTrainedModel:()=>$s,HieraForImageClassification:()=>Ar,HieraModel:()=>Sr,HieraPreTrainedModel:()=>Pr,HubertForCTC:()=>Zo,HubertForSequenceClassification:()=>ei,HubertModel:()=>Ko,HubertPreTrainedModel:()=>Jo,ImageMattingOutput:()=>Ml,JAISLMHeadModel:()=>In,JAISModel:()=>zn,JAISPreTrainedModel:()=>Ln,LlamaForCausalLM:()=>Kn,LlamaModel:()=>Jn,LlamaPreTrainedModel:()=>Yn,LlavaForConditionalGeneration:()=>dn,LlavaPreTrainedModel:()=>cn,LongT5ForConditionalGeneration:()=>wt,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>Co,M2M100Model:()=>Fo,M2M100PreTrainedModel:()=>vo,MBartForCausalLM:()=>At,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>St,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ze,MPNetForQuestionAnswering:()=>nt,MPNetForSequenceClassification:()=>et,MPNetForTokenClassification:()=>tt,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Je,MT5ForConditionalGeneration:()=>yt,MT5Model:()=>bt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>To,MarianModel:()=>ko,MarianPreTrainedModel:()=>xo,MaskFormerForInstanceSegmentation:()=>Zr,MaskFormerModel:()=>Kr,MaskFormerPreTrainedModel:()=>Jr,MaskedLMOutput:()=>ml,MistralForCausalLM:()=>fi,MistralModel:()=>mi,MistralPreTrainedModel:()=>_i,MobileBertForMaskedLM:()=>Qe,MobileBertForQuestionAnswering:()=>Ye,MobileBertForSequenceClassification:()=>H
e,MobileBertModel:()=>Xe,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>Ui,MobileNetV1Model:()=>qi,MobileNetV1PreTrainedModel:()=>$i,MobileNetV2ForImageClassification:()=>Qi,MobileNetV2Model:()=>Xi,MobileNetV2PreTrainedModel:()=>Wi,MobileNetV3ForImageClassification:()=>Ji,MobileNetV3Model:()=>Yi,MobileNetV3PreTrainedModel:()=>Hi,MobileNetV4ForImageClassification:()=>ea,MobileNetV4Model:()=>Zi,MobileNetV4PreTrainedModel:()=>Ki,MobileViTForImageClassification:()=>Ks,MobileViTModel:()=>Js,MobileViTPreTrainedModel:()=>Ys,MobileViTV2ForImageClassification:()=>tr,MobileViTV2Model:()=>er,MobileViTV2PreTrainedModel:()=>Zs,ModelOutput:()=>$,Moondream1ForConditionalGeneration:()=>un,MptForCausalLM:()=>Ps,MptModel:()=>Cs,MptPreTrainedModel:()=>Fs,MusicgenForCausalLM:()=>Ri,MusicgenForConditionalGeneration:()=>Gi,MusicgenModel:()=>Vi,MusicgenPreTrainedModel:()=>Di,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>J,OPTForCausalLM:()=>Es,OPTModel:()=>As,OPTPreTrainedModel:()=>Ss,OpenELMForCausalLM:()=>hs,OpenELMModel:()=>ps,OpenELMPreTrainedModel:()=>us,OwlViTForObjectDetection:()=>rr,OwlViTModel:()=>sr,OwlViTPreTrainedModel:()=>nr,Owlv2ForObjectDetection:()=>ar,Owlv2Model:()=>ir,Owlv2PreTrainedModel:()=>or,Phi3ForCausalLM:()=>xs,Phi3Model:()=>ys,Phi3PreTrainedModel:()=>bs,PhiForCausalLM:()=>Ms,PhiModel:()=>ws,PhiPreTrainedModel:()=>gs,PreTrainedModel:()=>G,PretrainedMixin:()=>sa,PvtForImageClassification:()=>Os,PvtModel:()=>Ns,PvtPreTrainedModel:()=>Bs,PyAnnoteForAudioFrameClassification:()=>Bo,PyAnnoteModel:()=>Io,PyAnnotePreTrainedModel:()=>zo,QuestionAnsweringModelOutput:()=>fl,Qwen2ForCausalLM:()=>fs,Qwen2Model:()=>ms,Qwen2PreTrainedModel:()=>_s,RTDetrForObjectDetection:()=>Mr,RTDetrModel:()=>wr,RTDetrObjectDetectionOutput:()=>br,RTDetrPreTrainedModel:()=>gr,ResNetForImageClassification:()=>zr,ResNetModel:()=>Lr,ResNetPreTrainedModel:()=>Er,RoFormerForMaskedLM:()=>te,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>ne,RoFormerForTokenClassification:()=>se,RoFormerModel:()=>ee,RoFormerPreTrainedModel:()=>Z,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Gt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>Rt,RobertaModel:()=>jt,RobertaPreTrainedModel:()=>Ot,SamImageSegmentationOutput:()=>yo,SamModel:()=>bo,SamPreTrainedModel:()=>Mo,SapiensForDepthEstimation:()=>Xr,SapiensForNormalEstimation:()=>Qr,SapiensForSemanticSegmentation:()=>Wr,SapiensPreTrainedModel:()=>Ur,SegformerForImageClassification:()=>Ei,SegformerForSemanticSegmentation:()=>Li,SegformerModel:()=>Ai,SegformerPreTrainedModel:()=>Si,Seq2SeqLMOutput:()=>ul,SequenceClassifierOutput:()=>pl,SiglipModel:()=>yn,SiglipPreTrainedModel:()=>bn,SiglipTextModel:()=>xn,SiglipVisionModel:()=>kn,SpeechT5ForSpeechToText:()=>ci,SpeechT5ForTextToSpeech:()=>di,SpeechT5HifiGan:()=>ui,SpeechT5Model:()=>li,SpeechT5PreTrainedModel:()=>ai,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>it,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>st,StableLmForCausalLM:()=>Bi,StableLmModel:()=>Ii,StableLmPreTrainedModel:()=>zi,Starcoder2ForCausalLM:()=>Mi,Starcoder2Model:()=>wi,Starcoder2PreTrainedModel:()=>gi,Swin2SRForImageSuperResolution:()=>Dr,Swin2SRModel:()=>jr,Swin2SRPreTrainedModel:()=>Or,SwinForImageClassification:()=>Nr,SwinModel:()=>Br,SwinPreTrainedModel:()=>Ir,T5ForConditionalGeneration:()=>mt,T5Model:()=>_t,T5PreTrainedModel:()=>ht,TableTransformerForObjectDetection:()=>kr,TableTransformerModel:()=>xr,TableTr
ansformerObjectDetectionOutput:()=>Tr,TableTransformerPreTrainedModel:()=>yr,TokenClassifierOutput:()=>_l,TrOCRForCausalLM:()=>hi,TrOCRPreTrainedModel:()=>pi,UniSpeechForCTC:()=>Vo,UniSpeechForSequenceClassification:()=>Ro,UniSpeechModel:()=>Do,UniSpeechPreTrainedModel:()=>jo,UniSpeechSatForAudioFrameClassification:()=>Wo,UniSpeechSatForCTC:()=>qo,UniSpeechSatForSequenceClassification:()=>Uo,UniSpeechSatModel:()=>$o,UniSpeechSatPreTrainedModel:()=>Go,ViTForImageClassification:()=>Is,ViTMAEModel:()=>Ds,ViTMAEPreTrainedModel:()=>js,ViTMSNForImageClassification:()=>Gs,ViTMSNModel:()=>Rs,ViTMSNPreTrainedModel:()=>Vs,ViTModel:()=>zs,ViTPreTrainedModel:()=>Ls,VisionEncoderDecoderModel:()=>ln,VitMatteForImageMatting:()=>Hs,VitMattePreTrainedModel:()=>Qs,VitsModel:()=>Pi,VitsModelOutput:()=>bl,VitsPreTrainedModel:()=>Ci,Wav2Vec2BertForCTC:()=>Ho,Wav2Vec2BertForSequenceClassification:()=>Yo,Wav2Vec2BertModel:()=>Qo,Wav2Vec2BertPreTrainedModel:()=>Xo,Wav2Vec2ForAudioFrameClassification:()=>Lo,Wav2Vec2ForCTC:()=>Ao,Wav2Vec2ForSequenceClassification:()=>Eo,Wav2Vec2Model:()=>So,Wav2Vec2PreTrainedModel:()=>Po,WavLMForAudioFrameClassification:()=>ii,WavLMForCTC:()=>si,WavLMForSequenceClassification:()=>ri,WavLMForXVector:()=>oi,WavLMModel:()=>ni,WavLMPreTrainedModel:()=>ti,WeSpeakerResNetModel:()=>Oo,WeSpeakerResNetPreTrainedModel:()=>No,WhisperForConditionalGeneration:()=>an,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Qt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Xt,XLMModel:()=>qt,XLMPreTrainedModel:()=>$t,XLMRobertaForMaskedLM:()=>Jt,XLMRobertaForQuestionAnswering:()=>en,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Zt,XLMRobertaModel:()=>Yt,XLMRobertaPreTrainedModel:()=>Ht,XLMWithLMHeadModel:()=>Ut,XVectorOutput:()=>hl,YolosForObjectDetection:()=>go,YolosModel:()=>fo,YolosObjectDetectionOutput:()=>wo,YolosPreTrainedModel:()=>mo});var s=n(/*! ./configs.js */"./src/configs.js"),r=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),_=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),m=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,M=1,b=2,y=3,x=4,k=5,T=6,v=7,F=new Map,C=new Map,P=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:a,session_options:c}=await async function(e,t,n){const i=n.config?.["transformers.js_config"]??{};let a=n.device??i.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". 
Using the default device.`),a=null));const c=a??(m.apis.IS_NODE_ENV?"cpu":"wasm"),d=(0,r.deviceToExecutionProviders)(c);let u=n.dtype??i.dtype;"string"!=typeof u&&(u&&u.hasOwnProperty(t)?u=u[t]:(u=o.DEFAULT_DEVICE_DTYPE_MAPPING[c]??o.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${u}) for this device (${c}).`)));const p=u;if(!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(p))throw new Error(`Invalid dtype: ${p}. Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);if(p===o.DATA_TYPES.fp16&&"webgpu"===c&&!await(0,o.isWebGpuFp16Supported)())throw new Error(`The device (${c}) does not support fp16.`);const h=o.DEFAULT_DTYPE_SUFFIX_MAPPING[p],_=`${n.subfolder??""}/${t}${h}.onnx`,f={...n.session_options};f.executionProviders??=d;const g=i.free_dimension_overrides;g?f.freeDimensionOverrides??=g:c.startsWith("webnn")&&!f.freeDimensionOverrides&&console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');const w=(0,l.getModelFile)(e,_,!0,n),M=n.use_external_data_format??i.use_external_data_format;let b=[];if(M&&(!0===M||"object"==typeof M&&M.hasOwnProperty(t)&&!0===M[t])){if(m.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const s=`${t}${h}.onnx_data`,r=`${n.subfolder??""}/${s}`;b.push(new Promise((async(t,o)=>{const i=await(0,l.getModelFile)(e,r,!0,n);t({path:s,data:i})})))}else void 0!==f.externalData&&(b=f.externalData.map((async t=>{if("string"==typeof t.data){const s=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:s}}return t})));if(b.length>0&&(f.externalData=await Promise.all(b)),"webgpu"===c){const e=(0,s.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,r.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";f.preferredOutputLocation=t}}return{buffer:await w,session_options:f}}(e,t[i],n);return[i,await(0,r.createInferenceSession)(a,c)]}))))}async function A(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async s=>[s,await(0,l.getModelJSON)(e,t[s],!1,n)]))))}async function E(e,t){const n=function(e,t){const n=Object.create(null),s=[];for(const o of e.inputNames){const e=t[o];e instanceof u.Tensor?n[o]=(0,r.isONNXProxy)()?e.clone():e:s.push(o)}if(s.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${s.join(", ")}.`);const o=Object.keys(t).length,i=e.inputNames.length;if(o>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${o} > ${i}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let s=await e.run(t);return s=L(s),s}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function L(e){for(let t in e)(0,r.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&L(e[t]);return e}function z(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function I(e){return new u.Tensor("bool",[e],[1])}async function B(e,t){let{encoder_outputs:n,input_ids:s,decoder_input_ids:r,...o}=t;if(!n){const s=(0,a.pick)(t,e.sessions.model.inputNames);n=(await N(e,s)).last_hidden_state}o.input_ids=r,o.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(o.encoder_attention_mask=t.attention_mask);return await O(e,o,!0)}async function N(e,t){const n=e.sessions.model,s=(0,a.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!s.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");s.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!s.token_type_ids&&(s.token_type_ids=new u.Tensor("int64",new BigInt64Array(s.input_ids.data.length),s.input_ids.dims)),await E(n,s)}async function O(e,t,n=!1){const s=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:r,...o}=t;s.inputNames.includes("use_cache_branch")&&(o.use_cache_branch=I(!!r)),s.inputNames.includes("position_ids")&&o.attention_mask&&!o.position_ids&&(o.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:s,attention_mask:r}=e,[o,i]=r.dims,a=new BigInt64Array(r.data.length);for(let e=0;e<o;++e){const t=e*i;let n=BigInt(0);for(let e=0;e<i;++e){const s=t+e;0n===r.data[s]?a[s]=BigInt(1):(a[s]=n,n+=r.data[s])}}let l=new u.Tensor("int64",a,r.dims);if(t){const e=-(n??s).dims.at(1);l=l.slice(null,[e,null])}return l}(o,r)),e.addPastKeyValues(o,r);const i=(0,a.pick)(o,s.inputNames);return await E(s,i)}async function j(e,{input_ids:t=null,attention_mask:n=null,pixel_values:s=null,position_ids:r=null,inputs_embeds:o=null,past_key_values:i=null,generation_config:a=null,logits_processor:l=null,...c}){if(!o)if(o=await e.encode_text({input_ids:t}),s&&1!==t.dims[1]){const r=await e.encode_image({pixel_values:s});({inputs_embeds:o,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:r,inputs_embeds:o,input_ids:t,attention_mask:n}))}else if(i&&s&&1===t.dims[1]){const e=t.dims[1],s=Object.values(i)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],s]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await O(e,{inputs_embeds:o,past_key_values:i,attention_mask:n,position_ids:r,generation_config:a,logits_processor:l},!0)}function D(e,t,n,s){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:s,attention_mask:r}=n;if(r&&r.dims[1]>s.dims[1]);else if(t<s.dims[1])n.input_ids=s.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&s.data.some((t=>t==e.config.image_token_index))){const r=e.config.num_image_tokens;if(!r)throw new Error("`num_image_tokens` is missing in the model configuration.");const o=s.dims[1]-(t-r);n.input_ids=s.slice(null,[-o,null]),n.attention_mask=(0,u.ones)([1,t+o])}}return n}function V(e,t,n,s){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:z(t)}}function R(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class G extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t,n){super(),this.config=e,this.sessions=t,this.configs=n;const s=P.get(this.constructor),r=F.get(s);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=D;break;case b:case y:case v:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=V;break;case M:this._forward=B;break;case T:this.can_generate=!0,this._forward=j,this._prepare_inputs_for_generation=R;break;default:this._forward=N}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};const _=P.get(this),m=F.get(_);let f;if(n=h.config=await s.AutoConfig.from_pretrained(e,h),m===x)f=await Promise.all([S(e,{model:h.model_file_name??"model"},h),A(e,{generation_config:"generation_config.json"},h)]);else if(m===b||m===y)f=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},h),A(e,{generation_config:"generation_config.json"},h)]);else if(m===k)f=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},h)]);else if(m===M)f=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},h)]);else if(m===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),f=await Promise.all([S(e,t,h),A(e,{generation_config:"generation_config.json"},h)])}else m===v?f=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},h),A(e,{generation_config:"generation_config.json"},h)]):(m!==w&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),f=await Promise.all([S(e,{model:h.model_file_name??"model"},h)]));return new this(n,...f)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}get generation_config(){return this.configs?.generation_config??null}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const s=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&s.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&s.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&s.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&s.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&s.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&s.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&s.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;s.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&s.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&s.extend(n),s}_prepare_generation_config(e,t,n=d.GenerationConfig){const s={...this.config};for(const e of["decoder","generator","text_config"])e in s&&Object.assign(s,s[e]);const r=new n(s);return Object.assign(r,this.generation_config??{}),e&&Object.assign(r,e),t&&Object.assign(r,(0,a.pick)(t,Object.getOwnPropertyNames(r))),r}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[ha,fa,pa,aa],t=P.get(this.constructor),n=new Set,s=this.config.model_type;for(const t of e){const e=t.get(s);e&&n.add(e[0])}let r=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(r+=` Please use the following class instead: ${[...n].join(", ")}`),Error(r)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:s}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),s||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const s=(0,a.pick)(n,this.forward_params),r=this.main_input_name;if(r in s){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else s[r]=e;return{inputs_tensor:s[r],model_inputs:s,model_input_name:r}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:s}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:s,...r}=t,o=await this._prepare_inputs_embeds(t);t={...r,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:r}=await N(this,t);if(null!==s.guidance_scale&&s.guidance_scale>1)r=(0,u.cat)([r,(0,u.full_like)(r,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=z(t.decoder_input_ids).dims[0];if(e!==r.dims[0]){if(1!==r.dims[0])throw new Error(`The encoder outputs have a different batch size (${r.dims[0]}) than the decoder inputs (${e}).`);r=(0,u.cat)(Array.from({length:e},(()=>r)),0)}}return t.encoder_outputs=r,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:s,bos_token_id:r,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(s??=r,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[s]));else if(Array.isArray(s)){if(s.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${s.length}`);i=s}else i=Array.from({length:e},(()=>[s]));return i=z(i),n.decoder_attention_mask=(0,u.ones_like)(i),{input_ids:i,model_inputs:a}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,streamer:r=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=a[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),m=this._get_stopping_criteria(t,s),f=a[l].dims.at(0),g=_.LogitsSampler.getSampler(t),w=new Array(f).fill(0),M=d.tolist();let b;r&&r.put(M);let y={};for(;;){if(a=this.prepare_inputs_for_generation(M,a,t),b=await this.forward(a),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(b);for(const t in e)t in y||(y[t]=[]),y[t].push(e[t])}const e=h(M,b.logits.slice(null,-1,null)),n=[];for(let t=0;t<e.dims.at(0);++t){const s=e[t],r=await g(s);for(const[e,s]of r){const r=BigInt(e);w[t]+=s,M[t].push(r),n.push([r]);break}}r&&r.put(n);if(m(M).every((e=>e)))break;a=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:b,model_inputs:a,is_encoder_decoder:c})}r&&r.end();const x=this.getPastKeyValues(b,a.past_key_values,!0),k=new u.Tensor("int64",M.flat(),[M.length,M[0].length]);if(t.return_dict_in_generate)return{sequences:k,past_key_values:x,...y};for(const e of Object.values(b))"gpu-buffer"===e.location&&e.dispose();return 
k}getPastKeyValues(e,t,n=!1){const s=Object.create(null);for(const r in e)if(r.startsWith("present")){const o=r.replace("present","past_key_values"),i=r.includes("encoder");if(s[o]=i&&t?t[o]:e[r],t&&(!i||n)){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}}return s}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const s in e)s.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[s]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],r=(0,s.getKeyValueShapes)(this.config);for(const s in r)e[s]=new u.Tensor(t,n,r[s])}}async encode_image({pixel_values:e}){const t=(await E(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await E(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class ${}class q extends ${constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class U extends G{}class W extends U{}class X extends U{async _call(e){return new ml(await super._call(e))}}class Q extends U{async _call(e){return new pl(await super._call(e))}}class H extends U{async _call(e){return new _l(await super._call(e))}}class Y extends U{async _call(e){return new fl(await super._call(e))}}class J extends G{}class K extends J{}class Z extends G{}class ee extends Z{}class te extends Z{async _call(e){return new ml(await super._call(e))}}class ne extends Z{async _call(e){return new pl(await super._call(e))}}class se extends Z{async _call(e){return new _l(await super._call(e))}}class re extends Z{async _call(e){return new fl(await super._call(e))}}class oe extends G{}class ie extends oe{}class ae extends oe{async _call(e){return new ml(await super._call(e))}}class le extends oe{async _call(e){return new pl(await super._call(e))}}class ce extends oe{async _call(e){return new _l(await super._call(e))}}class de extends oe{async _call(e){return new fl(await super._call(e))}}class ue extends G{}class pe extends ue{}class he extends ue{async _call(e){return new ml(await super._call(e))}}class _e extends ue{async _call(e){return new pl(await super._call(e))}}class me extends ue{async _call(e){return new _l(await super._call(e))}}class fe extends ue{async _call(e){return new fl(await super._call(e))}}class ge extends G{}class we extends ge{}class Me extends ge{async _call(e){return new ml(await super._call(e))}}class be extends ge{async _call(e){return new pl(await super._call(e))}}class ye extends ge{async _call(e){return new _l(await super._call(e))}}class xe extends ge{async _call(e){return new fl(await super._call(e))}}class ke extends G{}class Te extends ke{}class ve extends ke{async _call(e){return new ml(await super._call(e))}}class Fe extends ke{async _call(e){return new pl(await super._call(e))}}class Ce extends ke{async _call(e){return new _l(await super._call(e))}}class Pe extends ke{async _call(e){return new fl(await super._call(e))}}class Se extends G{}class Ae extends Se{}class Ee extends Se{async _call(e){return new ml(await super._call(e))}}class Le extends Se{async _call(e){return new pl(await super._call(e))}}class ze extends Se{async _call(e){return new 
_l(await super._call(e))}}class Ie extends Se{async _call(e){return new fl(await super._call(e))}}class Be extends G{}class Ne extends Be{}class Oe extends Be{async _call(e){return new pl(await super._call(e))}}class je extends Be{async _call(e){return new _l(await super._call(e))}}class De extends Be{async _call(e){return new fl(await super._call(e))}}class Ve extends Be{async _call(e){return new ml(await super._call(e))}}class Re extends G{}class Ge extends Re{}class $e extends Re{async _call(e){return new ml(await super._call(e))}}class qe extends Re{async _call(e){return new pl(await super._call(e))}}class Ue extends Re{async _call(e){return new _l(await super._call(e))}}class We extends G{}class Xe extends We{}class Qe extends We{async _call(e){return new ml(await super._call(e))}}class He extends We{async _call(e){return new pl(await super._call(e))}}class Ye extends We{async _call(e){return new fl(await super._call(e))}}class Je extends G{}class Ke extends Je{}class Ze extends Je{async _call(e){return new ml(await super._call(e))}}class et extends Je{async _call(e){return new pl(await super._call(e))}}class tt extends Je{async _call(e){return new _l(await super._call(e))}}class nt extends Je{async _call(e){return new fl(await super._call(e))}}class st extends G{}class rt extends st{}class ot extends st{async _call(e){return new ml(await super._call(e))}}class it extends st{async _call(e){return new pl(await super._call(e))}}class at extends st{async _call(e){return new fl(await super._call(e))}}class lt extends G{}class ct extends lt{}class dt extends lt{async _call(e){return new pl(await super._call(e))}}class ut extends lt{async _call(e){return new fl(await super._call(e))}}class pt extends lt{async _call(e){return new ml(await super._call(e))}}class ht extends G{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"]}class _t extends ht{}class mt extends ht{}class ft extends G{}class gt extends ft{}class wt extends ft{}class Mt extends G{}class bt extends Mt{}class yt extends Mt{}class xt extends G{}class kt extends xt{}class Tt extends xt{}class vt extends xt{async _call(e){return new pl(await super._call(e))}}class Ft extends G{}class Ct extends Ft{}class Pt extends Ft{}class St extends Ft{async _call(e){return new pl(await super._call(e))}}class At extends Ft{}class Et extends G{}class Lt extends Et{}class zt extends Et{}class It extends G{}class Bt extends It{}class Nt extends It{}class Ot extends G{}class jt extends Ot{}class Dt extends Ot{async _call(e){return new ml(await super._call(e))}}class Vt extends Ot{async _call(e){return new pl(await super._call(e))}}class Rt extends Ot{async _call(e){return new _l(await super._call(e))}}class Gt extends Ot{async _call(e){return new fl(await super._call(e))}}class $t extends G{}class qt extends $t{}class Ut extends $t{async _call(e){return new ml(await super._call(e))}}class Wt extends $t{async _call(e){return new pl(await super._call(e))}}class Xt extends $t{async _call(e){return new _l(await super._call(e))}}class Qt extends $t{async _call(e){return new fl(await super._call(e))}}class Ht extends G{}class Yt extends Ht{}class Jt extends Ht{async _call(e){return new ml(await super._call(e))}}class Kt extends Ht{async _call(e){return new pl(await super._call(e))}}class Zt extends Ht{async _call(e){return new _l(await super._call(e))}}class en extends Ht{async _call(e){return new fl(await super._call(e))}}class tn extends G{}class nn extends tn{}class sn extends 
tn{}class rn extends G{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"]}class on extends rn{}class an extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const s=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const r=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[r]),t.push(e.task_to_id[s??"transcribe"])}else if(n||s)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,...r}){t=this._prepare_generation_config(t,r);const o=r.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...r});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,s=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let r=this.config.median_filter_width;void 0===r&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),r=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(o.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const s=t[n],o=c[e][n][0].data,i=d[e][n][0].data;for(let e=0;e<s.dims[0];++e){let t=s[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,p.medianFilter)(t,r))}}}const _=[(0,u.mean)(h,1)],m=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(m[0]*m[1]),m);for(let e=0;e<m[0];++e){const t=_[e].neg().squeeze_(0),[n,r]=(0,p.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(r[e]*s);f[e].data.set(l,1)}return f}}class ln extends G{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"]}class cn extends G{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"]}class dn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){const r=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==r)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:s};const l=[],c=[];for(let n=0;n<o.length;++n){const r=o[n],i=e[n],a=t[n],d=s[n];l.push((0,u.cat)([i.slice([0,r]),a,i.slice([r+1,i.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,r]),(0,u.ones)([a.dims[0]]),d.slice([r+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class un extends dn{}class pn extends G{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class hn extends pn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),s],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:s}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let r,o;return e&&(r=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),r&&o?({inputs_embeds:n,attention_mask:s}=this._merge_input_ids_with_image_features({inputs_embeds:r,image_features:o,input_ids:e,attention_mask:s})):n=r||o,{inputs_embeds:n,attention_mask:s}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:s,decoder_attention_mask:r,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await N(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!s)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:s})}const c={inputs_embeds:l,attention_mask:r,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await O(this,c,!0)}}class _n extends G{}class mn extends _n{}class fn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class wn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class bn extends G{}class yn extends bn{}class xn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class kn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends G{}class vn extends Tn{}class Fn extends G{}class Cn extends Fn{}class Pn extends Fn{}class Sn extends G{}class An extends Sn{}class En extends Sn{}class Ln extends G{}class zn extends Ln{}class In extends Ln{}class Bn extends G{}class Nn extends Bn{}class On extends Bn{}class jn extends G{}class Dn extends jn{}class Vn extends jn{}class Rn extends G{}class Gn extends Rn{}class $n extends Rn{}class qn extends G{}class Un extends qn{}class Wn extends qn{}class Xn extends G{}class Qn extends Xn{}class Hn extends Xn{}class Yn extends G{}class Jn extends Yn{}class Kn extends Yn{}class Zn extends G{}class es extends Zn{}class ts extends Zn{}class ns extends G{}class ss extends ns{}class rs extends ns{}class os extends G{}class is extends os{}class as extends os{}class ls extends G{}class cs extends ls{}class ds extends ls{}class us extends G{}class ps extends us{}class hs extends us{}class _s extends G{}class ms extends _s{}class fs extends _s{}class gs extends G{}class ws extends gs{}class Ms extends gs{}class bs extends G{}class ys extends bs{}class xs extends bs{}class ks extends G{}class Ts extends ks{}class vs extends ks{}class Fs extends G{}class Cs extends Fs{}class Ps extends Fs{}class Ss extends G{}class As extends Ss{}class Es extends Ss{}class Ls extends G{}class zs extends Ls{}class Is extends Ls{async _call(e){return new pl(await super._call(e))}}class Bs extends G{}class Ns extends Bs{}class Os extends Bs{async _call(e){return new pl(await super._call(e))}}class js extends G{}class Ds extends js{}class Vs extends G{}class Rs extends Vs{}class Gs extends Vs{async _call(e){return new pl(await super._call(e))}}class $s extends G{}class qs extends $s{}class Us extends G{}class Ws extends Us{}class Xs extends Us{async _call(e){return new pl(await super._call(e))}}class Qs extends G{}class Hs extends Qs{async _call(e){return new Ml(await super._call(e))}}class Ys extends G{}class Js extends Ys{}class Ks extends Ys{async _call(e){return new pl(await super._call(e))}}class Zs extends G{}class er extends Zs{}class tr extends Zs{async _call(e){return new pl(await super._call(e))}}class nr extends G{}class sr extends nr{}class rr extends nr{}class or extends G{}class ir extends or{}class ar extends or{}class lr extends G{}class cr extends lr{}class dr extends lr{async _call(e){return new pl(await super._call(e))}}class ur extends G{}class pr extends ur{}class hr extends ur{async _call(e){return new 
mr(await super._call(e))}}class _r extends ur{async _call(e){return new fr(await super._call(e))}}class mr extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class fr extends ${constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class gr extends G{}class wr extends gr{}class Mr extends gr{async _call(e){return new br(await super._call(e))}}class br extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class yr extends G{}class xr extends yr{}class kr extends yr{async _call(e){return new Tr(await super._call(e))}}class Tr extends mr{}class vr extends G{}class Fr extends vr{}class Cr extends vr{async _call(e){return new pl(await super._call(e))}}class Pr extends G{}class Sr extends Pr{}class Ar extends Pr{async _call(e){return new pl(await super._call(e))}}class Er extends G{}class Lr extends Er{}class zr extends Er{async _call(e){return new pl(await super._call(e))}}class Ir extends G{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new pl(await super._call(e))}}class Or extends G{}class jr extends Or{}class Dr extends Or{}class Vr extends G{}class Rr extends Vr{}class Gr extends Vr{}class $r extends G{}class qr extends $r{}class Ur extends G{}class Wr extends Ur{}class Xr extends Ur{}class Qr extends Ur{}class Hr extends G{}class Yr extends Hr{}class Jr extends G{}class Kr extends Jr{}class Zr extends Jr{}class eo extends G{}class to extends eo{}class no extends eo{}class so extends G{}class ro extends so{}class oo extends G{}class io extends oo{}class ao extends oo{async _call(e){return new pl(await super._call(e))}}class lo extends G{}class co extends lo{}class uo extends lo{async _call(e){return new pl(await super._call(e))}}class po extends G{}class ho extends po{}class _o extends po{async _call(e){return new pl(await super._call(e))}}class mo extends G{}class fo extends mo{}class go extends mo{async _call(e){return new wo(await super._call(e))}}class wo extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Mo extends G{}class bo extends Mo{async get_image_embeddings({pixel_values:e}){return await N(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await E(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new yo(await super._call(e))}}class yo extends ${constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class xo extends G{}class ko extends xo{}class To extends xo{}class vo extends G{}class Fo extends vo{}class Co extends vo{}class Po extends G{}class So extends Po{}class Ao extends Po{async _call(e){return new gl(await super._call(e))}}class Eo extends Po{async _call(e){return new pl(await super._call(e))}}class Lo extends Po{async _call(e){return new _l(await super._call(e))}}class zo extends G{}class Io extends zo{}class Bo extends zo{async _call(e){return new _l(await super._call(e))}}class No extends G{}class Oo extends No{}class jo extends 
G{}class Do extends jo{}class Vo extends jo{async _call(e){return new gl(await super._call(e))}}class Ro extends jo{async _call(e){return new pl(await super._call(e))}}class Go extends G{}class $o extends Go{}class qo extends Go{async _call(e){return new gl(await super._call(e))}}class Uo extends Go{async _call(e){return new pl(await super._call(e))}}class Wo extends Go{async _call(e){return new _l(await super._call(e))}}class Xo extends G{}class Qo extends Xo{}class Ho extends Xo{async _call(e){return new gl(await super._call(e))}}class Yo extends Xo{async _call(e){return new pl(await super._call(e))}}class Jo extends G{}class Ko extends Po{}class Zo extends Po{async _call(e){return new gl(await super._call(e))}}class ei extends Po{async _call(e){return new pl(await super._call(e))}}class ti extends G{}class ni extends ti{}class si extends ti{async _call(e){return new gl(await super._call(e))}}class ri extends ti{async _call(e){return new pl(await super._call(e))}}class oi extends ti{async _call(e){return new hl(await super._call(e))}}class ii extends ti{async _call(e){return new _l(await super._call(e))}}class ai extends G{}class li extends ai{}class ci extends ai{}class di extends ai{async generate_speech(e,t,{threshold:n=.5,minlenratio:s=0,maxlenratio:r=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await N(this,i),c=a.dims[1]/this.config.reduction_factor,d=Math.floor(c*r),p=Math.floor(c*s),h=this.config.num_mel_bins;let _=[],m=null,f=null,g=0;for(;;){++g;const e=I(!!f);let s;s=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let r={use_cache_branch:e,output_sequence:s,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(r,m),f=await E(this.sessions.decoder_model_merged,r),m=this.getPastKeyValues(f,m);const{prob:o,spectrum:i}=f;if(_.push(i),g>=p&&(Array.from(o.data).filter((e=>e>=n)).length>0||g>=d))break}const w=(0,u.cat)(_),{waveform:M}=await E(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:M}}}class ui extends G{main_input_name="spectrogram"}class pi extends G{}class hi extends pi{}class _i extends G{}class mi extends _i{}class fi extends _i{}class gi extends G{}class wi extends gi{}class Mi extends gi{}class bi extends G{}class yi extends bi{}class xi extends bi{}class ki extends G{}class Ti extends ki{}class vi extends ki{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Fi extends ki{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ci extends G{}class Pi extends Ci{async _call(e){return new bl(await super._call(e))}}class Si extends G{}class Ai extends Si{}class Ei extends Si{}class Li extends Si{}class zi extends G{}class Ii extends zi{}class Bi extends zi{}class Ni extends G{}class Oi extends Ni{}class ji extends Ni{async _call(e){return new pl(await super._call(e))}}class Di extends G{}class Vi extends Di{}class Ri extends Di{}class Gi extends G{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,s=this.config.decoder.num_codebooks,r=n-s;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%s;i>0&&i<=r&&(e.data[o++]=e.data[t])}const i=Math.floor(t/s),a=o/(i*s);return new 
u.Tensor(e.type,e.data.slice(0,o),[i,s,a])}prepare_inputs_for_generation(e,t,n){let s=structuredClone(e);for(let e=0;e<s.length;++e)for(let t=0;t<s[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(s[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(s=s.concat(s));return super.prepare_inputs_for_generation(s,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:s}=await E(this.sessions.encodec_decode,{audio_codes:n});return s}}class $i extends G{}class qi extends $i{}class Ui extends $i{async _call(e){return new pl(await super._call(e))}}class Wi extends G{}class Xi extends Wi{}class Qi extends Wi{async _call(e){return new pl(await super._call(e))}}class Hi extends G{}class Yi extends Hi{}class Ji extends Hi{async _call(e){return new pl(await super._call(e))}}class Ki extends G{}class Zi extends Ki{}class ea extends Ki{async _call(e){return new pl(await super._call(e))}}class ta extends G{}class na extends ta{}class sa{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await s.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await G.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const ra=new 
Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",ee]],["electra",["ElectraModel",pe]],["esm",["EsmModel",Ge]],["convbert",["ConvBertModel",ie]],["camembert",["CamembertModel",we]],["deberta",["DebertaModel",Te]],["deberta-v2",["DebertaV2Model",Ae]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",ct]],["distilbert",["DistilBertModel",Ne]],["roberta",["RobertaModel",jt]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Yt]],["clap",["ClapModel",Ti]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",Cn]],["chinese_clip",["ChineseCLIPModel",vn]],["siglip",["SiglipModel",yn]],["mobilebert",["MobileBertModel",Xe]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",So]],["wav2vec2-bert",["Wav2Vec2BertModel",Qo]],["unispeech",["UniSpeechModel",Do]],["unispeech-sat",["UniSpeechSatModel",$o]],["hubert",["HubertModel",Ko]],["wavlm",["WavLMModel",ni]],["audio-spectrogram-transformer",["ASTModel",nn]],["vits",["VitsModel",Pi]],["pyannote",["PyAnnoteModel",Io]],["wespeaker-resnet",["WeSpeakerResNetModel",Oo]],["detr",["DetrModel",pr]],["rt_detr",["RTDetrModel",wr]],["table-transformer",["TableTransformerModel",xr]],["vit",["ViTModel",zs]],["pvt",["PvtModel",Ns]],["vit_msn",["ViTMSNModel",Rs]],["vit_mae",["ViTMAEModel",Ds]],["groupvit",["GroupViTModel",qs]],["fastvit",["FastViTModel",Ws]],["mobilevit",["MobileViTModel",Js]],["mobilevitv2",["MobileViTV2Model",er]],["owlvit",["OwlViTModel",sr]],["owlv2",["Owlv2Model",ir]],["beit",["BeitModel",cr]],["deit",["DeiTModel",Fr]],["hiera",["HieraModel",Sr]],["convnext",["ConvNextModel",io]],["convnextv2",["ConvNextV2Model",co]],["dinov2",["Dinov2Model",ho]],["resnet",["ResNetModel",Lr]],["swin",["SwinModel",Br]],["swin2sr",["Swin2SRModel",jr]],["donut-swin",["DonutSwinModel",ro]],["yolos",["YolosModel",fo]],["dpt",["DPTModel",Rr]],["glpn",["GLPNModel",to]],["hifigan",["SpeechT5HifiGan",ui]],["efficientnet",["EfficientNetModel",Oi]],["decision_transformer",["DecisionTransformerModel",na]],["mobilenet_v1",["MobileNetV1Model",qi]],["mobilenet_v2",["MobileNetV2Model",Xi]],["mobilenet_v3",["MobileNetV3Model",Yi]],["mobilenet_v4",["MobileNetV4Model",Zi]],["maskformer",["MaskFormerModel",Kr]]]),oa=new Map([["t5",["T5Model",_t]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",bt]],["bart",["BartModel",kt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",ko]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",Fo]],["blenderbot",["BlenderbotModel",Lt]],["blenderbot-small",["BlenderbotSmallModel",Bt]]]),ia=new Map([["bloom",["BloomModel",Ts]],["jais",["JAISModel",zn]],["gpt2",["GPT2Model",An]],["gptj",["GPTJModel",Gn]],["gpt_bigcode",["GPTBigCodeModel",Un]],["gpt_neo",["GPTNeoModel",Nn]],["gpt_neox",["GPTNeoXModel",Dn]],["codegen",["CodeGenModel",Qn]],["llama",["LlamaModel",Jn]],["granite",["GraniteModel",es]],["cohere",["CohereModel",ss]],["gemma",["GemmaModel",is]],["gemma2",["Gemma2Model",cs]],["openelm",["OpenELMModel",ps]],["qwen2",["Qwen2Model",ms]],["phi",["PhiModel",ws]],["phi3",["Phi3Model",ys]],["mpt",["MptModel",Cs]],["opt",["OPTModel",As]],["mistral",["MistralModel",mi]],["starcoder2",["Starcoder2Model",wi]],["falcon",["FalconModel",yi]],["stablelm",["StableLmModel",Ii]]]),aa=new Map([["speecht5",["SpeechT5ForSpeechToText",ci]],["whisper",["WhisperForConditionalGeneration",an]]]),la=new Map([["speecht5",["SpeechT5ForTextToSpeech",di]]]),ca=new Map([["vits",["VitsModel",Pi]],["musicgen",["MusicgenForConditionalGeneration",Gi]]]),da=new 
Map([["bert",["BertForSequenceClassification",Q]],["roformer",["RoFormerForSequenceClassification",ne]],["electra",["ElectraForSequenceClassification",_e]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",le]],["camembert",["CamembertForSequenceClassification",be]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Le]],["mpnet",["MPNetForSequenceClassification",et]],["albert",["AlbertForSequenceClassification",dt]],["distilbert",["DistilBertForSequenceClassification",Oe]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",vt]],["mbart",["MBartForSequenceClassification",St]],["mobilebert",["MobileBertForSequenceClassification",He]],["squeezebert",["SqueezeBertForSequenceClassification",it]]]),ua=new Map([["bert",["BertForTokenClassification",H]],["roformer",["RoFormerForTokenClassification",se]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",Ue]],["convbert",["ConvBertForTokenClassification",ce]],["camembert",["CamembertForTokenClassification",ye]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",tt]],["distilbert",["DistilBertForTokenClassification",je]],["roberta",["RobertaForTokenClassification",Rt]],["xlm",["XLMForTokenClassification",Xt]],["xlm-roberta",["XLMRobertaForTokenClassification",Zt]]]),pa=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",wt]],["mt5",["MT5ForConditionalGeneration",yt]],["bart",["BartForConditionalGeneration",Tt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",To]],["m2m_100",["M2M100ForConditionalGeneration",Co]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Nt]]]),ha=new Map([["bloom",["BloomForCausalLM",vs]],["gpt2",["GPT2LMHeadModel",En]],["jais",["JAISLMHeadModel",In]],["gptj",["GPTJForCausalLM",$n]],["gpt_bigcode",["GPTBigCodeForCausalLM",Wn]],["gpt_neo",["GPTNeoForCausalLM",On]],["gpt_neox",["GPTNeoXForCausalLM",Vn]],["codegen",["CodeGenForCausalLM",Hn]],["llama",["LlamaForCausalLM",Kn]],["granite",["GraniteForCausalLM",ts]],["cohere",["CohereForCausalLM",rs]],["gemma",["GemmaForCausalLM",as]],["gemma2",["Gemma2ForCausalLM",ds]],["openelm",["OpenELMForCausalLM",hs]],["qwen2",["Qwen2ForCausalLM",fs]],["phi",["PhiForCausalLM",Ms]],["phi3",["Phi3ForCausalLM",xs]],["mpt",["MptForCausalLM",Ps]],["opt",["OPTForCausalLM",Es]],["mbart",["MBartForCausalLM",At]],["mistral",["MistralForCausalLM",fi]],["starcoder2",["Starcoder2ForCausalLM",Mi]],["falcon",["FalconForCausalLM",xi]],["trocr",["TrOCRForCausalLM",hi]],["stablelm",["StableLmForCausalLM",Bi]]]),_a=new 
Map([["bert",["BertForMaskedLM",X]],["roformer",["RoFormerForMaskedLM",te]],["electra",["ElectraForMaskedLM",he]],["esm",["EsmForMaskedLM",$e]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ve]],["deberta-v2",["DebertaV2ForMaskedLM",Ee]],["mpnet",["MPNetForMaskedLM",Ze]],["albert",["AlbertForMaskedLM",pt]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",Ut]],["xlm-roberta",["XLMRobertaForMaskedLM",Jt]],["mobilebert",["MobileBertForMaskedLM",Qe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),ma=new Map([["bert",["BertForQuestionAnswering",Y]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",de]],["camembert",["CamembertForQuestionAnswering",xe]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Ie]],["mpnet",["MPNetForQuestionAnswering",nt]],["albert",["AlbertForQuestionAnswering",ut]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Gt]],["xlm",["XLMForQuestionAnswering",Qt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",en]],["mobilebert",["MobileBertForQuestionAnswering",Ye]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),fa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ln]]]),ga=new Map([["llava",["LlavaForConditionalGeneration",dn]],["moondream1",["Moondream1ForConditionalGeneration",un]],["florence2",["Florence2ForConditionalGeneration",hn]]]),wa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ln]]]),Ma=new Map([["vit",["ViTForImageClassification",Is]],["pvt",["PvtForImageClassification",Os]],["vit_msn",["ViTMSNForImageClassification",Gs]],["fastvit",["FastViTForImageClassification",Xs]],["mobilevit",["MobileViTForImageClassification",Ks]],["mobilevitv2",["MobileViTV2ForImageClassification",tr]],["beit",["BeitForImageClassification",dr]],["deit",["DeiTForImageClassification",Cr]],["hiera",["HieraForImageClassification",Ar]],["convnext",["ConvNextForImageClassification",ao]],["convnextv2",["ConvNextV2ForImageClassification",uo]],["dinov2",["Dinov2ForImageClassification",_o]],["resnet",["ResNetForImageClassification",zr]],["swin",["SwinForImageClassification",Nr]],["segformer",["SegformerForImageClassification",Ei]],["efficientnet",["EfficientNetForImageClassification",ji]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ui]],["mobilenet_v2",["MobileNetV2ForImageClassification",Qi]],["mobilenet_v3",["MobileNetV3ForImageClassification",Ji]],["mobilenet_v4",["MobileNetV4ForImageClassification",ea]]]),ba=new Map([["detr",["DetrForObjectDetection",hr]],["rt_detr",["RTDetrForObjectDetection",Mr]],["table-transformer",["TableTransformerForObjectDetection",kr]],["yolos",["YolosForObjectDetection",go]]]),ya=new Map([["owlvit",["OwlViTForObjectDetection",rr]],["owlv2",["Owlv2ForObjectDetection",ar]]]),xa=new Map([["detr",["DetrForSegmentation",_r]],["clipseg",["CLIPSegForImageSegmentation",Pn]]]),ka=new Map([["segformer",["SegformerForSemanticSegmentation",Li]],["sapiens",["SapiensForSemanticSegmentation",Wr]]]),Ta=new Map([["detr",["DetrForSegmentation",_r]],["maskformer",["MaskFormerForInstanceSegmentation",Zr]]]),va=new Map([["sam",["SamModel",bo]]]),Fa=new 
Map([["wav2vec2",["Wav2Vec2ForCTC",Ao]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ho]],["unispeech",["UniSpeechForCTC",Vo]],["unispeech-sat",["UniSpeechSatForCTC",qo]],["wavlm",["WavLMForCTC",si]],["hubert",["HubertForCTC",Zo]]]),Ca=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Eo]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Yo]],["unispeech",["UniSpeechForSequenceClassification",Ro]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Uo]],["wavlm",["WavLMForSequenceClassification",ri]],["hubert",["HubertForSequenceClassification",ei]],["audio-spectrogram-transformer",["ASTForAudioClassification",sn]]]),Pa=new Map([["wavlm",["WavLMForXVector",oi]]]),Sa=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Wo]],["wavlm",["WavLMForAudioFrameClassification",ii]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Lo]],["pyannote",["PyAnnoteForAudioFrameClassification",Bo]]]),Aa=new Map([["vitmatte",["VitMatteForImageMatting",Hs]]]),Ea=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Dr]]]),La=new Map([["dpt",["DPTForDepthEstimation",Gr]],["depth_anything",["DepthAnythingForDepthEstimation",qr]],["glpn",["GLPNForDepthEstimation",no]],["sapiens",["SapiensForDepthEstimation",Xr]],["depth_pro",["DepthProForDepthEstimation",Yr]]]),za=new Map([["sapiens",["SapiensForNormalEstimation",Qr]]]),Ia=new Map([["clip",["CLIPVisionModelWithProjection",Mn]],["siglip",["SiglipVisionModel",kn]]]),Ba=[[ra,w],[oa,M],[ia,x],[da,w],[ua,w],[pa,b],[aa,b],[ha,x],[_a,w],[ma,w],[fa,y],[ga,T],[Ma,w],[xa,w],[Ta,w],[ka,w],[Aa,w],[Ea,w],[La,w],[za,w],[ba,w],[ya,w],[va,k],[Fa,w],[Ca,w],[la,b],[ca,w],[Pa,w],[Sa,w],[Ia,w]];for(const[e,t]of Ba)for(const[n,s]of e.values())F.set(n,t),P.set(s,n),C.set(n,s);const Na=[["MusicgenForConditionalGeneration",Gi,v],["CLIPTextModelWithProjection",gn,w],["SiglipTextModel",xn,w],["ClapTextModelWithProjection",vi,w],["ClapAudioModelWithProjection",Fi,w]];for(const[e,t,n]of Na)F.set(e,n),P.set(t,e),C.set(e,t);class Oa extends sa{static MODEL_CLASS_MAPPINGS=Ba.map((e=>e[0]));static BASE_IF_FAIL=!0}class ja extends sa{static MODEL_CLASS_MAPPINGS=[da]}class Da extends sa{static MODEL_CLASS_MAPPINGS=[ua]}class Va extends sa{static MODEL_CLASS_MAPPINGS=[pa]}class Ra extends sa{static MODEL_CLASS_MAPPINGS=[aa]}class Ga extends sa{static MODEL_CLASS_MAPPINGS=[la]}class $a extends sa{static MODEL_CLASS_MAPPINGS=[ca]}class qa extends sa{static MODEL_CLASS_MAPPINGS=[ha]}class Ua extends sa{static MODEL_CLASS_MAPPINGS=[_a]}class Wa extends sa{static MODEL_CLASS_MAPPINGS=[ma]}class Xa extends sa{static MODEL_CLASS_MAPPINGS=[fa]}class Qa extends sa{static MODEL_CLASS_MAPPINGS=[Ma]}class Ha extends sa{static MODEL_CLASS_MAPPINGS=[xa]}class Ya extends sa{static MODEL_CLASS_MAPPINGS=[ka]}class Ja extends sa{static MODEL_CLASS_MAPPINGS=[Ta]}class Ka extends sa{static MODEL_CLASS_MAPPINGS=[ba]}class Za extends sa{static MODEL_CLASS_MAPPINGS=[ya]}class el extends sa{static MODEL_CLASS_MAPPINGS=[va]}class tl extends sa{static MODEL_CLASS_MAPPINGS=[Fa]}class nl extends sa{static MODEL_CLASS_MAPPINGS=[Ca]}class sl extends sa{static MODEL_CLASS_MAPPINGS=[Pa]}class rl extends sa{static MODEL_CLASS_MAPPINGS=[Sa]}class ol extends sa{static MODEL_CLASS_MAPPINGS=[wa]}class il extends sa{static MODEL_CLASS_MAPPINGS=[Aa]}class al extends sa{static MODEL_CLASS_MAPPINGS=[Ea]}class ll extends sa{static MODEL_CLASS_MAPPINGS=[La]}class cl extends sa{static MODEL_CLASS_MAPPINGS=[za]}class dl extends sa{static MODEL_CLASS_MAPPINGS=[Ia]}class ul extends 
${constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:s=null,cross_attentions:r=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=s,this.cross_attentions=r}}class pl extends ${constructor({logits:e}){super(),this.logits=e}}class hl extends ${constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class _l extends ${constructor({logits:e}){super(),this.logits=e}}class ml extends ${constructor({logits:e}){super(),this.logits=e}}class fl extends ${constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class gl extends ${constructor({logits:e}){super(),this.logits=e}}class wl extends ${constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Ml extends ${constructor({alphas:e}){super(),this.alphas=e}}class bl extends ${constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
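The hunk above ends with the Whisper generation path (token-level timestamps recovered from cross-attentions via dynamic time warping in _extract_token_timestamps), the vision-language embedding-merge helpers, and the AutoModel class mappings. A minimal usage sketch of the word-level timestamp feature implemented there, assuming this package's public pipeline API and an illustrative ONNX checkpoint id ('onnx-community/whisper-tiny.en'):

    import { pipeline } from '@huggingface/transformers';

    // Illustrative checkpoint id; any ONNX-exported Whisper model with
    // `alignment_heads` in its generation config should work.
    const transcriber = await pipeline(
      'automatic-speech-recognition',
      'onnx-community/whisper-tiny.en',
      { device: 'webgpu', dtype: 'fp32' } // resolved via deviceToExecutionProviders
    );

    // return_timestamps: 'word' exercises the token-level timestamp path
    // (_extract_token_timestamps above); without `alignment_heads` in the
    // generation config, the generate() override throws.
    const result = await transcriber('audio.wav', { return_timestamps: 'word' });
    console.log(result.chunks);

This is a sketch, not the package's documented example; the checkpoint id and input file are placeholders, and on runtimes without WebGPU the device option falls back per the backend selection logic shown later in this diff.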
112
+ \***********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{ASTForAudioClassification:()=>sn,ASTModel:()=>nn,ASTPreTrainedModel:()=>tn,AlbertForMaskedLM:()=>pt,AlbertForQuestionAnswering:()=>ut,AlbertForSequenceClassification:()=>dt,AlbertModel:()=>ct,AlbertPreTrainedModel:()=>lt,AutoModel:()=>Oa,AutoModelForAudioClassification:()=>nl,AutoModelForAudioFrameClassification:()=>rl,AutoModelForCTC:()=>tl,AutoModelForCausalLM:()=>qa,AutoModelForDepthEstimation:()=>ll,AutoModelForDocumentQuestionAnswering:()=>ol,AutoModelForImageClassification:()=>Qa,AutoModelForImageFeatureExtraction:()=>dl,AutoModelForImageMatting:()=>il,AutoModelForImageSegmentation:()=>Ha,AutoModelForImageToImage:()=>al,AutoModelForMaskGeneration:()=>el,AutoModelForMaskedLM:()=>Ua,AutoModelForNormalEstimation:()=>cl,AutoModelForObjectDetection:()=>Ka,AutoModelForQuestionAnswering:()=>Wa,AutoModelForSemanticSegmentation:()=>Ya,AutoModelForSeq2SeqLM:()=>Va,AutoModelForSequenceClassification:()=>ja,AutoModelForSpeechSeq2Seq:()=>Ra,AutoModelForTextToSpectrogram:()=>Ga,AutoModelForTextToWaveform:()=>$a,AutoModelForTokenClassification:()=>Da,AutoModelForUniversalSegmentation:()=>Ja,AutoModelForVision2Seq:()=>Xa,AutoModelForXVector:()=>sl,AutoModelForZeroShotObjectDetection:()=>Za,BartForConditionalGeneration:()=>Tt,BartForSequenceClassification:()=>vt,BartModel:()=>kt,BartPretrainedModel:()=>xt,BaseModelOutput:()=>q,BeitForImageClassification:()=>dr,BeitModel:()=>cr,BeitPreTrainedModel:()=>lr,BertForMaskedLM:()=>X,BertForQuestionAnswering:()=>Y,BertForSequenceClassification:()=>Q,BertForTokenClassification:()=>H,BertModel:()=>W,BertPreTrainedModel:()=>U,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>Lt,BlenderbotPreTrainedModel:()=>Et,BlenderbotSmallForConditionalGeneration:()=>Nt,BlenderbotSmallModel:()=>Bt,BlenderbotSmallPreTrainedModel:()=>It,BloomForCausalLM:()=>vs,BloomModel:()=>Ts,BloomPreTrainedModel:()=>ks,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>_n,CLIPSegForImageSegmentation:()=>Pn,CLIPSegModel:()=>Cn,CLIPSegPreTrainedModel:()=>Fn,CLIPTextModel:()=>fn,CLIPTextModelWithProjection:()=>gn,CLIPVisionModel:()=>wn,CLIPVisionModelWithProjection:()=>Mn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>xe,CamembertForSequenceClassification:()=>be,CamembertForTokenClassification:()=>ye,CamembertModel:()=>we,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>gl,CausalLMOutputWithPast:()=>wl,ChineseCLIPModel:()=>vn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>Fi,ClapModel:()=>Ti,ClapPreTrainedModel:()=>ki,ClapTextModelWithProjection:()=>vi,CodeGenForCausalLM:()=>Hn,CodeGenModel:()=>Qn,CodeGenPreTrainedModel:()=>Xn,CohereForCausalLM:()=>rs,CohereModel:()=>ss,CoherePreTrainedModel:()=>ns,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>de,ConvBertForSequenceClassification:()=>le,ConvBertForTokenClassification:()=>ce,ConvBertModel:()=>ie,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>ao,ConvNextModel:()=>io,ConvNextPreTrainedModel:()=>oo,ConvNextV2ForImageClassification:()=>uo,ConvNextV2Model:()=>co,ConvNextV2PreTrainedModel:()=>lo,DPTForDepthEstimation:()=>Gr,DPTModel:()=>Rr,DPTPreTrainedModel:()=>Vr,DebertaForMaskedLM:()=>ve,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>Te,DebertaPreTrainedModel:()=>ke,DebertaV2ForMaskedLM:()=>Ee,DebertaV2ForQuestionAnswering:()=>Ie,DebertaV2ForSequenceClassification:()=>Le,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Ae,Debe
rtaV2PreTrainedModel:()=>Se,DecisionTransformerModel:()=>na,DecisionTransformerPreTrainedModel:()=>ta,DeiTForImageClassification:()=>Cr,DeiTModel:()=>Fr,DeiTPreTrainedModel:()=>vr,DepthAnythingForDepthEstimation:()=>qr,DepthAnythingPreTrainedModel:()=>$r,DepthProForDepthEstimation:()=>Yr,DepthProPreTrainedModel:()=>Hr,DetrForObjectDetection:()=>hr,DetrForSegmentation:()=>_r,DetrModel:()=>pr,DetrObjectDetectionOutput:()=>mr,DetrPreTrainedModel:()=>ur,DetrSegmentationOutput:()=>fr,Dinov2ForImageClassification:()=>_o,Dinov2Model:()=>ho,Dinov2PreTrainedModel:()=>po,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Oe,DistilBertForTokenClassification:()=>je,DistilBertModel:()=>Ne,DistilBertPreTrainedModel:()=>Be,DonutSwinModel:()=>ro,DonutSwinPreTrainedModel:()=>so,EfficientNetForImageClassification:()=>ji,EfficientNetModel:()=>Oi,EfficientNetPreTrainedModel:()=>Ni,ElectraForMaskedLM:()=>he,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>_e,ElectraForTokenClassification:()=>me,ElectraModel:()=>pe,ElectraPreTrainedModel:()=>ue,EsmForMaskedLM:()=>$e,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>Ue,EsmModel:()=>Ge,EsmPreTrainedModel:()=>Re,FalconForCausalLM:()=>xi,FalconModel:()=>yi,FalconPreTrainedModel:()=>bi,FastViTForImageClassification:()=>Xs,FastViTModel:()=>Ws,FastViTPreTrainedModel:()=>Us,Florence2ForConditionalGeneration:()=>hn,Florence2PreTrainedModel:()=>pn,GLPNForDepthEstimation:()=>no,GLPNModel:()=>to,GLPNPreTrainedModel:()=>eo,GPT2LMHeadModel:()=>En,GPT2Model:()=>An,GPT2PreTrainedModel:()=>Sn,GPTBigCodeForCausalLM:()=>Wn,GPTBigCodeModel:()=>Un,GPTBigCodePreTrainedModel:()=>qn,GPTJForCausalLM:()=>$n,GPTJModel:()=>Gn,GPTJPreTrainedModel:()=>Rn,GPTNeoForCausalLM:()=>On,GPTNeoModel:()=>Nn,GPTNeoPreTrainedModel:()=>Bn,GPTNeoXForCausalLM:()=>Vn,GPTNeoXModel:()=>Dn,GPTNeoXPreTrainedModel:()=>jn,Gemma2ForCausalLM:()=>ds,Gemma2Model:()=>cs,Gemma2PreTrainedModel:()=>ls,GemmaForCausalLM:()=>as,GemmaModel:()=>is,GemmaPreTrainedModel:()=>os,GraniteForCausalLM:()=>ts,GraniteModel:()=>es,GranitePreTrainedModel:()=>Zn,GroupViTModel:()=>qs,GroupViTPreTrainedModel:()=>$s,HieraForImageClassification:()=>Ar,HieraModel:()=>Sr,HieraPreTrainedModel:()=>Pr,HubertForCTC:()=>Zo,HubertForSequenceClassification:()=>ei,HubertModel:()=>Ko,HubertPreTrainedModel:()=>Jo,ImageMattingOutput:()=>Ml,JAISLMHeadModel:()=>In,JAISModel:()=>zn,JAISPreTrainedModel:()=>Ln,LlamaForCausalLM:()=>Kn,LlamaModel:()=>Jn,LlamaPreTrainedModel:()=>Yn,LlavaForConditionalGeneration:()=>dn,LlavaPreTrainedModel:()=>cn,LongT5ForConditionalGeneration:()=>wt,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>Co,M2M100Model:()=>Fo,M2M100PreTrainedModel:()=>vo,MBartForCausalLM:()=>At,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>St,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ze,MPNetForQuestionAnswering:()=>nt,MPNetForSequenceClassification:()=>et,MPNetForTokenClassification:()=>tt,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Je,MT5ForConditionalGeneration:()=>yt,MT5Model:()=>bt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>To,MarianModel:()=>ko,MarianPreTrainedModel:()=>xo,MaskFormerForInstanceSegmentation:()=>Zr,MaskFormerModel:()=>Kr,MaskFormerPreTrainedModel:()=>Jr,MaskedLMOutput:()=>ml,MistralForCausalLM:()=>fi,MistralModel:()=>mi,MistralPreTrainedModel:()=>_i,MobileBertForMaskedLM:()=>Qe,MobileBertForQuestionAnswering:()=>Ye,MobileBertForSequenceClassification:()=>H
e,MobileBertModel:()=>Xe,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>Ui,MobileNetV1Model:()=>qi,MobileNetV1PreTrainedModel:()=>$i,MobileNetV2ForImageClassification:()=>Qi,MobileNetV2Model:()=>Xi,MobileNetV2PreTrainedModel:()=>Wi,MobileNetV3ForImageClassification:()=>Ji,MobileNetV3Model:()=>Yi,MobileNetV3PreTrainedModel:()=>Hi,MobileNetV4ForImageClassification:()=>ea,MobileNetV4Model:()=>Zi,MobileNetV4PreTrainedModel:()=>Ki,MobileViTForImageClassification:()=>Ks,MobileViTModel:()=>Js,MobileViTPreTrainedModel:()=>Ys,MobileViTV2ForImageClassification:()=>tr,MobileViTV2Model:()=>er,MobileViTV2PreTrainedModel:()=>Zs,ModelOutput:()=>$,Moondream1ForConditionalGeneration:()=>un,MptForCausalLM:()=>Ps,MptModel:()=>Cs,MptPreTrainedModel:()=>Fs,MusicgenForCausalLM:()=>Ri,MusicgenForConditionalGeneration:()=>Gi,MusicgenModel:()=>Vi,MusicgenPreTrainedModel:()=>Di,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>J,OPTForCausalLM:()=>Es,OPTModel:()=>As,OPTPreTrainedModel:()=>Ss,OpenELMForCausalLM:()=>hs,OpenELMModel:()=>ps,OpenELMPreTrainedModel:()=>us,OwlViTForObjectDetection:()=>rr,OwlViTModel:()=>sr,OwlViTPreTrainedModel:()=>nr,Owlv2ForObjectDetection:()=>ar,Owlv2Model:()=>ir,Owlv2PreTrainedModel:()=>or,Phi3ForCausalLM:()=>xs,Phi3Model:()=>ys,Phi3PreTrainedModel:()=>bs,PhiForCausalLM:()=>Ms,PhiModel:()=>ws,PhiPreTrainedModel:()=>gs,PreTrainedModel:()=>G,PretrainedMixin:()=>sa,PvtForImageClassification:()=>Os,PvtModel:()=>Ns,PvtPreTrainedModel:()=>Bs,PyAnnoteForAudioFrameClassification:()=>Bo,PyAnnoteModel:()=>Io,PyAnnotePreTrainedModel:()=>zo,QuestionAnsweringModelOutput:()=>fl,Qwen2ForCausalLM:()=>fs,Qwen2Model:()=>ms,Qwen2PreTrainedModel:()=>_s,RTDetrForObjectDetection:()=>Mr,RTDetrModel:()=>wr,RTDetrObjectDetectionOutput:()=>br,RTDetrPreTrainedModel:()=>gr,ResNetForImageClassification:()=>zr,ResNetModel:()=>Lr,ResNetPreTrainedModel:()=>Er,RoFormerForMaskedLM:()=>te,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>ne,RoFormerForTokenClassification:()=>se,RoFormerModel:()=>ee,RoFormerPreTrainedModel:()=>Z,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Gt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>Rt,RobertaModel:()=>jt,RobertaPreTrainedModel:()=>Ot,SamImageSegmentationOutput:()=>yo,SamModel:()=>bo,SamPreTrainedModel:()=>Mo,SapiensForDepthEstimation:()=>Xr,SapiensForNormalEstimation:()=>Qr,SapiensForSemanticSegmentation:()=>Wr,SapiensPreTrainedModel:()=>Ur,SegformerForImageClassification:()=>Ei,SegformerForSemanticSegmentation:()=>Li,SegformerModel:()=>Ai,SegformerPreTrainedModel:()=>Si,Seq2SeqLMOutput:()=>ul,SequenceClassifierOutput:()=>pl,SiglipModel:()=>yn,SiglipPreTrainedModel:()=>bn,SiglipTextModel:()=>xn,SiglipVisionModel:()=>kn,SpeechT5ForSpeechToText:()=>ci,SpeechT5ForTextToSpeech:()=>di,SpeechT5HifiGan:()=>ui,SpeechT5Model:()=>li,SpeechT5PreTrainedModel:()=>ai,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>it,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>st,StableLmForCausalLM:()=>Bi,StableLmModel:()=>Ii,StableLmPreTrainedModel:()=>zi,Starcoder2ForCausalLM:()=>Mi,Starcoder2Model:()=>wi,Starcoder2PreTrainedModel:()=>gi,Swin2SRForImageSuperResolution:()=>Dr,Swin2SRModel:()=>jr,Swin2SRPreTrainedModel:()=>Or,SwinForImageClassification:()=>Nr,SwinModel:()=>Br,SwinPreTrainedModel:()=>Ir,T5ForConditionalGeneration:()=>mt,T5Model:()=>_t,T5PreTrainedModel:()=>ht,TableTransformerForObjectDetection:()=>kr,TableTransformerModel:()=>xr,TableTr
ansformerObjectDetectionOutput:()=>Tr,TableTransformerPreTrainedModel:()=>yr,TokenClassifierOutput:()=>_l,TrOCRForCausalLM:()=>hi,TrOCRPreTrainedModel:()=>pi,UniSpeechForCTC:()=>Vo,UniSpeechForSequenceClassification:()=>Ro,UniSpeechModel:()=>Do,UniSpeechPreTrainedModel:()=>jo,UniSpeechSatForAudioFrameClassification:()=>Wo,UniSpeechSatForCTC:()=>qo,UniSpeechSatForSequenceClassification:()=>Uo,UniSpeechSatModel:()=>$o,UniSpeechSatPreTrainedModel:()=>Go,ViTForImageClassification:()=>Is,ViTMAEModel:()=>Ds,ViTMAEPreTrainedModel:()=>js,ViTMSNForImageClassification:()=>Gs,ViTMSNModel:()=>Rs,ViTMSNPreTrainedModel:()=>Vs,ViTModel:()=>zs,ViTPreTrainedModel:()=>Ls,VisionEncoderDecoderModel:()=>ln,VitMatteForImageMatting:()=>Hs,VitMattePreTrainedModel:()=>Qs,VitsModel:()=>Pi,VitsModelOutput:()=>bl,VitsPreTrainedModel:()=>Ci,Wav2Vec2BertForCTC:()=>Ho,Wav2Vec2BertForSequenceClassification:()=>Yo,Wav2Vec2BertModel:()=>Qo,Wav2Vec2BertPreTrainedModel:()=>Xo,Wav2Vec2ForAudioFrameClassification:()=>Lo,Wav2Vec2ForCTC:()=>Ao,Wav2Vec2ForSequenceClassification:()=>Eo,Wav2Vec2Model:()=>So,Wav2Vec2PreTrainedModel:()=>Po,WavLMForAudioFrameClassification:()=>ii,WavLMForCTC:()=>si,WavLMForSequenceClassification:()=>ri,WavLMForXVector:()=>oi,WavLMModel:()=>ni,WavLMPreTrainedModel:()=>ti,WeSpeakerResNetModel:()=>Oo,WeSpeakerResNetPreTrainedModel:()=>No,WhisperForConditionalGeneration:()=>an,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Qt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Xt,XLMModel:()=>qt,XLMPreTrainedModel:()=>$t,XLMRobertaForMaskedLM:()=>Jt,XLMRobertaForQuestionAnswering:()=>en,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Zt,XLMRobertaModel:()=>Yt,XLMRobertaPreTrainedModel:()=>Ht,XLMWithLMHeadModel:()=>Ut,XVectorOutput:()=>hl,YolosForObjectDetection:()=>go,YolosModel:()=>fo,YolosObjectDetectionOutput:()=>wo,YolosPreTrainedModel:()=>mo});var s=n(/*! ./configs.js */"./src/configs.js"),r=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),_=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),m=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,M=1,b=2,y=3,x=4,k=5,T=6,v=7,F=new Map,C=new Map,P=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:a,session_options:c,session_config:d}=await async function(e,t,n){const i=n.config?.["transformers.js_config"]??{};let a=n.device??i.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". 
Using the default device.`),a=null));const c=a??(m.apis.IS_NODE_ENV?"cpu":"wasm"),d=(0,r.deviceToExecutionProviders)(c);let u=n.dtype??i.dtype;"string"!=typeof u&&(u&&u.hasOwnProperty(t)?u=u[t]:(u=o.DEFAULT_DEVICE_DTYPE_MAPPING[c]??o.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${u}) for this device (${c}).`)));const p=u;if(!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(p))throw new Error(`Invalid dtype: ${p}. Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);if(p===o.DATA_TYPES.fp16&&"webgpu"===c&&!await(0,o.isWebGpuFp16Supported)())throw new Error(`The device (${c}) does not support fp16.`);const h=i.kv_cache_dtype?"string"==typeof i.kv_cache_dtype?i.kv_cache_dtype:i.kv_cache_dtype[p]??"float32":void 0;if(h&&!["float32","float16"].includes(h))throw new Error(`Invalid kv_cache_dtype: ${h}. Should be one of: float32, float16`);const _={dtype:p,kv_cache_dtype:h},f=o.DEFAULT_DTYPE_SUFFIX_MAPPING[p],g=`${n.subfolder??""}/${t}${f}.onnx`,w={...n.session_options};w.executionProviders??=d;const M=i.free_dimension_overrides;M?w.freeDimensionOverrides??=M:c.startsWith("webnn")&&!w.freeDimensionOverrides&&console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');const b=(0,l.getModelFile)(e,g,!0,n),y=n.use_external_data_format??i.use_external_data_format;let x=[];if(y&&(!0===y||"object"==typeof y&&y.hasOwnProperty(t)&&!0===y[t])){if(m.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const s=`${t}${f}.onnx_data`,r=`${n.subfolder??""}/${s}`;x.push(new Promise((async(t,o)=>{const i=await(0,l.getModelFile)(e,r,!0,n);t({path:s,data:i})})))}else void 0!==w.externalData&&(x=w.externalData.map((async t=>{if("string"==typeof t.data){const s=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:s}}return t})));if(x.length>0&&(w.externalData=await Promise.all(x)),"webgpu"===c){const e=(0,s.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,r.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";w.preferredOutputLocation=t}}return{buffer:await b,session_options:w,session_config:_}}(e,t[i],n);return[i,await(0,r.createInferenceSession)(a,c,d)]}))))}async function A(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async s=>[s,await(0,l.getModelJSON)(e,t[s],!1,n)]))))}async function E(e,t){const n=function(e,t){const n=Object.create(null),s=[];for(const o of e.inputNames){const e=t[o];e instanceof u.Tensor?n[o]=(0,r.isONNXProxy)()?e.clone():e:s.push(o)}if(s.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${s.join(", ")}.`);const o=Object.keys(t).length,i=e.inputNames.length;if(o>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${o} > ${i}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let s=await e.run(t);return s=L(s),s}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function L(e){for(let t in e)(0,r.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&L(e[t]);return e}function z(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function I(e){return new u.Tensor("bool",[e],[1])}async function B(e,t){let{encoder_outputs:n,input_ids:s,decoder_input_ids:r,...o}=t;if(!n){const s=(0,a.pick)(t,e.sessions.model.inputNames);n=(await N(e,s)).last_hidden_state}o.input_ids=r,o.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(o.encoder_attention_mask=t.attention_mask);return await O(e,o,!0)}async function N(e,t){const n=e.sessions.model,s=(0,a.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!s.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");s.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!s.token_type_ids&&(s.token_type_ids=new u.Tensor("int64",new BigInt64Array(s.input_ids.data.length),s.input_ids.dims)),await E(n,s)}async function O(e,t,n=!1){const s=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:r,...o}=t;s.inputNames.includes("use_cache_branch")&&(o.use_cache_branch=I(!!r)),s.inputNames.includes("position_ids")&&o.attention_mask&&!o.position_ids&&(o.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:s,attention_mask:r}=e,[o,i]=r.dims,a=new BigInt64Array(r.data.length);for(let e=0;e<o;++e){const t=e*i;let n=BigInt(0);for(let e=0;e<i;++e){const s=t+e;0n===r.data[s]?a[s]=BigInt(1):(a[s]=n,n+=r.data[s])}}let l=new u.Tensor("int64",a,r.dims);if(t){const e=-(n??s).dims.at(1);l=l.slice(null,[e,null])}return l}(o,r)),e.addPastKeyValues(o,r);const i=(0,a.pick)(o,s.inputNames);return await E(s,i)}async function j(e,{input_ids:t=null,attention_mask:n=null,pixel_values:s=null,position_ids:r=null,inputs_embeds:o=null,past_key_values:i=null,generation_config:a=null,logits_processor:l=null,...c}){if(!o)if(o=await e.encode_text({input_ids:t}),s&&1!==t.dims[1]){const r=await e.encode_image({pixel_values:s});({inputs_embeds:o,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:r,inputs_embeds:o,input_ids:t,attention_mask:n}))}else if(i&&s&&1===t.dims[1]){const e=t.dims[1],s=Object.values(i)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],s]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await O(e,{inputs_embeds:o,past_key_values:i,attention_mask:n,position_ids:r,generation_config:a,logits_processor:l},!0)}function D(e,t,n,s){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:s,attention_mask:r}=n;if(r&&r.dims[1]>s.dims[1]);else if(t<s.dims[1])n.input_ids=s.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&s.data.some((t=>t==e.config.image_token_index))){const r=e.config.num_image_tokens;if(!r)throw new Error("`num_image_tokens` is missing in the model configuration.");const o=s.dims[1]-(t-r);n.input_ids=s.slice(null,[-o,null]),n.attention_mask=(0,u.ones)([1,t+o])}}return n}function V(e,t,n,s){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:z(t)}}function R(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class G extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t,n){super(),this.config=e,this.sessions=t,this.configs=n;const s=P.get(this.constructor),r=F.get(s);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=D;break;case b:case y:case v:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=V;break;case M:this._forward=B;break;case T:this.can_generate=!0,this._forward=j,this._prepare_inputs_for_generation=R;break;default:this._forward=N}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};const _=P.get(this),m=F.get(_);let f;if(n=h.config=await s.AutoConfig.from_pretrained(e,h),m===x)f=await Promise.all([S(e,{model:h.model_file_name??"model"},h),A(e,{generation_config:"generation_config.json"},h)]);else if(m===b||m===y)f=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},h),A(e,{generation_config:"generation_config.json"},h)]);else if(m===k)f=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},h)]);else if(m===M)f=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},h)]);else if(m===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),f=await Promise.all([S(e,t,h),A(e,{generation_config:"generation_config.json"},h)])}else m===v?f=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},h),A(e,{generation_config:"generation_config.json"},h)]):(m!==w&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),f=await Promise.all([S(e,{model:h.model_file_name??"model"},h)]));return new this(n,...f)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}get generation_config(){return this.configs?.generation_config??null}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const s=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&s.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&s.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&s.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&s.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&s.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&s.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&s.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;s.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&s.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&s.extend(n),s}_prepare_generation_config(e,t,n=d.GenerationConfig){const s={...this.config};for(const e of["decoder","generator","text_config"])e in s&&Object.assign(s,s[e]);const r=new n(s);return Object.assign(r,this.generation_config??{}),e&&Object.assign(r,e),t&&Object.assign(r,(0,a.pick)(t,Object.getOwnPropertyNames(r))),r}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[ha,fa,pa,aa],t=P.get(this.constructor),n=new Set,s=this.config.model_type;for(const t of e){const e=t.get(s);e&&n.add(e[0])}let r=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(r+=` Please use the following class instead: ${[...n].join(", ")}`),Error(r)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:s}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),s||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const s=(0,a.pick)(n,this.forward_params),r=this.main_input_name;if(r in s){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else s[r]=e;return{inputs_tensor:s[r],model_inputs:s,model_input_name:r}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:s}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:s,...r}=t,o=await this._prepare_inputs_embeds(t);t={...r,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:r}=await N(this,t);if(null!==s.guidance_scale&&s.guidance_scale>1)r=(0,u.cat)([r,(0,u.full_like)(r,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=z(t.decoder_input_ids).dims[0];if(e!==r.dims[0]){if(1!==r.dims[0])throw new Error(`The encoder outputs have a different batch size (${r.dims[0]}) than the decoder inputs (${e}).`);r=(0,u.cat)(Array.from({length:e},(()=>r)),0)}}return t.encoder_outputs=r,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:s,bos_token_id:r,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(s??=r,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[s]));else if(Array.isArray(s)){if(s.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${s.length}`);i=s}else i=Array.from({length:e},(()=>[s]));return i=z(i),n.decoder_attention_mask=(0,u.ones_like)(i),{input_ids:i,model_inputs:a}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,streamer:r=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=a[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),m=this._get_stopping_criteria(t,s),f=a[l].dims.at(0),g=_.LogitsSampler.getSampler(t),w=new Array(f).fill(0),M=d.tolist();let b;r&&r.put(M);let y={};for(;;){if(a=this.prepare_inputs_for_generation(M,a,t),b=await this.forward(a),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(b);for(const t in e)t in y||(y[t]=[]),y[t].push(e[t])}const e=h(M,b.logits.slice(null,-1,null)),n=[];for(let t=0;t<e.dims.at(0);++t){const s=e[t],r=await g(s);for(const[e,s]of r){const r=BigInt(e);w[t]+=s,M[t].push(r),n.push([r]);break}}r&&r.put(n);if(m(M).every((e=>e)))break;a=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:b,model_inputs:a,is_encoder_decoder:c})}r&&r.end();const x=this.getPastKeyValues(b,a.past_key_values,!0),k=new u.Tensor("int64",M.flat(),[M.length,M[0].length]);if(t.return_dict_in_generate)return{sequences:k,past_key_values:x,...y};for(const e of Object.values(b))"gpu-buffer"===e.location&&e.dispose();return 
k}getPastKeyValues(e,t,n=!1){const s=Object.create(null);for(const r in e)if(r.startsWith("present")){const o=r.replace("present","past_key_values"),i=r.includes("encoder");if(s[o]=i&&t?t[o]:e[r],t&&(!i||n)){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}}return s}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const s in e)s.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[s]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.sessions.decoder_model_merged??this.sessions.model,n=t?.config?.kv_cache_dtype??"float32",r="float16"===n?new Uint16Array:[],o=(0,s.getKeyValueShapes)(this.config);for(const t in o)e[t]=new u.Tensor(n,r,o[t])}}async encode_image({pixel_values:e}){const t=(await E(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await E(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class ${}class q extends ${constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class U extends G{}class W extends U{}class X extends U{async _call(e){return new ml(await super._call(e))}}class Q extends U{async _call(e){return new pl(await super._call(e))}}class H extends U{async _call(e){return new _l(await super._call(e))}}class Y extends U{async _call(e){return new fl(await super._call(e))}}class J extends G{}class K extends J{}class Z extends G{}class ee extends Z{}class te extends Z{async _call(e){return new ml(await super._call(e))}}class ne extends Z{async _call(e){return new pl(await super._call(e))}}class se extends Z{async _call(e){return new _l(await super._call(e))}}class re extends Z{async _call(e){return new fl(await super._call(e))}}class oe extends G{}class ie extends oe{}class ae extends oe{async _call(e){return new ml(await super._call(e))}}class le extends oe{async _call(e){return new pl(await super._call(e))}}class ce extends oe{async _call(e){return new _l(await super._call(e))}}class de extends oe{async _call(e){return new fl(await super._call(e))}}class ue extends G{}class pe extends ue{}class he extends ue{async _call(e){return new ml(await super._call(e))}}class _e extends ue{async _call(e){return new pl(await super._call(e))}}class me extends ue{async _call(e){return new _l(await super._call(e))}}class fe extends ue{async _call(e){return new fl(await super._call(e))}}class ge extends G{}class we extends ge{}class Me extends ge{async _call(e){return new ml(await super._call(e))}}class be extends ge{async _call(e){return new pl(await super._call(e))}}class ye extends ge{async _call(e){return new _l(await super._call(e))}}class xe extends ge{async _call(e){return new fl(await super._call(e))}}class ke extends G{}class Te extends ke{}class ve extends ke{async _call(e){return new ml(await super._call(e))}}class Fe extends ke{async _call(e){return new pl(await super._call(e))}}class Ce extends ke{async _call(e){return new _l(await super._call(e))}}class Pe extends ke{async _call(e){return new fl(await super._call(e))}}class Se extends G{}class Ae extends Se{}class Ee extends Se{async _call(e){return new ml(await super._call(e))}}class Le extends Se{async _call(e){return new pl(await 
super._call(e))}}class ze extends Se{async _call(e){return new _l(await super._call(e))}}class Ie extends Se{async _call(e){return new fl(await super._call(e))}}class Be extends G{}class Ne extends Be{}class Oe extends Be{async _call(e){return new pl(await super._call(e))}}class je extends Be{async _call(e){return new _l(await super._call(e))}}class De extends Be{async _call(e){return new fl(await super._call(e))}}class Ve extends Be{async _call(e){return new ml(await super._call(e))}}class Re extends G{}class Ge extends Re{}class $e extends Re{async _call(e){return new ml(await super._call(e))}}class qe extends Re{async _call(e){return new pl(await super._call(e))}}class Ue extends Re{async _call(e){return new _l(await super._call(e))}}class We extends G{}class Xe extends We{}class Qe extends We{async _call(e){return new ml(await super._call(e))}}class He extends We{async _call(e){return new pl(await super._call(e))}}class Ye extends We{async _call(e){return new fl(await super._call(e))}}class Je extends G{}class Ke extends Je{}class Ze extends Je{async _call(e){return new ml(await super._call(e))}}class et extends Je{async _call(e){return new pl(await super._call(e))}}class tt extends Je{async _call(e){return new _l(await super._call(e))}}class nt extends Je{async _call(e){return new fl(await super._call(e))}}class st extends G{}class rt extends st{}class ot extends st{async _call(e){return new ml(await super._call(e))}}class it extends st{async _call(e){return new pl(await super._call(e))}}class at extends st{async _call(e){return new fl(await super._call(e))}}class lt extends G{}class ct extends lt{}class dt extends lt{async _call(e){return new pl(await super._call(e))}}class ut extends lt{async _call(e){return new fl(await super._call(e))}}class pt extends lt{async _call(e){return new ml(await super._call(e))}}class ht extends G{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"]}class _t extends ht{}class mt extends ht{}class ft extends G{}class gt extends ft{}class wt extends ft{}class Mt extends G{}class bt extends Mt{}class yt extends Mt{}class xt extends G{}class kt extends xt{}class Tt extends xt{}class vt extends xt{async _call(e){return new pl(await super._call(e))}}class Ft extends G{}class Ct extends Ft{}class Pt extends Ft{}class St extends Ft{async _call(e){return new pl(await super._call(e))}}class At extends Ft{}class Et extends G{}class Lt extends Et{}class zt extends Et{}class It extends G{}class Bt extends It{}class Nt extends It{}class Ot extends G{}class jt extends Ot{}class Dt extends Ot{async _call(e){return new ml(await super._call(e))}}class Vt extends Ot{async _call(e){return new pl(await super._call(e))}}class Rt extends Ot{async _call(e){return new _l(await super._call(e))}}class Gt extends Ot{async _call(e){return new fl(await super._call(e))}}class $t extends G{}class qt extends $t{}class Ut extends $t{async _call(e){return new ml(await super._call(e))}}class Wt extends $t{async _call(e){return new pl(await super._call(e))}}class Xt extends $t{async _call(e){return new _l(await super._call(e))}}class Qt extends $t{async _call(e){return new fl(await super._call(e))}}class Ht extends G{}class Yt extends Ht{}class Jt extends Ht{async _call(e){return new ml(await super._call(e))}}class Kt extends Ht{async _call(e){return new pl(await super._call(e))}}class Zt extends Ht{async _call(e){return new _l(await super._call(e))}}class en extends Ht{async _call(e){return new fl(await 
super._call(e))}}class tn extends G{}class nn extends tn{}class sn extends tn{}class rn extends G{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"]}class on extends rn{}class an extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const s=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const r=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[r]),t.push(e.task_to_id[s??"transcribe"])}else if(n||s)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,...r}){t=this._prepare_generation_config(t,r);const o=r.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...r});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,s=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let r=this.config.median_filter_width;void 0===r&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),r=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(o.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const s=t[n],o=c[e][n][0].data,i=d[e][n][0].data;for(let e=0;e<s.dims[0];++e){let t=s[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,p.medianFilter)(t,r))}}}const _=[(0,u.mean)(h,1)],m=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(m[0]*m[1]),m);for(let e=0;e<m[0];++e){const t=_[e].neg().squeeze_(0),[n,r]=(0,p.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(r[e]*s);f[e].data.set(l,1)}return f}}class ln extends G{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"]}class cn extends G{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"]}class dn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){const r=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==r)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:s};const l=[],c=[];for(let n=0;n<o.length;++n){const r=o[n],i=e[n],a=t[n],d=s[n];l.push((0,u.cat)([i.slice([0,r]),a,i.slice([r+1,i.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,r]),(0,u.ones)([a.dims[0]]),d.slice([r+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class un extends dn{}class pn extends G{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class hn extends pn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),s],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:s}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let r,o;return e&&(r=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),r&&o?({inputs_embeds:n,attention_mask:s}=this._merge_input_ids_with_image_features({inputs_embeds:r,image_features:o,input_ids:e,attention_mask:s})):n=r||o,{inputs_embeds:n,attention_mask:s}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:s,decoder_attention_mask:r,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await N(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!s)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:s})}const c={inputs_embeds:l,attention_mask:r,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await O(this,c,!0)}}class _n extends G{}class mn extends _n{}class fn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class wn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class bn extends G{}class yn extends bn{}class xn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class kn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends G{}class vn extends Tn{}class Fn extends G{}class Cn extends Fn{}class Pn extends Fn{}class Sn extends G{}class An extends Sn{}class En extends Sn{}class Ln extends G{}class zn extends Ln{}class In extends Ln{}class Bn extends G{}class Nn extends Bn{}class On extends Bn{}class jn extends G{}class Dn extends jn{}class Vn extends jn{}class Rn extends G{}class Gn extends Rn{}class $n extends Rn{}class qn extends G{}class Un extends qn{}class Wn extends qn{}class Xn extends G{}class Qn extends Xn{}class Hn extends Xn{}class Yn extends G{}class Jn extends Yn{}class Kn extends Yn{}class Zn extends G{}class es extends Zn{}class ts extends Zn{}class ns extends G{}class ss extends ns{}class rs extends ns{}class os extends G{}class is extends os{}class as extends os{}class ls extends G{}class cs extends ls{}class ds extends ls{}class us extends G{}class ps extends us{}class hs extends us{}class _s extends G{}class ms extends _s{}class fs extends _s{}class gs extends G{}class ws extends gs{}class Ms extends gs{}class bs extends G{}class ys extends bs{}class xs extends bs{}class ks extends G{}class Ts extends ks{}class vs extends ks{}class Fs extends G{}class Cs extends Fs{}class Ps extends Fs{}class Ss extends G{}class As extends Ss{}class Es extends Ss{}class Ls extends G{}class zs extends Ls{}class Is extends Ls{async _call(e){return new pl(await super._call(e))}}class Bs extends G{}class Ns extends Bs{}class Os extends Bs{async _call(e){return new pl(await super._call(e))}}class js extends G{}class Ds extends js{}class Vs extends G{}class Rs extends Vs{}class Gs extends Vs{async _call(e){return new pl(await super._call(e))}}class $s extends G{}class qs extends $s{}class Us extends G{}class Ws extends Us{}class Xs extends Us{async _call(e){return new pl(await super._call(e))}}class Qs extends G{}class Hs extends Qs{async _call(e){return new Ml(await super._call(e))}}class Ys extends G{}class Js extends Ys{}class Ks extends Ys{async _call(e){return new pl(await super._call(e))}}class Zs extends G{}class er extends Zs{}class tr extends Zs{async _call(e){return new pl(await super._call(e))}}class nr extends G{}class sr extends nr{}class rr extends nr{}class or extends G{}class ir extends or{}class ar extends or{}class lr extends G{}class cr extends lr{}class dr extends lr{async _call(e){return new pl(await super._call(e))}}class ur extends G{}class pr extends ur{}class hr extends ur{async _call(e){return new 
mr(await super._call(e))}}class _r extends ur{async _call(e){return new fr(await super._call(e))}}class mr extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class fr extends ${constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class gr extends G{}class wr extends gr{}class Mr extends gr{async _call(e){return new br(await super._call(e))}}class br extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class yr extends G{}class xr extends yr{}class kr extends yr{async _call(e){return new Tr(await super._call(e))}}class Tr extends mr{}class vr extends G{}class Fr extends vr{}class Cr extends vr{async _call(e){return new pl(await super._call(e))}}class Pr extends G{}class Sr extends Pr{}class Ar extends Pr{async _call(e){return new pl(await super._call(e))}}class Er extends G{}class Lr extends Er{}class zr extends Er{async _call(e){return new pl(await super._call(e))}}class Ir extends G{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new pl(await super._call(e))}}class Or extends G{}class jr extends Or{}class Dr extends Or{}class Vr extends G{}class Rr extends Vr{}class Gr extends Vr{}class $r extends G{}class qr extends $r{}class Ur extends G{}class Wr extends Ur{}class Xr extends Ur{}class Qr extends Ur{}class Hr extends G{}class Yr extends Hr{}class Jr extends G{}class Kr extends Jr{}class Zr extends Jr{}class eo extends G{}class to extends eo{}class no extends eo{}class so extends G{}class ro extends so{}class oo extends G{}class io extends oo{}class ao extends oo{async _call(e){return new pl(await super._call(e))}}class lo extends G{}class co extends lo{}class uo extends lo{async _call(e){return new pl(await super._call(e))}}class po extends G{}class ho extends po{}class _o extends po{async _call(e){return new pl(await super._call(e))}}class mo extends G{}class fo extends mo{}class go extends mo{async _call(e){return new wo(await super._call(e))}}class wo extends ${constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Mo extends G{}class bo extends Mo{async get_image_embeddings({pixel_values:e}){return await N(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await E(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new yo(await super._call(e))}}class yo extends ${constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class xo extends G{}class ko extends xo{}class To extends xo{}class vo extends G{}class Fo extends vo{}class Co extends vo{}class Po extends G{}class So extends Po{}class Ao extends Po{async _call(e){return new gl(await super._call(e))}}class Eo extends Po{async _call(e){return new pl(await super._call(e))}}class Lo extends Po{async _call(e){return new _l(await super._call(e))}}class zo extends G{}class Io extends zo{}class Bo extends zo{async _call(e){return new _l(await super._call(e))}}class No extends G{}class Oo extends No{}class jo extends 
G{}class Do extends jo{}class Vo extends jo{async _call(e){return new gl(await super._call(e))}}class Ro extends jo{async _call(e){return new pl(await super._call(e))}}class Go extends G{}class $o extends Go{}class qo extends Go{async _call(e){return new gl(await super._call(e))}}class Uo extends Go{async _call(e){return new pl(await super._call(e))}}class Wo extends Go{async _call(e){return new _l(await super._call(e))}}class Xo extends G{}class Qo extends Xo{}class Ho extends Xo{async _call(e){return new gl(await super._call(e))}}class Yo extends Xo{async _call(e){return new pl(await super._call(e))}}class Jo extends G{}class Ko extends Po{}class Zo extends Po{async _call(e){return new gl(await super._call(e))}}class ei extends Po{async _call(e){return new pl(await super._call(e))}}class ti extends G{}class ni extends ti{}class si extends ti{async _call(e){return new gl(await super._call(e))}}class ri extends ti{async _call(e){return new pl(await super._call(e))}}class oi extends ti{async _call(e){return new hl(await super._call(e))}}class ii extends ti{async _call(e){return new _l(await super._call(e))}}class ai extends G{}class li extends ai{}class ci extends ai{}class di extends ai{async generate_speech(e,t,{threshold:n=.5,minlenratio:s=0,maxlenratio:r=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await N(this,i),c=a.dims[1]/this.config.reduction_factor,d=Math.floor(c*r),p=Math.floor(c*s),h=this.config.num_mel_bins;let _=[],m=null,f=null,g=0;for(;;){++g;const e=I(!!f);let s;s=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let r={use_cache_branch:e,output_sequence:s,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(r,m),f=await E(this.sessions.decoder_model_merged,r),m=this.getPastKeyValues(f,m);const{prob:o,spectrum:i}=f;if(_.push(i),g>=p&&(Array.from(o.data).filter((e=>e>=n)).length>0||g>=d))break}const w=(0,u.cat)(_),{waveform:M}=await E(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:M}}}class ui extends G{main_input_name="spectrogram"}class pi extends G{}class hi extends pi{}class _i extends G{}class mi extends _i{}class fi extends _i{}class gi extends G{}class wi extends gi{}class Mi extends gi{}class bi extends G{}class yi extends bi{}class xi extends bi{}class ki extends G{}class Ti extends ki{}class vi extends ki{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Fi extends ki{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ci extends G{}class Pi extends Ci{async _call(e){return new bl(await super._call(e))}}class Si extends G{}class Ai extends Si{}class Ei extends Si{}class Li extends Si{}class zi extends G{}class Ii extends zi{}class Bi extends zi{}class Ni extends G{}class Oi extends Ni{}class ji extends Ni{async _call(e){return new pl(await super._call(e))}}class Di extends G{}class Vi extends Di{}class Ri extends Di{}class Gi extends G{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,s=this.config.decoder.num_codebooks,r=n-s;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%s;i>0&&i<=r&&(e.data[o++]=e.data[t])}const i=Math.floor(t/s),a=o/(i*s);return new 
u.Tensor(e.type,e.data.slice(0,o),[i,s,a])}prepare_inputs_for_generation(e,t,n){let s=structuredClone(e);for(let e=0;e<s.length;++e)for(let t=0;t<s[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(s[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(s=s.concat(s));return super.prepare_inputs_for_generation(s,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:s}=await E(this.sessions.encodec_decode,{audio_codes:n});return s}}class $i extends G{}class qi extends $i{}class Ui extends $i{async _call(e){return new pl(await super._call(e))}}class Wi extends G{}class Xi extends Wi{}class Qi extends Wi{async _call(e){return new pl(await super._call(e))}}class Hi extends G{}class Yi extends Hi{}class Ji extends Hi{async _call(e){return new pl(await super._call(e))}}class Ki extends G{}class Zi extends Ki{}class ea extends Ki{async _call(e){return new pl(await super._call(e))}}class ta extends G{}class na extends ta{}class sa{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await s.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await G.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const ra=new 
Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",ee]],["electra",["ElectraModel",pe]],["esm",["EsmModel",Ge]],["convbert",["ConvBertModel",ie]],["camembert",["CamembertModel",we]],["deberta",["DebertaModel",Te]],["deberta-v2",["DebertaV2Model",Ae]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",ct]],["distilbert",["DistilBertModel",Ne]],["roberta",["RobertaModel",jt]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Yt]],["clap",["ClapModel",Ti]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",Cn]],["chinese_clip",["ChineseCLIPModel",vn]],["siglip",["SiglipModel",yn]],["mobilebert",["MobileBertModel",Xe]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",So]],["wav2vec2-bert",["Wav2Vec2BertModel",Qo]],["unispeech",["UniSpeechModel",Do]],["unispeech-sat",["UniSpeechSatModel",$o]],["hubert",["HubertModel",Ko]],["wavlm",["WavLMModel",ni]],["audio-spectrogram-transformer",["ASTModel",nn]],["vits",["VitsModel",Pi]],["pyannote",["PyAnnoteModel",Io]],["wespeaker-resnet",["WeSpeakerResNetModel",Oo]],["detr",["DetrModel",pr]],["rt_detr",["RTDetrModel",wr]],["table-transformer",["TableTransformerModel",xr]],["vit",["ViTModel",zs]],["pvt",["PvtModel",Ns]],["vit_msn",["ViTMSNModel",Rs]],["vit_mae",["ViTMAEModel",Ds]],["groupvit",["GroupViTModel",qs]],["fastvit",["FastViTModel",Ws]],["mobilevit",["MobileViTModel",Js]],["mobilevitv2",["MobileViTV2Model",er]],["owlvit",["OwlViTModel",sr]],["owlv2",["Owlv2Model",ir]],["beit",["BeitModel",cr]],["deit",["DeiTModel",Fr]],["hiera",["HieraModel",Sr]],["convnext",["ConvNextModel",io]],["convnextv2",["ConvNextV2Model",co]],["dinov2",["Dinov2Model",ho]],["resnet",["ResNetModel",Lr]],["swin",["SwinModel",Br]],["swin2sr",["Swin2SRModel",jr]],["donut-swin",["DonutSwinModel",ro]],["yolos",["YolosModel",fo]],["dpt",["DPTModel",Rr]],["glpn",["GLPNModel",to]],["hifigan",["SpeechT5HifiGan",ui]],["efficientnet",["EfficientNetModel",Oi]],["decision_transformer",["DecisionTransformerModel",na]],["mobilenet_v1",["MobileNetV1Model",qi]],["mobilenet_v2",["MobileNetV2Model",Xi]],["mobilenet_v3",["MobileNetV3Model",Yi]],["mobilenet_v4",["MobileNetV4Model",Zi]],["maskformer",["MaskFormerModel",Kr]]]),oa=new Map([["t5",["T5Model",_t]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",bt]],["bart",["BartModel",kt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",ko]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",Fo]],["blenderbot",["BlenderbotModel",Lt]],["blenderbot-small",["BlenderbotSmallModel",Bt]]]),ia=new Map([["bloom",["BloomModel",Ts]],["jais",["JAISModel",zn]],["gpt2",["GPT2Model",An]],["gptj",["GPTJModel",Gn]],["gpt_bigcode",["GPTBigCodeModel",Un]],["gpt_neo",["GPTNeoModel",Nn]],["gpt_neox",["GPTNeoXModel",Dn]],["codegen",["CodeGenModel",Qn]],["llama",["LlamaModel",Jn]],["granite",["GraniteModel",es]],["cohere",["CohereModel",ss]],["gemma",["GemmaModel",is]],["gemma2",["Gemma2Model",cs]],["openelm",["OpenELMModel",ps]],["qwen2",["Qwen2Model",ms]],["phi",["PhiModel",ws]],["phi3",["Phi3Model",ys]],["mpt",["MptModel",Cs]],["opt",["OPTModel",As]],["mistral",["MistralModel",mi]],["starcoder2",["Starcoder2Model",wi]],["falcon",["FalconModel",yi]],["stablelm",["StableLmModel",Ii]]]),aa=new Map([["speecht5",["SpeechT5ForSpeechToText",ci]],["whisper",["WhisperForConditionalGeneration",an]]]),la=new Map([["speecht5",["SpeechT5ForTextToSpeech",di]]]),ca=new Map([["vits",["VitsModel",Pi]],["musicgen",["MusicgenForConditionalGeneration",Gi]]]),da=new 
Map([["bert",["BertForSequenceClassification",Q]],["roformer",["RoFormerForSequenceClassification",ne]],["electra",["ElectraForSequenceClassification",_e]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",le]],["camembert",["CamembertForSequenceClassification",be]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Le]],["mpnet",["MPNetForSequenceClassification",et]],["albert",["AlbertForSequenceClassification",dt]],["distilbert",["DistilBertForSequenceClassification",Oe]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",vt]],["mbart",["MBartForSequenceClassification",St]],["mobilebert",["MobileBertForSequenceClassification",He]],["squeezebert",["SqueezeBertForSequenceClassification",it]]]),ua=new Map([["bert",["BertForTokenClassification",H]],["roformer",["RoFormerForTokenClassification",se]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",Ue]],["convbert",["ConvBertForTokenClassification",ce]],["camembert",["CamembertForTokenClassification",ye]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",tt]],["distilbert",["DistilBertForTokenClassification",je]],["roberta",["RobertaForTokenClassification",Rt]],["xlm",["XLMForTokenClassification",Xt]],["xlm-roberta",["XLMRobertaForTokenClassification",Zt]]]),pa=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",wt]],["mt5",["MT5ForConditionalGeneration",yt]],["bart",["BartForConditionalGeneration",Tt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",To]],["m2m_100",["M2M100ForConditionalGeneration",Co]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Nt]]]),ha=new Map([["bloom",["BloomForCausalLM",vs]],["gpt2",["GPT2LMHeadModel",En]],["jais",["JAISLMHeadModel",In]],["gptj",["GPTJForCausalLM",$n]],["gpt_bigcode",["GPTBigCodeForCausalLM",Wn]],["gpt_neo",["GPTNeoForCausalLM",On]],["gpt_neox",["GPTNeoXForCausalLM",Vn]],["codegen",["CodeGenForCausalLM",Hn]],["llama",["LlamaForCausalLM",Kn]],["granite",["GraniteForCausalLM",ts]],["cohere",["CohereForCausalLM",rs]],["gemma",["GemmaForCausalLM",as]],["gemma2",["Gemma2ForCausalLM",ds]],["openelm",["OpenELMForCausalLM",hs]],["qwen2",["Qwen2ForCausalLM",fs]],["phi",["PhiForCausalLM",Ms]],["phi3",["Phi3ForCausalLM",xs]],["mpt",["MptForCausalLM",Ps]],["opt",["OPTForCausalLM",Es]],["mbart",["MBartForCausalLM",At]],["mistral",["MistralForCausalLM",fi]],["starcoder2",["Starcoder2ForCausalLM",Mi]],["falcon",["FalconForCausalLM",xi]],["trocr",["TrOCRForCausalLM",hi]],["stablelm",["StableLmForCausalLM",Bi]]]),_a=new 
Map([["bert",["BertForMaskedLM",X]],["roformer",["RoFormerForMaskedLM",te]],["electra",["ElectraForMaskedLM",he]],["esm",["EsmForMaskedLM",$e]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ve]],["deberta-v2",["DebertaV2ForMaskedLM",Ee]],["mpnet",["MPNetForMaskedLM",Ze]],["albert",["AlbertForMaskedLM",pt]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",Ut]],["xlm-roberta",["XLMRobertaForMaskedLM",Jt]],["mobilebert",["MobileBertForMaskedLM",Qe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),ma=new Map([["bert",["BertForQuestionAnswering",Y]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",de]],["camembert",["CamembertForQuestionAnswering",xe]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Ie]],["mpnet",["MPNetForQuestionAnswering",nt]],["albert",["AlbertForQuestionAnswering",ut]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Gt]],["xlm",["XLMForQuestionAnswering",Qt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",en]],["mobilebert",["MobileBertForQuestionAnswering",Ye]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),fa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ln]]]),ga=new Map([["llava",["LlavaForConditionalGeneration",dn]],["moondream1",["Moondream1ForConditionalGeneration",un]],["florence2",["Florence2ForConditionalGeneration",hn]]]),wa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ln]]]),Ma=new Map([["vit",["ViTForImageClassification",Is]],["pvt",["PvtForImageClassification",Os]],["vit_msn",["ViTMSNForImageClassification",Gs]],["fastvit",["FastViTForImageClassification",Xs]],["mobilevit",["MobileViTForImageClassification",Ks]],["mobilevitv2",["MobileViTV2ForImageClassification",tr]],["beit",["BeitForImageClassification",dr]],["deit",["DeiTForImageClassification",Cr]],["hiera",["HieraForImageClassification",Ar]],["convnext",["ConvNextForImageClassification",ao]],["convnextv2",["ConvNextV2ForImageClassification",uo]],["dinov2",["Dinov2ForImageClassification",_o]],["resnet",["ResNetForImageClassification",zr]],["swin",["SwinForImageClassification",Nr]],["segformer",["SegformerForImageClassification",Ei]],["efficientnet",["EfficientNetForImageClassification",ji]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ui]],["mobilenet_v2",["MobileNetV2ForImageClassification",Qi]],["mobilenet_v3",["MobileNetV3ForImageClassification",Ji]],["mobilenet_v4",["MobileNetV4ForImageClassification",ea]]]),ba=new Map([["detr",["DetrForObjectDetection",hr]],["rt_detr",["RTDetrForObjectDetection",Mr]],["table-transformer",["TableTransformerForObjectDetection",kr]],["yolos",["YolosForObjectDetection",go]]]),ya=new Map([["owlvit",["OwlViTForObjectDetection",rr]],["owlv2",["Owlv2ForObjectDetection",ar]]]),xa=new Map([["detr",["DetrForSegmentation",_r]],["clipseg",["CLIPSegForImageSegmentation",Pn]]]),ka=new Map([["segformer",["SegformerForSemanticSegmentation",Li]],["sapiens",["SapiensForSemanticSegmentation",Wr]]]),Ta=new Map([["detr",["DetrForSegmentation",_r]],["maskformer",["MaskFormerForInstanceSegmentation",Zr]]]),va=new Map([["sam",["SamModel",bo]]]),Fa=new 
Map([["wav2vec2",["Wav2Vec2ForCTC",Ao]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ho]],["unispeech",["UniSpeechForCTC",Vo]],["unispeech-sat",["UniSpeechSatForCTC",qo]],["wavlm",["WavLMForCTC",si]],["hubert",["HubertForCTC",Zo]]]),Ca=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Eo]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Yo]],["unispeech",["UniSpeechForSequenceClassification",Ro]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Uo]],["wavlm",["WavLMForSequenceClassification",ri]],["hubert",["HubertForSequenceClassification",ei]],["audio-spectrogram-transformer",["ASTForAudioClassification",sn]]]),Pa=new Map([["wavlm",["WavLMForXVector",oi]]]),Sa=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Wo]],["wavlm",["WavLMForAudioFrameClassification",ii]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Lo]],["pyannote",["PyAnnoteForAudioFrameClassification",Bo]]]),Aa=new Map([["vitmatte",["VitMatteForImageMatting",Hs]]]),Ea=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Dr]]]),La=new Map([["dpt",["DPTForDepthEstimation",Gr]],["depth_anything",["DepthAnythingForDepthEstimation",qr]],["glpn",["GLPNForDepthEstimation",no]],["sapiens",["SapiensForDepthEstimation",Xr]],["depth_pro",["DepthProForDepthEstimation",Yr]]]),za=new Map([["sapiens",["SapiensForNormalEstimation",Qr]]]),Ia=new Map([["clip",["CLIPVisionModelWithProjection",Mn]],["siglip",["SiglipVisionModel",kn]]]),Ba=[[ra,w],[oa,M],[ia,x],[da,w],[ua,w],[pa,b],[aa,b],[ha,x],[_a,w],[ma,w],[fa,y],[ga,T],[Ma,w],[xa,w],[Ta,w],[ka,w],[Aa,w],[Ea,w],[La,w],[za,w],[ba,w],[ya,w],[va,k],[Fa,w],[Ca,w],[la,b],[ca,w],[Pa,w],[Sa,w],[Ia,w]];for(const[e,t]of Ba)for(const[n,s]of e.values())F.set(n,t),P.set(s,n),C.set(n,s);const Na=[["MusicgenForConditionalGeneration",Gi,v],["CLIPTextModelWithProjection",gn,w],["SiglipTextModel",xn,w],["ClapTextModelWithProjection",vi,w],["ClapAudioModelWithProjection",Fi,w]];for(const[e,t,n]of Na)F.set(e,n),P.set(t,e),C.set(e,t);class Oa extends sa{static MODEL_CLASS_MAPPINGS=Ba.map((e=>e[0]));static BASE_IF_FAIL=!0}class ja extends sa{static MODEL_CLASS_MAPPINGS=[da]}class Da extends sa{static MODEL_CLASS_MAPPINGS=[ua]}class Va extends sa{static MODEL_CLASS_MAPPINGS=[pa]}class Ra extends sa{static MODEL_CLASS_MAPPINGS=[aa]}class Ga extends sa{static MODEL_CLASS_MAPPINGS=[la]}class $a extends sa{static MODEL_CLASS_MAPPINGS=[ca]}class qa extends sa{static MODEL_CLASS_MAPPINGS=[ha]}class Ua extends sa{static MODEL_CLASS_MAPPINGS=[_a]}class Wa extends sa{static MODEL_CLASS_MAPPINGS=[ma]}class Xa extends sa{static MODEL_CLASS_MAPPINGS=[fa]}class Qa extends sa{static MODEL_CLASS_MAPPINGS=[Ma]}class Ha extends sa{static MODEL_CLASS_MAPPINGS=[xa]}class Ya extends sa{static MODEL_CLASS_MAPPINGS=[ka]}class Ja extends sa{static MODEL_CLASS_MAPPINGS=[Ta]}class Ka extends sa{static MODEL_CLASS_MAPPINGS=[ba]}class Za extends sa{static MODEL_CLASS_MAPPINGS=[ya]}class el extends sa{static MODEL_CLASS_MAPPINGS=[va]}class tl extends sa{static MODEL_CLASS_MAPPINGS=[Fa]}class nl extends sa{static MODEL_CLASS_MAPPINGS=[Ca]}class sl extends sa{static MODEL_CLASS_MAPPINGS=[Pa]}class rl extends sa{static MODEL_CLASS_MAPPINGS=[Sa]}class ol extends sa{static MODEL_CLASS_MAPPINGS=[wa]}class il extends sa{static MODEL_CLASS_MAPPINGS=[Aa]}class al extends sa{static MODEL_CLASS_MAPPINGS=[Ea]}class ll extends sa{static MODEL_CLASS_MAPPINGS=[La]}class cl extends sa{static MODEL_CLASS_MAPPINGS=[za]}class dl extends sa{static MODEL_CLASS_MAPPINGS=[Ia]}class ul extends 
${constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:s=null,cross_attentions:r=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=s,this.cross_attentions=r}}class pl extends ${constructor({logits:e}){super(),this.logits=e}}class hl extends ${constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class _l extends ${constructor({logits:e}){super(),this.logits=e}}class ml extends ${constructor({logits:e}){super(),this.logits=e}}class fl extends ${constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class gl extends ${constructor({logits:e}){super(),this.logits=e}}class wl extends ${constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Ml extends ${constructor({alphas:e}){super(),this.alphas=e}}class bl extends ${constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
113
113
  /*!**********************************************!*\
114
114
  !*** ./src/models/whisper/common_whisper.js ***!
115
115
  \**********************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>r,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>o,whisper_language_to_code:()=>i});const s=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],r=new Map(s),o=new Map([...s.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function i(e){e=e.toLowerCase();let t=o.get(e);if(void 0===t){if(!r.has(e)){const t=2===e.length?r.keys():r.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":