@huggingface/transformers 3.0.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -151,10 +151,10 @@ var r,a,s,i,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,S,P,E,F,A,I,z,O,B,L=Object
151
151
  \******************************/(e,t,n)=>{var r;n.r(t),n.d(t,{Tensor:()=>o.Tensor,createInferenceSession:()=>g,deviceToExecutionProviders:()=>m,isONNXProxy:()=>y,isONNXTensor:()=>_});var a=n(/*! ../env.js */"./src/env.js"),s=n(/*! onnxruntime-node */"?2ce3"),i=n(/*! #onnxruntime-webgpu */"./node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs"),o=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const l=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),u=[];let d,c;const p=Symbol.for("onnxruntime");if(p in globalThis)c=globalThis[p];else if(a.apis.IS_NODE_ENV){switch(c=s??(r||(r=n.t(s,2))),process.platform){case"win32":u.push("dml");break;case"linux":"x64"===process.arch&&u.push("cuda")}u.push("cpu"),d=["cpu"]}else c=i,a.apis.IS_WEBNN_AVAILABLE&&u.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),a.apis.IS_WEBGPU_AVAILABLE&&u.push("webgpu"),u.push("wasm"),d=["wasm"];const h=c.InferenceSession;function m(e=null){if(!e)return d;switch(e){case"auto":return u;case"gpu":return u.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(u.includes(e))return[l[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${u.join(", ")}.`)}let f=null;async function g(e,t,n){f&&await f;const r=h.create(e,t);f??=r;const a=await r;return a.config=n,a}function _(e){return e instanceof c.Tensor}const w=c?.env;function y(){return w?.wasm?.proxy}w?.wasm&&(w.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${a.env.version}/dist/`,w.wasm.proxy=!1,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(w.wasm.numThreads=1)),w?.webgpu&&(w.webgpu.powerPreference="high-performance"),a.env.backends.onnx=w},"./src/configs.js":
152
152
  /*!************************!*\
153
153
  !*** ./src/configs.js ***!
154
- \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>o,getKeyValueShapes:()=>i});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=s(e.text_config);break;case"moondream1":n=s(e.phi_config);break;case"musicgen":n=s(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"granite":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_head
s="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const a=s(e.decoder),i="num_decoder_layers"in a,o=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return i?(o.num_decoder_layers=a.num_decoder_layers,o.num_decoder_heads=a.num_decoder_heads,o.decoder_hidden_size=a.decoder_hidden_size,o.num_encoder_layers=a.num_encoder_layers,o.num_encoder_heads=a.num_encoder_heads,o.encoder_hidden_size=a.encoder_hidden_size):(o.num_layers=a.num_layers,o.num_heads=a.num_heads,o.hidden_size=a.hidden_size),o}const a={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)a[n]=e[t[n]];return a}function i(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,a=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,s=[1,r.num_encoder_heads,0,e],i=[1,r.num_decoder_heads,0,a];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=s,n[`${t}.${e}.encoder.value`]=s,n[`${t}.${e}.decoder.key`]=i,n[`${t}.${e}.decoder.value`]=i}else{const e=r.num_heads,a=r.num_layers,s=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,s];for(let 
e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*s];for(let e=0;e<a;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,s,0],i=[1*e,0,s];for(let e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=i}else if("openelm"===r.model_type)for(let r=0;r<a;++r){const a=[1,e[r],0,s];n[`${t}.${r}.key`]=a,n[`${t}.${r}.value`]=a}else{const r=[1,e,0,s];for(let e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class o{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=s(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:s=!1,revision:i="main"}={}){!n||n instanceof o||(n=new o(n));const l=n??await async function(e,t){return await(0,a.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:s,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return o.from_pretrained(...e)}}},"./src/env.js":
154
+ \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>o,getKeyValueShapes:()=>i});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=s(e.text_config);break;case"moondream1":n=s(e.phi_config);break;case"musicgen":n=s(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"olmo":case"mobilellm":case"granite":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder
_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const a=s(e.decoder),i="num_decoder_layers"in a,o=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return i?(o.num_decoder_layers=a.num_decoder_layers,o.num_decoder_heads=a.num_decoder_heads,o.decoder_hidden_size=a.decoder_hidden_size,o.num_encoder_layers=a.num_encoder_layers,o.num_encoder_heads=a.num_encoder_heads,o.encoder_hidden_size=a.encoder_hidden_size):(o.num_layers=a.num_layers,o.num_heads=a.num_heads,o.hidden_size=a.hidden_size),o}const a={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)a[n]=e[t[n]];return a}function i(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,a=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,s=[1,r.num_encoder_heads,0,e],i=[1,r.num_decoder_heads,0,a];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=s,n[`${t}.${e}.encoder.value`]=s,n[`${t}.${e}.decoder.key`]=i,n[`${t}.${e}.decoder.value`]=i}else{const e=r.num_heads,a=r.num_layers,s=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const 
r=[1*e,0,s];for(let e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*s];for(let e=0;e<a;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,s,0],i=[1*e,0,s];for(let e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=i}else if("openelm"===r.model_type)for(let r=0;r<a;++r){const a=[1,e[r],0,s];n[`${t}.${r}.key`]=a,n[`${t}.${r}.value`]=a}else{const r=[1,e,0,s];for(let e=0;e<a;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class o{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=s(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:s=!1,revision:i="main"}={}){!n||n instanceof o||(n=new o(n));const l=n??await async function(e,t){return await(0,a.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:s,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return o.from_pretrained(...e)}}},"./src/env.js":
155
155
  /*!********************!*\
156
156
  !*** ./src/env.js ***!
157
- \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),a=n(/*! path */"?3f59"),s=n(/*! url */"?154a");const i="undefined"!=typeof self,o=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(a),f=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m,_=g?a.dirname(a.dirname(s.fileURLToPath(import.meta.url))):"./",w=g?a.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.1",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:g?a.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
157
+ \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),a=n(/*! path */"?3f59"),s=n(/*! url */"?154a");const i="undefined"!=typeof self,o=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(a),f=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m;let _="./";if(g){const e=Object(import.meta).url;e?_=a.dirname(a.dirname(s.fileURLToPath(e))):"undefined"!=typeof __dirname&&(_=a.dirname(__dirname))}const w=g?a.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.2",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:g?a.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
158
158
  /*!***********************************************!*\
159
159
  !*** ./src/generation/configuration_utils.js ***!
160
160
  \***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>a});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class a{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
@@ -172,7 +172,7 @@ var r,a,s,i,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,S,P,E,F,A,I,z,O,B,L=Object
172
172
  \*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>i,TextStreamer:()=>l,WhisperTextStreamer:()=>u});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),a=n(/*! ../tokenizers.js */"./src/tokenizers.js"),s=n(/*! ../env.js */"./src/env.js");class i{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const o=s.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends i{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:a={},...s}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??o,this.token_callback_function=r,this.decode_kwargs={...a,...s},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let s;n.endsWith("\n")?(s=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,a.is_chinese_char)(n.charCodeAt(n.length-1))?(s=n.slice(this.print_len),this.print_len+=s.length):(s=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=s.length),this.on_finalized_text(s,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===o&&s.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class u extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:a=null,on_chunk_end:s=null,on_finalize:i=null,time_precision:o=.02,skip_special_tokens:l=!0,decode_kwargs:u={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...u}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=a,this.on_chunk_end=s,this.on_finalize=i,this.time_precision=o,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
173
173
  /*!***********************!*\
174
174
  !*** ./src/models.js ***!
175
- \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>an,ASTModel:()=>rn,ASTPreTrainedModel:()=>nn,AlbertForMaskedLM:()=>ht,AlbertForQuestionAnswering:()=>pt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>dt,AlbertPreTrainedModel:()=>ut,AutoModel:()=>Do,AutoModelForAudioClassification:()=>rl,AutoModelForAudioFrameClassification:()=>sl,AutoModelForCTC:()=>nl,AutoModelForCausalLM:()=>Uo,AutoModelForDepthEstimation:()=>ul,AutoModelForDocumentQuestionAnswering:()=>il,AutoModelForImageClassification:()=>Ko,AutoModelForImageFeatureExtraction:()=>cl,AutoModelForImageMatting:()=>ol,AutoModelForImageSegmentation:()=>Qo,AutoModelForImageToImage:()=>ll,AutoModelForMaskGeneration:()=>tl,AutoModelForMaskedLM:()=>Wo,AutoModelForNormalEstimation:()=>dl,AutoModelForObjectDetection:()=>Jo,AutoModelForQuestionAnswering:()=>Ho,AutoModelForSemanticSegmentation:()=>Yo,AutoModelForSeq2SeqLM:()=>Vo,AutoModelForSequenceClassification:()=>Ro,AutoModelForSpeechSeq2Seq:()=>jo,AutoModelForTextToSpectrogram:()=>qo,AutoModelForTextToWaveform:()=>Go,AutoModelForTokenClassification:()=>No,AutoModelForUniversalSegmentation:()=>Zo,AutoModelForVision2Seq:()=>Xo,AutoModelForXVector:()=>al,AutoModelForZeroShotObjectDetection:()=>el,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>$t,BartModel:()=>Tt,BartPretrainedModel:()=>Mt,BaseModelOutput:()=>U,BeitForImageClassification:()=>ca,BeitModel:()=>da,BeitPreTrainedModel:()=>ua,BertForMaskedLM:()=>X,BertForQuestionAnswering:()=>Y,BertForSequenceClassification:()=>K,BertForTokenClassification:()=>Q,BertModel:()=>H,BertPreTrainedModel:()=>W,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>It,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Lt,BlenderbotSmallModel:()=>Bt,BlenderbotSmallPreTrainedModel:()=>Ot,BloomForCausalLM:()=>$r,BloomModel:()=>kr,BloomPreTrainedModel:()=>Tr,CLIPModel:()=>gn,CLIPPreTrainedModel:()=>fn,CLIPSegForImageSegmentation:()=>Pn,CLIPSegMo
del:()=>Sn,CLIPSegPreTrainedModel:()=>Cn,CLIPTextModel:()=>_n,CLIPTextModelWithProjection:()=>wn,CLIPVisionModel:()=>yn,CLIPVisionModelWithProjection:()=>bn,CamembertForMaskedLM:()=>be,CamembertForQuestionAnswering:()=>Me,CamembertForSequenceClassification:()=>ve,CamembertForTokenClassification:()=>xe,CamembertModel:()=>ye,CamembertPreTrainedModel:()=>we,CausalLMOutput:()=>wl,CausalLMOutputWithPast:()=>yl,ChineseCLIPModel:()=>$n,ChineseCLIPPreTrainedModel:()=>kn,ClapAudioModelWithProjection:()=>$i,ClapModel:()=>Ti,ClapPreTrainedModel:()=>Mi,ClapTextModelWithProjection:()=>ki,CodeGenForCausalLM:()=>Qn,CodeGenModel:()=>Kn,CodeGenPreTrainedModel:()=>Xn,CohereForCausalLM:()=>sr,CohereModel:()=>ar,CoherePreTrainedModel:()=>rr,ConvBertForMaskedLM:()=>le,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ue,ConvBertForTokenClassification:()=>de,ConvBertModel:()=>oe,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>ls,ConvNextModel:()=>os,ConvNextPreTrainedModel:()=>is,ConvNextV2ForImageClassification:()=>cs,ConvNextV2Model:()=>ds,ConvNextV2PreTrainedModel:()=>us,DPTForDepthEstimation:()=>qa,DPTModel:()=>ja,DPTPreTrainedModel:()=>Va,DebertaForMaskedLM:()=>$e,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Ce,DebertaForTokenClassification:()=>Se,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>Te,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>Oe,DebertaV2ForSequenceClassification:()=>Ie,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Fe,DebertaV2PreTrainedModel:()=>Ee,DecisionTransformerModel:()=>no,DecisionTransformerPreTrainedModel:()=>to,DeiTForImageClassification:()=>Sa,DeiTModel:()=>Ca,DeiTPreTrainedModel:()=>$a,DepthAnythingForDepthEstimation:()=>Ua,DepthAnythingPreTrainedModel:()=>Ga,DepthProForDepthEstimation:()=>Ya,DepthProPreTrainedModel:()=>Qa,DetrForObjectDetection:()=>ma,DetrForSegmentation:()=>fa,DetrModel:()=>ha,DetrObjectDetectionOutput:()=>ga,DetrPreTrainedModel:()=>pa,Detr
SegmentationOutput:()=>_a,Dinov2ForImageClassification:()=>ms,Dinov2Model:()=>hs,Dinov2PreTrainedModel:()=>ps,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>Ne,DistilBertForSequenceClassification:()=>De,DistilBertForTokenClassification:()=>Re,DistilBertModel:()=>Le,DistilBertPreTrainedModel:()=>Be,DonutSwinModel:()=>ss,DonutSwinPreTrainedModel:()=>as,EfficientNetForImageClassification:()=>Di,EfficientNetModel:()=>Li,EfficientNetPreTrainedModel:()=>Bi,ElectraForMaskedLM:()=>me,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>fe,ElectraForTokenClassification:()=>ge,ElectraModel:()=>he,ElectraPreTrainedModel:()=>pe,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>Ue,EsmForTokenClassification:()=>We,EsmModel:()=>qe,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>xi,FalconModel:()=>vi,FalconPreTrainedModel:()=>bi,FastViTForImageClassification:()=>Xr,FastViTModel:()=>Hr,FastViTPreTrainedModel:()=>Wr,Florence2ForConditionalGeneration:()=>mn,Florence2PreTrainedModel:()=>hn,GLPNForDepthEstimation:()=>rs,GLPNModel:()=>ns,GLPNPreTrainedModel:()=>ts,GPT2LMHeadModel:()=>An,GPT2Model:()=>Fn,GPT2PreTrainedModel:()=>En,GPTBigCodeForCausalLM:()=>Hn,GPTBigCodeModel:()=>Wn,GPTBigCodePreTrainedModel:()=>Un,GPTJForCausalLM:()=>Gn,GPTJModel:()=>qn,GPTJPreTrainedModel:()=>jn,GPTNeoForCausalLM:()=>Dn,GPTNeoModel:()=>Ln,GPTNeoPreTrainedModel:()=>Bn,GPTNeoXForCausalLM:()=>Vn,GPTNeoXModel:()=>Nn,GPTNeoXPreTrainedModel:()=>Rn,Gemma2ForCausalLM:()=>cr,Gemma2Model:()=>dr,Gemma2PreTrainedModel:()=>ur,GemmaForCausalLM:()=>lr,GemmaModel:()=>or,GemmaPreTrainedModel:()=>ir,GraniteForCausalLM:()=>nr,GraniteModel:()=>tr,GranitePreTrainedModel:()=>er,GroupViTModel:()=>Ur,GroupViTPreTrainedModel:()=>Gr,HieraForImageClassification:()=>Fa,HieraModel:()=>Ea,HieraPreTrainedModel:()=>Pa,HubertForCTC:()=>Js,HubertForSequenceClassification:()=>ei,HubertModel:()=>Zs,HubertPreTrainedModel:()=>Ys,ImageMattingOutput:()=>bl,JAISLMHeadModel:()=>On,JAISModel:()=>zn,JAISPre
TrainedModel:()=>In,LlamaForCausalLM:()=>Jn,LlamaModel:()=>Zn,LlamaPreTrainedModel:()=>Yn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>dn,LongT5ForConditionalGeneration:()=>yt,LongT5Model:()=>wt,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>Cs,M2M100Model:()=>$s,M2M100PreTrainedModel:()=>ks,MBartForCausalLM:()=>Ft,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>Et,MBartModel:()=>St,MBartPreTrainedModel:()=>Ct,MPNetForMaskedLM:()=>et,MPNetForQuestionAnswering:()=>rt,MPNetForSequenceClassification:()=>tt,MPNetForTokenClassification:()=>nt,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ze,MT5ForConditionalGeneration:()=>xt,MT5Model:()=>vt,MT5PreTrainedModel:()=>bt,MarianMTModel:()=>Ts,MarianModel:()=>Ms,MarianPreTrainedModel:()=>xs,MaskFormerForInstanceSegmentation:()=>es,MaskFormerModel:()=>Ja,MaskFormerPreTrainedModel:()=>Za,MaskedLMOutput:()=>gl,MistralForCausalLM:()=>gi,MistralModel:()=>fi,MistralPreTrainedModel:()=>mi,MobileBertForMaskedLM:()=>Ke,MobileBertForQuestionAnswering:()=>Ye,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Xe,MobileBertPreTrainedModel:()=>He,MobileNetV1ForImageClassification:()=>Ui,MobileNetV1Model:()=>Gi,MobileNetV1PreTrainedModel:()=>qi,MobileNetV2ForImageClassification:()=>Xi,MobileNetV2Model:()=>Hi,MobileNetV2PreTrainedModel:()=>Wi,MobileNetV3ForImageClassification:()=>Yi,MobileNetV3Model:()=>Qi,MobileNetV3PreTrainedModel:()=>Ki,MobileNetV4ForImageClassification:()=>eo,MobileNetV4Model:()=>Ji,MobileNetV4PreTrainedModel:()=>Zi,MobileViTForImageClassification:()=>Jr,MobileViTModel:()=>Zr,MobileViTPreTrainedModel:()=>Yr,MobileViTV2ForImageClassification:()=>na,MobileViTV2Model:()=>ta,MobileViTV2PreTrainedModel:()=>ea,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>pn,MptForCausalLM:()=>Pr,MptModel:()=>Sr,MptPreTrainedModel:()=>Cr,MusicgenForCausalLM:()=>Vi,MusicgenForConditionalGeneration:()=>ji,MusicgenModel:()=>Ni,MusicgenPreTrainedModel:()=>Ri,NomicBertModel
:()=>J,NomicBertPreTrainedModel:()=>Z,OPTForCausalLM:()=>Ar,OPTModel:()=>Fr,OPTPreTrainedModel:()=>Er,OpenELMForCausalLM:()=>mr,OpenELMModel:()=>hr,OpenELMPreTrainedModel:()=>pr,OwlViTForObjectDetection:()=>sa,OwlViTModel:()=>aa,OwlViTPreTrainedModel:()=>ra,Owlv2ForObjectDetection:()=>la,Owlv2Model:()=>oa,Owlv2PreTrainedModel:()=>ia,Phi3ForCausalLM:()=>Mr,Phi3Model:()=>xr,Phi3PreTrainedModel:()=>vr,PhiForCausalLM:()=>br,PhiModel:()=>yr,PhiPreTrainedModel:()=>wr,PreTrainedModel:()=>q,PretrainedMixin:()=>ro,PvtForImageClassification:()=>Dr,PvtModel:()=>Lr,PvtPreTrainedModel:()=>Br,PyAnnoteForAudioFrameClassification:()=>Os,PyAnnoteModel:()=>zs,PyAnnotePreTrainedModel:()=>Is,QuestionAnsweringModelOutput:()=>_l,Qwen2ForCausalLM:()=>_r,Qwen2Model:()=>gr,Qwen2PreTrainedModel:()=>fr,RTDetrForObjectDetection:()=>ba,RTDetrModel:()=>ya,RTDetrObjectDetectionOutput:()=>va,RTDetrPreTrainedModel:()=>wa,ResNetForImageClassification:()=>za,ResNetModel:()=>Ia,ResNetPreTrainedModel:()=>Aa,RoFormerForMaskedLM:()=>ne,RoFormerForQuestionAnswering:()=>se,RoFormerForSequenceClassification:()=>re,RoFormerForTokenClassification:()=>ae,RoFormerModel:()=>te,RoFormerPreTrainedModel:()=>ee,RobertaForMaskedLM:()=>Nt,RobertaForQuestionAnswering:()=>qt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Rt,RobertaPreTrainedModel:()=>Dt,SamImageSegmentationOutput:()=>vs,SamModel:()=>bs,SamPreTrainedModel:()=>ys,SapiensForDepthEstimation:()=>Xa,SapiensForNormalEstimation:()=>Ka,SapiensForSemanticSegmentation:()=>Ha,SapiensPreTrainedModel:()=>Wa,SegformerForImageClassification:()=>Fi,SegformerForSemanticSegmentation:()=>Ai,SegformerModel:()=>Ei,SegformerPreTrainedModel:()=>Pi,Seq2SeqLMOutput:()=>pl,SequenceClassifierOutput:()=>hl,SiglipModel:()=>xn,SiglipPreTrainedModel:()=>vn,SiglipTextModel:()=>Mn,SiglipVisionModel:()=>Tn,SpeechT5ForSpeechToText:()=>ui,SpeechT5ForTextToSpeech:()=>di,SpeechT5HifiGan:()=>ci,SpeechT5Model:()=>li,SpeechT5PreTrainedModel:()=>oi,
SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>lt,SqueezeBertForSequenceClassification:()=>ot,SqueezeBertModel:()=>st,SqueezeBertPreTrainedModel:()=>at,StableLmForCausalLM:()=>Oi,StableLmModel:()=>zi,StableLmPreTrainedModel:()=>Ii,Starcoder2ForCausalLM:()=>yi,Starcoder2Model:()=>wi,Starcoder2PreTrainedModel:()=>_i,Swin2SRForImageSuperResolution:()=>Na,Swin2SRModel:()=>Ra,Swin2SRPreTrainedModel:()=>Da,SwinForImageClassification:()=>La,SwinModel:()=>Ba,SwinPreTrainedModel:()=>Oa,T5ForConditionalGeneration:()=>gt,T5Model:()=>ft,T5PreTrainedModel:()=>mt,TableTransformerForObjectDetection:()=>Ta,TableTransformerModel:()=>Ma,TableTransformerObjectDetectionOutput:()=>ka,TableTransformerPreTrainedModel:()=>xa,TokenClassifierOutput:()=>fl,TrOCRForCausalLM:()=>hi,TrOCRPreTrainedModel:()=>pi,UniSpeechForCTC:()=>Ns,UniSpeechForSequenceClassification:()=>Vs,UniSpeechModel:()=>Rs,UniSpeechPreTrainedModel:()=>Ds,UniSpeechSatForAudioFrameClassification:()=>Ws,UniSpeechSatForCTC:()=>Gs,UniSpeechSatForSequenceClassification:()=>Us,UniSpeechSatModel:()=>qs,UniSpeechSatPreTrainedModel:()=>js,ViTForImageClassification:()=>Or,ViTMAEModel:()=>Nr,ViTMAEPreTrainedModel:()=>Rr,ViTMSNForImageClassification:()=>qr,ViTMSNModel:()=>jr,ViTMSNPreTrainedModel:()=>Vr,ViTModel:()=>zr,ViTPreTrainedModel:()=>Ir,VisionEncoderDecoderModel:()=>un,VitMatteForImageMatting:()=>Qr,VitMattePreTrainedModel:()=>Kr,VitsModel:()=>Si,VitsModelOutput:()=>vl,VitsPreTrainedModel:()=>Ci,Wav2Vec2BertForCTC:()=>Ks,Wav2Vec2BertForSequenceClassification:()=>Qs,Wav2Vec2BertModel:()=>Xs,Wav2Vec2BertPreTrainedModel:()=>Hs,Wav2Vec2ForAudioFrameClassification:()=>As,Wav2Vec2ForCTC:()=>Es,Wav2Vec2ForSequenceClassification:()=>Fs,Wav2Vec2Model:()=>Ps,Wav2Vec2PreTrainedModel:()=>Ss,WavLMForAudioFrameClassification:()=>ii,WavLMForCTC:()=>ri,WavLMForSequenceClassification:()=>ai,WavLMForXVector:()=>si,WavLMModel:()=>ni,WavLMPreTrainedModel:()=>ti,WeSpeakerResNetModel:()=>Ls,WeSpeakerResNetPreTrainedModel:()=>Bs,Wh
isperForConditionalGeneration:()=>ln,WhisperModel:()=>on,WhisperPreTrainedModel:()=>sn,XLMForQuestionAnswering:()=>Kt,XLMForSequenceClassification:()=>Ht,XLMForTokenClassification:()=>Xt,XLMModel:()=>Ut,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Zt,XLMRobertaForQuestionAnswering:()=>tn,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>en,XLMRobertaModel:()=>Yt,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>Wt,XVectorOutput:()=>ml,YolosForObjectDetection:()=>_s,YolosModel:()=>gs,YolosObjectDetectionOutput:()=>ws,YolosPreTrainedModel:()=>fs});var r=n(/*! ./configs.js */"./src/configs.js"),a=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),u=n(/*! ./utils/constants.js */"./src/utils/constants.js"),d=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),c=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),p=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),h=n(/*! ./utils/maths.js */"./src/utils/maths.js"),m=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),f=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),g=n(/*! ./env.js */"./src/env.js"),_=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),w=n(/*! 
./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const y=0,b=1,v=2,x=3,M=4,T=5,k=6,$=7,C=new Map,S=new Map,P=new Map;async function E(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:o,session_options:u,session_config:d}=await async function(e,t,n){const i=n.config?.["transformers.js_config"]??{};let o=n.device??i.device;o&&"string"!=typeof o&&(o.hasOwnProperty(t)?o=o[t]:(console.warn(`device not specified for "${t}". Using the default device.`),o=null));const u=o??(g.apis.IS_NODE_ENV?"cpu":"wasm"),d=(0,a.deviceToExecutionProviders)(u);let c=n.dtype??i.dtype;"string"!=typeof c&&(c&&c.hasOwnProperty(t)?c=c[t]:(c=s.DEFAULT_DEVICE_DTYPE_MAPPING[u]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${c}) for this device (${u}).`)));const p=c;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(p))throw new Error(`Invalid dtype: ${p}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(p===s.DATA_TYPES.fp16&&"webgpu"===u&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${u}) does not support fp16.`);const h=i.kv_cache_dtype?"string"==typeof i.kv_cache_dtype?i.kv_cache_dtype:i.kv_cache_dtype[p]??"float32":void 0;if(h&&!["float32","float16"].includes(h))throw new Error(`Invalid kv_cache_dtype: ${h}. Should be one of: float32, float16`);const m={dtype:p,kv_cache_dtype:h},f=s.DEFAULT_DTYPE_SUFFIX_MAPPING[p],_=`${n.subfolder??""}/${t}${f}.onnx`,w={...n.session_options};w.executionProviders??=d;const y=i.free_dimension_overrides;y?w.freeDimensionOverrides??=y:u.startsWith("webnn")&&!w.freeDimensionOverrides&&console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". 
When `free_dimension_overrides` is not set, you may experience significant performance degradation.');const b=(0,l.getModelFile)(e,_,!0,n),v=n.use_external_data_format??i.use_external_data_format;let x=[];if(v&&(!0===v||"object"==typeof v&&v.hasOwnProperty(t)&&!0===v[t])){if(g.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${f}.onnx_data`,a=`${n.subfolder??""}/${r}`;x.push(new Promise((async(t,s)=>{const i=await(0,l.getModelFile)(e,a,!0,n);t({path:r,data:i})})))}else void 0!==w.externalData&&(x=w.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(x.length>0&&(w.externalData=await Promise.all(x)),"webgpu"===u){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,a.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";w.preferredOutputLocation=t}}return{buffer:await b,session_options:w,session_config:m}}(e,t[i],n);return[i,await(0,a.createInferenceSession)(o,u,d)]}))))}async function F(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async r=>[r,await(0,l.getModelJSON)(e,t[r],!1,n)]))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const s of e.inputNames){const e=t[s];e instanceof p.Tensor?n[s]=(0,a.isONNXProxy)()?e.clone():e:r.push(s)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const s=Object.keys(t).length,i=e.inputNames.length;if(s>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${i}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=I(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function I(e){for(let t in e)(0,a.isONNXTensor)(e[t])?e[t]=new p.Tensor(e[t]):"object"==typeof e[t]&&I(e[t]);return e}function z(e){if(e instanceof p.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new p.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new p.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function O(e){return new p.Tensor("bool",[e],[1])}async function B(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:a,...s}=t;if(!n){const r=(0,o.pick)(t,e.sessions.model.inputNames);n=(await L(e,r)).last_hidden_state}s.input_ids=a,s.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await D(e,s,!0)}async function L(e,t){const n=e.sessions.model,r=(0,o.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new p.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await A(n,r)}async function D(e,t,n=!1){const 
r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:a,...s}=t;r.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=O(!!a)),r.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:a}=e,[s,i]=a.dims,o=new BigInt64Array(a.data.length);for(let e=0;e<s;++e){const t=e*i;let n=BigInt(0);for(let e=0;e<i;++e){const r=t+e;0n===a.data[r]?o[r]=BigInt(1):(o[r]=n,n+=a.data[r])}}let l=new p.Tensor("int64",o,a.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(s,a)),e.addPastKeyValues(s,a);const i=(0,o.pick)(s,r.inputNames);return await A(r,i)}async function R(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:a=null,inputs_embeds:s=null,past_key_values:i=null,generation_config:o=null,logits_processor:l=null,...u}){if(!s)if(s=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const a=await e.encode_image({pixel_values:r});({inputs_embeds:s,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:a,inputs_embeds:s,input_ids:t,attention_mask:n}))}else if(i&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(i)[0].dims.at(-2);n=(0,p.cat)([(0,p.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await D(e,{inputs_embeds:s,past_key_values:i,attention_mask:n,position_ids:a,generation_config:o,logits_processor:l},!0)}function N(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:a}=n;if(a&&a.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const a=e.config.num_image_tokens;if(!a)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=r.dims[1]-(t-a);n.input_ids=r.slice(null,[-s,null]),n.attention_mask=(0,p.ones)([1,t+s])}}return n}function V(e,t,n,r){return 
n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:z(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):N(e,...t)}class q extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t,n){super(),this.config=e,this.sessions=t,this.configs=n;const r=P.get(this.constructor),a=C.get(r);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,a){case M:this.can_generate=!0,this._forward=D,this._prepare_inputs_for_generation=N;break;case v:case x:case $:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=V;break;case b:this._forward=B;break;case k:this.can_generate=!0,this._forward=R,this._prepare_inputs_for_generation=j;break;default:this._forward=L}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:a=null,local_files_only:s=!1,revision:i="main",model_file_name:o=null,subfolder:l="onnx",device:d=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:a,local_files_only:s,revision:i,model_file_name:o,subfolder:l,device:d,dtype:c,use_external_data_format:p,session_options:h};const f=P.get(this),g=C.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===M)_=await Promise.all([E(e,{model:m.model_file_name??"model"},m),F(e,{generation_config:"generation_config.json"},m)]);else if(g===v||g===x)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),F(e,{generation_config:"generation_config.json"},m)]);else if(g===T)_=await 
Promise.all([E(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===b)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===k){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([E(e,t,m),F(e,{generation_config:"generation_config.json"},m)])}else g===$?_=await Promise.all([E(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),F(e,{generation_config:"generation_config.json"},m)]):(g!==y&&console.warn(`Model type for '${f??n?.model_type}' not found, assuming encoder-only architecture. Please report this at ${u.GITHUB_ISSUE_URL}.`),_=await Promise.all([E(e,{model:m.model_file_name??"model"},m)]));return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}get generation_config(){return this.configs?.generation_config??null}_get_logits_warper(e){const t=new d.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new d.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new d.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new d.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new d.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new d.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new d.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new d.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new d.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new 
d.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new d.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new d.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new d.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new d.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=c.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const a=new n(r);return Object.assign(a,this.generation_config??{}),e&&Object.assign(a,e),t&&Object.assign(a,(0,o.pick)(t,Object.getOwnPropertyNames(a))),a}_get_stopping_criteria(e,t=null){const n=new m.StoppingCriteriaList;return null!==e.max_length&&n.push(new m.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new m.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[mo,_o,ho,oo],t=P.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let a=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(a+=` Please use the following class instead: ${[...n].join(", ")}`),Error(a)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new 
p.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,p.cat)([n.attention_mask,(0,p.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,o.pick)(n,this.forward_params),a=this.main_input_name;if(a in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else r[a]=e;return{inputs_tensor:r[a],model_inputs:r,model_input_name:a}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...a}=t,s=await this._prepare_inputs_embeds(t);t={...a,...(0,o.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:a}=await L(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)a=(0,p.cat)([a,(0,p.full_like)(a,0)],0),"attention_mask"in t&&(t.attention_mask=(0,p.cat)([t.attention_mask,(0,p.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=z(t.decoder_input_ids).dims[0];if(e!==a.dims[0]){if(1!==a.dims[0])throw new Error(`The encoder outputs have a different batch size (${a.dims[0]}) than the decoder inputs (${e}).`);a=(0,p.cat)(Array.from({length:e},(()=>a)),0)}}return t.encoder_outputs=a,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:a,generation_config:s}){let{decoder_input_ids:i,...o}=n;if(!(i instanceof p.Tensor)){if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(r??=a,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);i=r}else 
i=Array.from({length:e},(()=>[r]));i=z(i)}return n.decoder_attention_mask=(0,p.ones_like)(i),{input_ids:i,model_inputs:o}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:a=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:i,model_inputs:o,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const u=this.config.is_encoder_decoder;let d;u&&("encoder_outputs"in o||(o=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:o,model_input_name:l,generation_config:t}))),u?({input_ids:d,model_inputs:o}=this._prepare_decoder_input_ids_for_generation({batch_size:o[l].dims.at(0),model_input_name:l,model_kwargs:o,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=o[l];let c=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=c+t.max_new_tokens);const h=this._get_logits_processor(t,c,n),m=this._get_stopping_criteria(t,r),g=o[l].dims.at(0),_=f.LogitsSampler.getSampler(t),w=new Array(g).fill(0),y=d.tolist();let b;a&&a.put(y);let v={};for(;;){if(o=this.prepare_inputs_for_generation(y,o,t),b=await this.forward(o),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(b);for(const t in e)t in v||(v[t]=[]),v[t].push(e[t])}const e=h(y,b.logits.slice(null,-1,null)),n=[];for(let t=0;t<e.dims.at(0);++t){const r=e[t],a=await _(r);for(const[e,r]of a){const a=BigInt(e);w[t]+=r,y[t].push(a),n.push([a]);break}}a&&a.put(n);if(m(y).every((e=>e)))break;o=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:b,model_inputs:o,is_encoder_decoder:u})}a&&a.end();const x=this.getPastKeyValues(b,o.past_key_values,!0),M=new p.Tensor("int64",y.flat(),[y.length,y[0].length]);if(t.return_dict_in_generate)return{sequences:M,past_key_values:x,...v};for(const e of Object.values(b))"gpu-buffer"===e.location&&e.dispose();return 
M}getPastKeyValues(e,t,n=!1){const r=Object.create(null);for(const a in e)if(a.startsWith("present")){const s=a.replace("present","past_key_values"),i=a.includes("encoder");if(r[s]=i&&t?t[s]:e[a],t&&(!i||n)){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.sessions.decoder_model_merged??this.sessions.model,n=t?.config?.kv_cache_dtype??"float32",a="float16"===n?new Uint16Array:[],s=(0,r.getKeyValueShapes)(this.config);for(const t in s)e[t]=new p.Tensor(n,a,s[t])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class U extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class W extends q{}class H extends W{}class X extends W{async _call(e){return new gl(await super._call(e))}}class K extends W{async _call(e){return new hl(await super._call(e))}}class Q extends W{async _call(e){return new fl(await super._call(e))}}class Y extends W{async _call(e){return new _l(await super._call(e))}}class Z extends q{}class J extends Z{}class ee extends q{}class te extends ee{}class ne extends ee{async _call(e){return new gl(await super._call(e))}}class re extends ee{async _call(e){return new hl(await super._call(e))}}class ae extends ee{async _call(e){return new fl(await super._call(e))}}class se 
extends ee{async _call(e){return new _l(await super._call(e))}}class ie extends q{}class oe extends ie{}class le extends ie{async _call(e){return new gl(await super._call(e))}}class ue extends ie{async _call(e){return new hl(await super._call(e))}}class de extends ie{async _call(e){return new fl(await super._call(e))}}class ce extends ie{async _call(e){return new _l(await super._call(e))}}class pe extends q{}class he extends pe{}class me extends pe{async _call(e){return new gl(await super._call(e))}}class fe extends pe{async _call(e){return new hl(await super._call(e))}}class ge extends pe{async _call(e){return new fl(await super._call(e))}}class _e extends pe{async _call(e){return new _l(await super._call(e))}}class we extends q{}class ye extends we{}class be extends we{async _call(e){return new gl(await super._call(e))}}class ve extends we{async _call(e){return new hl(await super._call(e))}}class xe extends we{async _call(e){return new fl(await super._call(e))}}class Me extends we{async _call(e){return new _l(await super._call(e))}}class Te extends q{}class ke extends Te{}class $e extends Te{async _call(e){return new gl(await super._call(e))}}class Ce extends Te{async _call(e){return new hl(await super._call(e))}}class Se extends Te{async _call(e){return new fl(await super._call(e))}}class Pe extends Te{async _call(e){return new _l(await super._call(e))}}class Ee extends q{}class Fe extends Ee{}class Ae extends Ee{async _call(e){return new gl(await super._call(e))}}class Ie extends Ee{async _call(e){return new hl(await super._call(e))}}class ze extends Ee{async _call(e){return new fl(await super._call(e))}}class Oe extends Ee{async _call(e){return new _l(await super._call(e))}}class Be extends q{}class Le extends Be{}class De extends Be{async _call(e){return new hl(await super._call(e))}}class Re extends Be{async _call(e){return new fl(await super._call(e))}}class Ne extends Be{async _call(e){return new _l(await super._call(e))}}class Ve extends Be{async 
_call(e){return new gl(await super._call(e))}}class je extends q{}class qe extends je{}class Ge extends je{async _call(e){return new gl(await super._call(e))}}class Ue extends je{async _call(e){return new hl(await super._call(e))}}class We extends je{async _call(e){return new fl(await super._call(e))}}class He extends q{}class Xe extends He{}class Ke extends He{async _call(e){return new gl(await super._call(e))}}class Qe extends He{async _call(e){return new hl(await super._call(e))}}class Ye extends He{async _call(e){return new _l(await super._call(e))}}class Ze extends q{}class Je extends Ze{}class et extends Ze{async _call(e){return new gl(await super._call(e))}}class tt extends Ze{async _call(e){return new hl(await super._call(e))}}class nt extends Ze{async _call(e){return new fl(await super._call(e))}}class rt extends Ze{async _call(e){return new _l(await super._call(e))}}class at extends q{}class st extends at{}class it extends at{async _call(e){return new gl(await super._call(e))}}class ot extends at{async _call(e){return new hl(await super._call(e))}}class lt extends at{async _call(e){return new _l(await super._call(e))}}class ut extends q{}class dt extends ut{}class ct extends ut{async _call(e){return new hl(await super._call(e))}}class pt extends ut{async _call(e){return new _l(await super._call(e))}}class ht extends ut{async _call(e){return new gl(await super._call(e))}}class mt extends q{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"]}class ft extends mt{}class gt extends mt{}class _t extends q{}class wt extends _t{}class yt extends _t{}class bt extends q{}class vt extends bt{}class xt extends bt{}class Mt extends q{}class Tt extends Mt{}class kt extends Mt{}class $t extends Mt{async _call(e){return new hl(await super._call(e))}}class Ct extends q{}class St extends Ct{}class Pt extends Ct{}class Et extends Ct{async _call(e){return new hl(await super._call(e))}}class Ft extends 
Ct{}class At extends q{}class It extends At{}class zt extends At{}class Ot extends q{}class Bt extends Ot{}class Lt extends Ot{}class Dt extends q{}class Rt extends Dt{}class Nt extends Dt{async _call(e){return new gl(await super._call(e))}}class Vt extends Dt{async _call(e){return new hl(await super._call(e))}}class jt extends Dt{async _call(e){return new fl(await super._call(e))}}class qt extends Dt{async _call(e){return new _l(await super._call(e))}}class Gt extends q{}class Ut extends Gt{}class Wt extends Gt{async _call(e){return new gl(await super._call(e))}}class Ht extends Gt{async _call(e){return new hl(await super._call(e))}}class Xt extends Gt{async _call(e){return new fl(await super._call(e))}}class Kt extends Gt{async _call(e){return new _l(await super._call(e))}}class Qt extends q{}class Yt extends Qt{}class Zt extends Qt{async _call(e){return new gl(await super._call(e))}}class Jt extends Qt{async _call(e){return new hl(await super._call(e))}}class en extends Qt{async _call(e){return new fl(await super._call(e))}}class tn extends Qt{async _call(e){return new _l(await super._call(e))}}class nn extends q{}class rn extends nn{}class an extends nn{}class sn extends q{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"]}class on extends sn{}class ln extends sn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,_.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const a=`<|${(0,w.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[a]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. 
If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...a}){t=this._prepare_generation_config(t,a);const s=a.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new d.LogitsProcessorList,n.push(new d.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(n??=new d.LogitsProcessorList,n.push(new d.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:s,...a});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let a=this.config.median_filter_width;void 0===a&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),a=7);const s=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,p.cat)(s.map((e=>e[t])),2))),l=(0,p.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[u,d]=(0,p.std_mean)(l,-2,0,!0),c=l.clone();for(let e=0;e<c.dims[0];++e){const t=c[e];for(let n=0;n<t.dims[0];++n){const r=t[n],s=u[e][n][0].data,i=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/s[e];t.set((0,h.medianFilter)(t,a))}}}const m=[(0,p.mean)(c,1)],f=e.sequences.dims,g=new p.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=m[e].neg().squeeze_(0),[n,a]=(0,h.dynamic_time_warping)(t.tolist()),s=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,o.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(a[e]*r);g[e].data.set(l,1)}return g}}class un extends q{main_input_name="pixel_values";forward_params=["pixel_values","decoder_input_ids","encoder_hidden_states","past_key_values"]}class dn extends q{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"]}class cn extends dn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const a=this.config.image_token_index,s=n.tolist().map((e=>e.findIndex((e=>e==a)))),i=s.every((e=>-1===e)),o=s.every((e=>-1!==e));if(!i&&!o)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:r};const l=[],u=[];for(let n=0;n<s.length;++n){const 
a=s[n],i=e[n],o=t[n],d=r[n];l.push((0,p.cat)([i.slice([0,a]),o,i.slice([a+1,i.dims[0]])],0)),u.push((0,p.cat)([d.slice([0,a]),(0,p.ones)([o.dims[0]]),d.slice([a+1,d.dims[0]])],0))}return{inputs_embeds:(0,p.stack)(l,0),attention_mask:(0,p.stack)(u,0)}}}class pn extends cn{}class hn extends q{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class mn extends hn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,p.cat)([t,e],1),attention_mask:(0,p.cat)([(0,p.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let a,s;return e&&(a=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),a&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:a,image_features:s,input_ids:e,attention_mask:r})):n=a||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:a,encoder_outputs:s,past_key_values:i,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!s){let{last_hidden_state:e}=await L(this,{inputs_embeds:o,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:a,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:i};return await D(this,u,!0)}}class fn extends q{}class gn extends fn{}class _n extends fn{static async 
from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class wn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class yn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class bn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class vn extends q{}class xn extends vn{}class Mn extends vn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Tn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class kn extends q{}class $n extends kn{}class Cn extends q{}class Sn extends Cn{}class Pn extends Cn{}class En extends q{}class Fn extends En{}class An extends En{}class In extends q{}class zn extends In{}class On extends In{}class Bn extends q{}class Ln extends Bn{}class Dn extends Bn{}class Rn extends q{}class Nn extends Rn{}class Vn extends Rn{}class jn extends q{}class qn extends jn{}class Gn extends jn{}class Un extends q{}class Wn extends Un{}class Hn extends Un{}class Xn extends q{}class Kn extends Xn{}class Qn extends Xn{}class Yn extends q{}class Zn extends Yn{}class Jn extends Yn{}class er extends q{}class tr extends er{}class nr extends er{}class rr extends q{}class ar extends rr{}class sr extends rr{}class ir extends q{}class or extends ir{}class lr extends ir{}class ur extends q{}class dr extends ur{}class cr extends ur{}class pr extends q{}class hr extends pr{}class mr extends pr{}class fr extends q{}class gr extends fr{}class _r extends fr{}class wr extends q{}class yr extends wr{}class br extends wr{}class vr extends q{}class xr extends vr{}class Mr extends vr{}class Tr extends q{}class kr extends Tr{}class $r extends Tr{}class Cr extends q{}class Sr extends Cr{}class Pr extends Cr{}class Er extends 
q{}class Fr extends Er{}class Ar extends Er{}class Ir extends q{}class zr extends Ir{}class Or extends Ir{async _call(e){return new hl(await super._call(e))}}class Br extends q{}class Lr extends Br{}class Dr extends Br{async _call(e){return new hl(await super._call(e))}}class Rr extends q{}class Nr extends Rr{}class Vr extends q{}class jr extends Vr{}class qr extends Vr{async _call(e){return new hl(await super._call(e))}}class Gr extends q{}class Ur extends Gr{}class Wr extends q{}class Hr extends Wr{}class Xr extends Wr{async _call(e){return new hl(await super._call(e))}}class Kr extends q{}class Qr extends Kr{async _call(e){return new bl(await super._call(e))}}class Yr extends q{}class Zr extends Yr{}class Jr extends Yr{async _call(e){return new hl(await super._call(e))}}class ea extends q{}class ta extends ea{}class na extends ea{async _call(e){return new hl(await super._call(e))}}class ra extends q{}class aa extends ra{}class sa extends ra{}class ia extends q{}class oa extends ia{}class la extends ia{}class ua extends q{}class da extends ua{}class ca extends ua{async _call(e){return new hl(await super._call(e))}}class pa extends q{}class ha extends pa{}class ma extends pa{async _call(e){return new ga(await super._call(e))}}class fa extends pa{async _call(e){return new _a(await super._call(e))}}class ga extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class _a extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class wa extends q{}class ya extends wa{}class ba extends wa{async _call(e){return new va(await super._call(e))}}class va extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class xa extends q{}class Ma extends xa{}class Ta extends xa{async _call(e){return new ka(await super._call(e))}}class ka extends ga{}class $a extends q{}class Ca extends $a{}class Sa extends $a{async _call(e){return new hl(await 
super._call(e))}}class Pa extends q{}class Ea extends Pa{}class Fa extends Pa{async _call(e){return new hl(await super._call(e))}}class Aa extends q{}class Ia extends Aa{}class za extends Aa{async _call(e){return new hl(await super._call(e))}}class Oa extends q{}class Ba extends Oa{}class La extends Oa{async _call(e){return new hl(await super._call(e))}}class Da extends q{}class Ra extends Da{}class Na extends Da{}class Va extends q{}class ja extends Va{}class qa extends Va{}class Ga extends q{}class Ua extends Ga{}class Wa extends q{}class Ha extends Wa{}class Xa extends Wa{}class Ka extends Wa{}class Qa extends q{}class Ya extends Qa{}class Za extends q{}class Ja extends Za{}class es extends Za{}class ts extends q{}class ns extends ts{}class rs extends ts{}class as extends q{}class ss extends as{}class is extends q{}class os extends is{}class ls extends is{async _call(e){return new hl(await super._call(e))}}class us extends q{}class ds extends us{}class cs extends us{async _call(e){return new hl(await super._call(e))}}class ps extends q{}class hs extends ps{}class ms extends ps{async _call(e){return new hl(await super._call(e))}}class fs extends q{}class gs extends fs{}class _s extends fs{async _call(e){return new ws(await super._call(e))}}class ws extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ys extends q{}class bs extends ys{async get_image_embeddings({pixel_values:e}){return await L(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new p.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return 
e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new vs(await super._call(e))}}class vs extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class xs extends q{}class Ms extends xs{}class Ts extends xs{}class ks extends q{}class $s extends ks{}class Cs extends ks{}class Ss extends q{}class Ps extends Ss{}class Es extends Ss{async _call(e){return new wl(await super._call(e))}}class Fs extends Ss{async _call(e){return new hl(await super._call(e))}}class As extends Ss{async _call(e){return new fl(await super._call(e))}}class Is extends q{}class zs extends Is{}class Os extends Is{async _call(e){return new fl(await super._call(e))}}class Bs extends q{}class Ls extends Bs{}class Ds extends q{}class Rs extends Ds{}class Ns extends Ds{async _call(e){return new wl(await super._call(e))}}class Vs extends Ds{async _call(e){return new hl(await super._call(e))}}class js extends q{}class qs extends js{}class Gs extends js{async _call(e){return new wl(await super._call(e))}}class Us extends js{async _call(e){return new hl(await super._call(e))}}class Ws extends js{async _call(e){return new fl(await super._call(e))}}class Hs extends q{}class Xs extends Hs{}class Ks extends Hs{async _call(e){return new wl(await super._call(e))}}class Qs extends Hs{async _call(e){return new hl(await super._call(e))}}class Ys extends q{}class Zs extends Ss{}class Js extends Ss{async _call(e){return new wl(await super._call(e))}}class ei extends Ss{async _call(e){return new hl(await super._call(e))}}class ti extends q{}class ni extends ti{}class ri extends ti{async _call(e){return new wl(await super._call(e))}}class ai extends ti{async _call(e){return new hl(await super._call(e))}}class si extends ti{async _call(e){return new ml(await super._call(e))}}class ii extends ti{async _call(e){return 
new fl(await super._call(e))}}class oi extends q{}class li extends oi{}class ui extends oi{}class di extends oi{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:a=20,vocoder:s=null}={}){const i={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await L(this,i),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*a),c=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=O(!!g);let r;r=g?g.output_sequence_out:new p.Tensor("float32",new Float32Array(h),[1,1,h]);let a={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(a,f),g=await A(this.sessions.decoder_model_merged,a),f=this.getPastKeyValues(g,f);const{prob:s,spectrum:i}=g;if(m.push(i),_>=c&&(Array.from(s.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,p.cat)(m),{waveform:y}=await A(s.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class ci extends q{main_input_name="spectrogram"}class pi extends q{}class hi extends pi{}class mi extends q{}class fi extends mi{}class gi extends mi{}class _i extends q{}class wi extends _i{}class yi extends _i{}class bi extends q{}class vi extends bi{}class xi extends bi{}class Mi extends q{}class Ti extends Mi{}class ki extends Mi{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class $i extends Mi{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ci extends q{}class Si extends Ci{async _call(e){return new vl(await super._call(e))}}class Pi extends q{}class Ei extends Pi{}class Fi extends Pi{}class Ai extends Pi{}class Ii extends q{}class zi extends Ii{}class Oi extends Ii{}class Bi extends q{}class Li extends Bi{}class Di extends Bi{async _call(e){return new hl(await super._call(e))}}class Ri extends q{}class Ni extends Ri{}class Vi extends Ri{}class ji extends 
q{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,a=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%r;i>0&&i<=a&&(e.data[s++]=e.data[t])}const i=Math.floor(t/r),o=s/(i*r);return new p.Tensor(e.type,e.data.slice(0,s),[i,r,o])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class qi extends q{}class Gi extends qi{}class Ui extends qi{async _call(e){return new hl(await super._call(e))}}class Wi extends q{}class Hi extends Wi{}class Xi extends Wi{async _call(e){return new hl(await super._call(e))}}class Ki extends q{}class Qi extends Ki{}class Yi extends Ki{async _call(e){return new hl(await super._call(e))}}class Zi extends q{}class Ji extends Zi{}class eo extends Zi{async _call(e){return new hl(await super._call(e))}}class to extends q{}class no extends to{}class ro{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:a=null,local_files_only:s=!1,revision:i="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:a,local_files_only:s,revision:i,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await 
r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await q.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const ao=new Map([["bert",["BertModel",H]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",te]],["electra",["ElectraModel",he]],["esm",["EsmModel",qe]],["convbert",["ConvBertModel",oe]],["camembert",["CamembertModel",ye]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Fe]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",dt]],["distilbert",["DistilBertModel",Le]],["roberta",["RobertaModel",Rt]],["xlm",["XLMModel",Ut]],["xlm-roberta",["XLMRobertaModel",Yt]],["clap",["ClapModel",Ti]],["clip",["CLIPModel",gn]],["clipseg",["CLIPSegModel",Sn]],["chinese_clip",["ChineseCLIPModel",$n]],["siglip",["SiglipModel",xn]],["mobilebert",["MobileBertModel",Xe]],["squeezebert",["SqueezeBertModel",st]],["wav2vec2",["Wav2Vec2Model",Ps]],["wav2vec2-bert",["Wav2Vec2BertModel",Xs]],["unispeech",["UniSpeechModel",Rs]],["unispeech-sat",["UniSpeechSatModel",qs]],["hubert",["HubertModel",Zs]],["wavlm",["WavLMModel",ni]],["audio-spectrogram-transformer",["ASTModel",rn]],["vits",["VitsModel",Si]],["pyannote",["PyAnnoteModel",zs]],["wespeaker-resnet",["WeSpeakerResNetModel",Ls]],["detr",["DetrModel",ha]],["rt_detr",["RTDetrModel",ya]],["table-transformer",["TableTransformerModel",Ma]],["vit",["ViTModel",zr]],["pvt",["PvtModel",Lr]],["vit_msn",["ViTMSNModel",jr]],["vit_mae",["ViTMAEModel",Nr]],["groupvit",["GroupViTModel",Ur]],["fastvit",["FastViTModel",Hr]],["mobilevit",["MobileViTModel",Zr]],["mobilevitv2",["MobileViTV2Model",ta]],["owlvit",["OwlViTModel",aa]],[
"owlv2",["Owlv2Model",oa]],["beit",["BeitModel",da]],["deit",["DeiTModel",Ca]],["hiera",["HieraModel",Ea]],["convnext",["ConvNextModel",os]],["convnextv2",["ConvNextV2Model",ds]],["dinov2",["Dinov2Model",hs]],["resnet",["ResNetModel",Ia]],["swin",["SwinModel",Ba]],["swin2sr",["Swin2SRModel",Ra]],["donut-swin",["DonutSwinModel",ss]],["yolos",["YolosModel",gs]],["dpt",["DPTModel",ja]],["glpn",["GLPNModel",ns]],["hifigan",["SpeechT5HifiGan",ci]],["efficientnet",["EfficientNetModel",Li]],["decision_transformer",["DecisionTransformerModel",no]],["mobilenet_v1",["MobileNetV1Model",Gi]],["mobilenet_v2",["MobileNetV2Model",Hi]],["mobilenet_v3",["MobileNetV3Model",Qi]],["mobilenet_v4",["MobileNetV4Model",Ji]],["maskformer",["MaskFormerModel",Ja]]]),so=new Map([["t5",["T5Model",ft]],["longt5",["LongT5Model",wt]],["mt5",["MT5Model",vt]],["bart",["BartModel",Tt]],["mbart",["MBartModel",St]],["marian",["MarianModel",Ms]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",$s]],["blenderbot",["BlenderbotModel",It]],["blenderbot-small",["BlenderbotSmallModel",Bt]]]),io=new Map([["bloom",["BloomModel",kr]],["jais",["JAISModel",zn]],["gpt2",["GPT2Model",Fn]],["gptj",["GPTJModel",qn]],["gpt_bigcode",["GPTBigCodeModel",Wn]],["gpt_neo",["GPTNeoModel",Ln]],["gpt_neox",["GPTNeoXModel",Nn]],["codegen",["CodeGenModel",Kn]],["llama",["LlamaModel",Zn]],["granite",["GraniteModel",tr]],["cohere",["CohereModel",ar]],["gemma",["GemmaModel",or]],["gemma2",["Gemma2Model",dr]],["openelm",["OpenELMModel",hr]],["qwen2",["Qwen2Model",gr]],["phi",["PhiModel",yr]],["phi3",["Phi3Model",xr]],["mpt",["MptModel",Sr]],["opt",["OPTModel",Fr]],["mistral",["MistralModel",fi]],["starcoder2",["Starcoder2Model",wi]],["falcon",["FalconModel",vi]],["stablelm",["StableLmModel",zi]]]),oo=new Map([["speecht5",["SpeechT5ForSpeechToText",ui]],["whisper",["WhisperForConditionalGeneration",ln]]]),lo=new Map([["speecht5",["SpeechT5ForTextToSpeech",di]]]),uo=new 
Map([["vits",["VitsModel",Si]],["musicgen",["MusicgenForConditionalGeneration",ji]]]),co=new Map([["bert",["BertForSequenceClassification",K]],["roformer",["RoFormerForSequenceClassification",re]],["electra",["ElectraForSequenceClassification",fe]],["esm",["EsmForSequenceClassification",Ue]],["convbert",["ConvBertForSequenceClassification",ue]],["camembert",["CamembertForSequenceClassification",ve]],["deberta",["DebertaForSequenceClassification",Ce]],["deberta-v2",["DebertaV2ForSequenceClassification",Ie]],["mpnet",["MPNetForSequenceClassification",tt]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",De]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Ht]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",$t]],["mbart",["MBartForSequenceClassification",Et]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",ot]]]),po=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ae]],["electra",["ElectraForTokenClassification",ge]],["esm",["EsmForTokenClassification",We]],["convbert",["ConvBertForTokenClassification",de]],["camembert",["CamembertForTokenClassification",xe]],["deberta",["DebertaForTokenClassification",Se]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",nt]],["distilbert",["DistilBertForTokenClassification",Re]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Xt]],["xlm-roberta",["XLMRobertaForTokenClassification",en]]]),ho=new 
Map([["t5",["T5ForConditionalGeneration",gt]],["longt5",["LongT5ForConditionalGeneration",yt]],["mt5",["MT5ForConditionalGeneration",xt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",Ts]],["m2m_100",["M2M100ForConditionalGeneration",Cs]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Lt]]]),mo=new Map([["bloom",["BloomForCausalLM",$r]],["gpt2",["GPT2LMHeadModel",An]],["jais",["JAISLMHeadModel",On]],["gptj",["GPTJForCausalLM",Gn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Hn]],["gpt_neo",["GPTNeoForCausalLM",Dn]],["gpt_neox",["GPTNeoXForCausalLM",Vn]],["codegen",["CodeGenForCausalLM",Qn]],["llama",["LlamaForCausalLM",Jn]],["granite",["GraniteForCausalLM",nr]],["cohere",["CohereForCausalLM",sr]],["gemma",["GemmaForCausalLM",lr]],["gemma2",["Gemma2ForCausalLM",cr]],["openelm",["OpenELMForCausalLM",mr]],["qwen2",["Qwen2ForCausalLM",_r]],["phi",["PhiForCausalLM",br]],["phi3",["Phi3ForCausalLM",Mr]],["mpt",["MptForCausalLM",Pr]],["opt",["OPTForCausalLM",Ar]],["mbart",["MBartForCausalLM",Ft]],["mistral",["MistralForCausalLM",gi]],["starcoder2",["Starcoder2ForCausalLM",yi]],["falcon",["FalconForCausalLM",xi]],["trocr",["TrOCRForCausalLM",hi]],["stablelm",["StableLmForCausalLM",Oi]]]),fo=new Map([["bert",["BertForMaskedLM",X]],["roformer",["RoFormerForMaskedLM",ne]],["electra",["ElectraForMaskedLM",me]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",le]],["camembert",["CamembertForMaskedLM",be]],["deberta",["DebertaForMaskedLM",$e]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",et]],["albert",["AlbertForMaskedLM",ht]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Nt]],["xlm",["XLMWithLMHeadModel",Wt]],["xlm-roberta",["XLMRobertaForMaskedLM",Zt]],["mobilebert",["MobileBertForMaskedLM",Ke]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),go=new 
Map([["bert",["BertForQuestionAnswering",Y]],["roformer",["RoFormerForQuestionAnswering",se]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Me]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Oe]],["mpnet",["MPNetForQuestionAnswering",rt]],["albert",["AlbertForQuestionAnswering",pt]],["distilbert",["DistilBertForQuestionAnswering",Ne]],["roberta",["RobertaForQuestionAnswering",qt]],["xlm",["XLMForQuestionAnswering",Kt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",tn]],["mobilebert",["MobileBertForQuestionAnswering",Ye]],["squeezebert",["SqueezeBertForQuestionAnswering",lt]]]),_o=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",un]]]),wo=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",pn]],["florence2",["Florence2ForConditionalGeneration",mn]]]),yo=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",un]]]),bo=new 
Map([["vit",["ViTForImageClassification",Or]],["pvt",["PvtForImageClassification",Dr]],["vit_msn",["ViTMSNForImageClassification",qr]],["fastvit",["FastViTForImageClassification",Xr]],["mobilevit",["MobileViTForImageClassification",Jr]],["mobilevitv2",["MobileViTV2ForImageClassification",na]],["beit",["BeitForImageClassification",ca]],["deit",["DeiTForImageClassification",Sa]],["hiera",["HieraForImageClassification",Fa]],["convnext",["ConvNextForImageClassification",ls]],["convnextv2",["ConvNextV2ForImageClassification",cs]],["dinov2",["Dinov2ForImageClassification",ms]],["resnet",["ResNetForImageClassification",za]],["swin",["SwinForImageClassification",La]],["segformer",["SegformerForImageClassification",Fi]],["efficientnet",["EfficientNetForImageClassification",Di]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ui]],["mobilenet_v2",["MobileNetV2ForImageClassification",Xi]],["mobilenet_v3",["MobileNetV3ForImageClassification",Yi]],["mobilenet_v4",["MobileNetV4ForImageClassification",eo]]]),vo=new Map([["detr",["DetrForObjectDetection",ma]],["rt_detr",["RTDetrForObjectDetection",ba]],["table-transformer",["TableTransformerForObjectDetection",Ta]],["yolos",["YolosForObjectDetection",_s]]]),xo=new Map([["owlvit",["OwlViTForObjectDetection",sa]],["owlv2",["Owlv2ForObjectDetection",la]]]),Mo=new Map([["detr",["DetrForSegmentation",fa]],["clipseg",["CLIPSegForImageSegmentation",Pn]]]),To=new Map([["segformer",["SegformerForSemanticSegmentation",Ai]],["sapiens",["SapiensForSemanticSegmentation",Ha]]]),ko=new Map([["detr",["DetrForSegmentation",fa]],["maskformer",["MaskFormerForInstanceSegmentation",es]]]),$o=new Map([["sam",["SamModel",bs]]]),Co=new Map([["wav2vec2",["Wav2Vec2ForCTC",Es]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ks]],["unispeech",["UniSpeechForCTC",Ns]],["unispeech-sat",["UniSpeechSatForCTC",Gs]],["wavlm",["WavLMForCTC",ri]],["hubert",["HubertForCTC",Js]]]),So=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Fs]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Qs]],["unispeech",["UniSpeechForSequenceClassification",Vs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Us]],["wavlm",["WavLMForSequenceClassification",ai]],["hubert",["HubertForSequenceClassification",ei]],["audio-spectrogram-transformer",["ASTForAudioClassification",an]]]),Po=new Map([["wavlm",["WavLMForXVector",si]]]),Eo=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Ws]],["wavlm",["WavLMForAudioFrameClassification",ii]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",As]],["pyannote",["PyAnnoteForAudioFrameClassification",Os]]]),Fo=new Map([["vitmatte",["VitMatteForImageMatting",Qr]]]),Ao=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Na]]]),Io=new Map([["dpt",["DPTForDepthEstimation",qa]],["depth_anything",["DepthAnythingForDepthEstimation",Ua]],["glpn",["GLPNForDepthEstimation",rs]],["sapiens",["SapiensForDepthEstimation",Xa]],["depth_pro",["DepthProForDepthEstimation",Ya]]]),zo=new Map([["sapiens",["SapiensForNormalEstimation",Ka]]]),Oo=new Map([["clip",["CLIPVisionModelWithProjection",bn]],["siglip",["SiglipVisionModel",Tn]]]),Bo=[[ao,y],[so,b],[io,M],[co,y],[po,y],[ho,v],[oo,v],[mo,M],[fo,y],[go,y],[_o,x],[wo,k],[bo,y],[Mo,y],[ko,y],[To,y],[Fo,y],[Ao,y],[Io,y],[zo,y],[vo,y],[xo,y],[$o,T],[Co,y],[So,y],[lo,v],[uo,y],[Po,y],[Eo,y],[Oo,y]];for(const[e,t]of Bo)for(const[n,r]of e.values())C.set(n,t),P.set(r,n),S.set(n,r);const Lo=[["MusicgenForConditionalGeneration",ji,$],["CLIPTextModelWithProjection",wn,y],["SiglipTextModel",Mn,y],["ClapTextModelWithProjection",ki,y],["ClapAudioModelWithProjection",$i,y]];for(const[e,t,n]of Lo)C.set(e,n),P.set(t,e),S.set(e,t);class Do extends ro{static MODEL_CLASS_MAPPINGS=Bo.map((e=>e[0]));static BASE_IF_FAIL=!0}class Ro extends ro{static MODEL_CLASS_MAPPINGS=[co]}class No extends ro{static MODEL_CLASS_MAPPINGS=[po]}class Vo extends ro{static 
MODEL_CLASS_MAPPINGS=[ho]}class jo extends ro{static MODEL_CLASS_MAPPINGS=[oo]}class qo extends ro{static MODEL_CLASS_MAPPINGS=[lo]}class Go extends ro{static MODEL_CLASS_MAPPINGS=[uo]}class Uo extends ro{static MODEL_CLASS_MAPPINGS=[mo]}class Wo extends ro{static MODEL_CLASS_MAPPINGS=[fo]}class Ho extends ro{static MODEL_CLASS_MAPPINGS=[go]}class Xo extends ro{static MODEL_CLASS_MAPPINGS=[_o]}class Ko extends ro{static MODEL_CLASS_MAPPINGS=[bo]}class Qo extends ro{static MODEL_CLASS_MAPPINGS=[Mo]}class Yo extends ro{static MODEL_CLASS_MAPPINGS=[To]}class Zo extends ro{static MODEL_CLASS_MAPPINGS=[ko]}class Jo extends ro{static MODEL_CLASS_MAPPINGS=[vo]}class el extends ro{static MODEL_CLASS_MAPPINGS=[xo]}class tl extends ro{static MODEL_CLASS_MAPPINGS=[$o]}class nl extends ro{static MODEL_CLASS_MAPPINGS=[Co]}class rl extends ro{static MODEL_CLASS_MAPPINGS=[So]}class al extends ro{static MODEL_CLASS_MAPPINGS=[Po]}class sl extends ro{static MODEL_CLASS_MAPPINGS=[Eo]}class il extends ro{static MODEL_CLASS_MAPPINGS=[yo]}class ol extends ro{static MODEL_CLASS_MAPPINGS=[Fo]}class ll extends ro{static MODEL_CLASS_MAPPINGS=[Ao]}class ul extends ro{static MODEL_CLASS_MAPPINGS=[Io]}class dl extends ro{static MODEL_CLASS_MAPPINGS=[zo]}class cl extends ro{static MODEL_CLASS_MAPPINGS=[Oo]}class pl extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:a=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=a}}class hl extends G{constructor({logits:e}){super(),this.logits=e}}class ml extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class fl extends G{constructor({logits:e}){super(),this.logits=e}}class gl extends G{constructor({logits:e}){super(),this.logits=e}}class _l extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class wl extends 
G{constructor({logits:e}){super(),this.logits=e}}class yl extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class bl extends G{constructor({alphas:e}){super(),this.alphas=e}}class vl extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
175
+ \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>an,ASTModel:()=>rn,ASTPreTrainedModel:()=>nn,AlbertForMaskedLM:()=>ht,AlbertForQuestionAnswering:()=>pt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>dt,AlbertPreTrainedModel:()=>ut,AutoModel:()=>Go,AutoModelForAudioClassification:()=>ul,AutoModelForAudioFrameClassification:()=>cl,AutoModelForCTC:()=>ll,AutoModelForCausalLM:()=>Yo,AutoModelForDepthEstimation:()=>fl,AutoModelForDocumentQuestionAnswering:()=>pl,AutoModelForImageClassification:()=>tl,AutoModelForImageFeatureExtraction:()=>_l,AutoModelForImageMatting:()=>hl,AutoModelForImageSegmentation:()=>nl,AutoModelForImageToImage:()=>ml,AutoModelForMaskGeneration:()=>ol,AutoModelForMaskedLM:()=>Zo,AutoModelForNormalEstimation:()=>gl,AutoModelForObjectDetection:()=>sl,AutoModelForQuestionAnswering:()=>Jo,AutoModelForSemanticSegmentation:()=>rl,AutoModelForSeq2SeqLM:()=>Ho,AutoModelForSequenceClassification:()=>Uo,AutoModelForSpeechSeq2Seq:()=>Xo,AutoModelForTextToSpectrogram:()=>Ko,AutoModelForTextToWaveform:()=>Qo,AutoModelForTokenClassification:()=>Wo,AutoModelForUniversalSegmentation:()=>al,AutoModelForVision2Seq:()=>el,AutoModelForXVector:()=>dl,AutoModelForZeroShotObjectDetection:()=>il,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>$t,BartModel:()=>Tt,BartPretrainedModel:()=>Mt,BaseModelOutput:()=>U,BeitForImageClassification:()=>_a,BeitModel:()=>ga,BeitPreTrainedModel:()=>fa,BertForMaskedLM:()=>X,BertForQuestionAnswering:()=>Y,BertForSequenceClassification:()=>K,BertForTokenClassification:()=>Q,BertModel:()=>H,BertPreTrainedModel:()=>W,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>It,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Lt,BlenderbotSmallModel:()=>Bt,BlenderbotSmallPreTrainedModel:()=>Ot,BloomForCausalLM:()=>Ar,BloomModel:()=>Fr,BloomPreTrainedModel:()=>Er,CLIPModel:()=>gn,CLIPPreTrainedModel:()=>fn,CLIPSegForImageSegmentation:()=>Pn,CLIPSegMo
del:()=>Sn,CLIPSegPreTrainedModel:()=>Cn,CLIPTextModel:()=>_n,CLIPTextModelWithProjection:()=>wn,CLIPVisionModel:()=>yn,CLIPVisionModelWithProjection:()=>bn,CamembertForMaskedLM:()=>be,CamembertForQuestionAnswering:()=>Me,CamembertForSequenceClassification:()=>ve,CamembertForTokenClassification:()=>xe,CamembertModel:()=>ye,CamembertPreTrainedModel:()=>we,CausalLMOutput:()=>Tl,CausalLMOutputWithPast:()=>kl,ChineseCLIPModel:()=>$n,ChineseCLIPPreTrainedModel:()=>kn,ClapAudioModelWithProjection:()=>Ai,ClapModel:()=>Ei,ClapPreTrainedModel:()=>Pi,ClapTextModelWithProjection:()=>Fi,CodeGenForCausalLM:()=>Qn,CodeGenModel:()=>Kn,CodeGenPreTrainedModel:()=>Xn,CohereForCausalLM:()=>cr,CohereModel:()=>dr,CoherePreTrainedModel:()=>ur,ConvBertForMaskedLM:()=>le,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ue,ConvBertForTokenClassification:()=>de,ConvBertModel:()=>oe,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>ms,ConvNextModel:()=>hs,ConvNextPreTrainedModel:()=>ps,ConvNextV2ForImageClassification:()=>_s,ConvNextV2Model:()=>gs,ConvNextV2PreTrainedModel:()=>fs,DPTForDepthEstimation:()=>Ka,DPTModel:()=>Xa,DPTPreTrainedModel:()=>Ha,DebertaForMaskedLM:()=>$e,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Ce,DebertaForTokenClassification:()=>Se,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>Te,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>Oe,DebertaV2ForSequenceClassification:()=>Ie,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Fe,DebertaV2PreTrainedModel:()=>Ee,DecisionTransformerModel:()=>lo,DecisionTransformerPreTrainedModel:()=>oo,DeiTForImageClassification:()=>za,DeiTModel:()=>Ia,DeiTPreTrainedModel:()=>Aa,DepthAnythingForDepthEstimation:()=>Ya,DepthAnythingPreTrainedModel:()=>Qa,DepthProForDepthEstimation:()=>rs,DepthProPreTrainedModel:()=>ns,DetrForObjectDetection:()=>ba,DetrForSegmentation:()=>va,DetrModel:()=>ya,DetrObjectDetectionOutput:()=>xa,DetrPreTrainedModel:()=>wa,Detr
SegmentationOutput:()=>Ma,Dinov2ForImageClassification:()=>bs,Dinov2Model:()=>ys,Dinov2PreTrainedModel:()=>ws,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>Ne,DistilBertForSequenceClassification:()=>De,DistilBertForTokenClassification:()=>Re,DistilBertModel:()=>Le,DistilBertPreTrainedModel:()=>Be,DonutSwinModel:()=>cs,DonutSwinPreTrainedModel:()=>ds,EfficientNetForImageClassification:()=>Gi,EfficientNetModel:()=>qi,EfficientNetPreTrainedModel:()=>ji,ElectraForMaskedLM:()=>me,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>fe,ElectraForTokenClassification:()=>ge,ElectraModel:()=>he,ElectraPreTrainedModel:()=>pe,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>Ue,EsmForTokenClassification:()=>We,EsmModel:()=>qe,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>Si,FalconModel:()=>Ci,FalconPreTrainedModel:()=>$i,FastViTForImageClassification:()=>ea,FastViTModel:()=>Jr,FastViTPreTrainedModel:()=>Zr,Florence2ForConditionalGeneration:()=>mn,Florence2PreTrainedModel:()=>hn,GLPNForDepthEstimation:()=>us,GLPNModel:()=>ls,GLPNPreTrainedModel:()=>os,GPT2LMHeadModel:()=>An,GPT2Model:()=>Fn,GPT2PreTrainedModel:()=>En,GPTBigCodeForCausalLM:()=>Hn,GPTBigCodeModel:()=>Wn,GPTBigCodePreTrainedModel:()=>Un,GPTJForCausalLM:()=>Gn,GPTJModel:()=>qn,GPTJPreTrainedModel:()=>jn,GPTNeoForCausalLM:()=>Dn,GPTNeoModel:()=>Ln,GPTNeoPreTrainedModel:()=>Bn,GPTNeoXForCausalLM:()=>Vn,GPTNeoXModel:()=>Nn,GPTNeoXPreTrainedModel:()=>Rn,Gemma2ForCausalLM:()=>_r,Gemma2Model:()=>gr,Gemma2PreTrainedModel:()=>fr,GemmaForCausalLM:()=>mr,GemmaModel:()=>hr,GemmaPreTrainedModel:()=>pr,GraniteForCausalLM:()=>lr,GraniteModel:()=>or,GranitePreTrainedModel:()=>ir,GroupViTModel:()=>Yr,GroupViTPreTrainedModel:()=>Qr,HieraForImageClassification:()=>La,HieraModel:()=>Ba,HieraPreTrainedModel:()=>Oa,HubertForCTC:()=>si,HubertForSequenceClassification:()=>ii,HubertModel:()=>ai,HubertPreTrainedModel:()=>ri,ImageMattingOutput:()=>$l,JAISLMHeadModel:()=>On,JAISModel:()=>zn,JAISPre
TrainedModel:()=>In,LlamaForCausalLM:()=>Jn,LlamaModel:()=>Zn,LlamaPreTrainedModel:()=>Yn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>dn,LongT5ForConditionalGeneration:()=>yt,LongT5Model:()=>wt,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>Is,M2M100Model:()=>As,M2M100PreTrainedModel:()=>Fs,MBartForCausalLM:()=>Ft,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>Et,MBartModel:()=>St,MBartPreTrainedModel:()=>Ct,MPNetForMaskedLM:()=>et,MPNetForQuestionAnswering:()=>rt,MPNetForSequenceClassification:()=>tt,MPNetForTokenClassification:()=>nt,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ze,MT5ForConditionalGeneration:()=>xt,MT5Model:()=>vt,MT5PreTrainedModel:()=>bt,MarianMTModel:()=>Es,MarianModel:()=>Ps,MarianPreTrainedModel:()=>Ss,MaskFormerForInstanceSegmentation:()=>is,MaskFormerModel:()=>ss,MaskFormerPreTrainedModel:()=>as,MaskedLMOutput:()=>xl,MistralForCausalLM:()=>xi,MistralModel:()=>vi,MistralPreTrainedModel:()=>bi,MobileBertForMaskedLM:()=>Ke,MobileBertForQuestionAnswering:()=>Ye,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Xe,MobileBertPreTrainedModel:()=>He,MobileLLMForCausalLM:()=>nr,MobileLLMModel:()=>tr,MobileLLMPreTrainedModel:()=>er,MobileNetV1ForImageClassification:()=>Yi,MobileNetV1Model:()=>Qi,MobileNetV1PreTrainedModel:()=>Ki,MobileNetV2ForImageClassification:()=>eo,MobileNetV2Model:()=>Ji,MobileNetV2PreTrainedModel:()=>Zi,MobileNetV3ForImageClassification:()=>ro,MobileNetV3Model:()=>no,MobileNetV3PreTrainedModel:()=>to,MobileNetV4ForImageClassification:()=>io,MobileNetV4Model:()=>so,MobileNetV4PreTrainedModel:()=>ao,MobileViTForImageClassification:()=>sa,MobileViTModel:()=>aa,MobileViTPreTrainedModel:()=>ra,MobileViTV2ForImageClassification:()=>la,MobileViTV2Model:()=>oa,MobileViTV2PreTrainedModel:()=>ia,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>pn,MptForCausalLM:()=>Or,MptModel:()=>zr,MptPreTrainedModel:()=>Ir,MusicgenForCausalLM:()=>Hi,MusicgenForConditionalGe
neration:()=>Xi,MusicgenModel:()=>Wi,MusicgenPreTrainedModel:()=>Ui,NomicBertModel:()=>J,NomicBertPreTrainedModel:()=>Z,OPTForCausalLM:()=>Dr,OPTModel:()=>Lr,OPTPreTrainedModel:()=>Br,OlmoForCausalLM:()=>sr,OlmoModel:()=>ar,OlmoPreTrainedModel:()=>rr,OpenELMForCausalLM:()=>br,OpenELMModel:()=>yr,OpenELMPreTrainedModel:()=>wr,OwlViTForObjectDetection:()=>ca,OwlViTModel:()=>da,OwlViTPreTrainedModel:()=>ua,Owlv2ForObjectDetection:()=>ma,Owlv2Model:()=>ha,Owlv2PreTrainedModel:()=>pa,Phi3ForCausalLM:()=>Pr,Phi3Model:()=>Sr,Phi3PreTrainedModel:()=>Cr,PhiForCausalLM:()=>$r,PhiModel:()=>kr,PhiPreTrainedModel:()=>Tr,PreTrainedModel:()=>q,PretrainedMixin:()=>uo,PvtForImageClassification:()=>Gr,PvtModel:()=>qr,PvtPreTrainedModel:()=>jr,PyAnnoteForAudioFrameClassification:()=>Vs,PyAnnoteModel:()=>Ns,PyAnnotePreTrainedModel:()=>Rs,QuestionAnsweringModelOutput:()=>Ml,Qwen2ForCausalLM:()=>Mr,Qwen2Model:()=>xr,Qwen2PreTrainedModel:()=>vr,RTDetrForObjectDetection:()=>$a,RTDetrModel:()=>ka,RTDetrObjectDetectionOutput:()=>Ca,RTDetrPreTrainedModel:()=>Ta,ResNetForImageClassification:()=>Na,ResNetModel:()=>Ra,ResNetPreTrainedModel:()=>Da,RoFormerForMaskedLM:()=>ne,RoFormerForQuestionAnswering:()=>se,RoFormerForSequenceClassification:()=>re,RoFormerForTokenClassification:()=>ae,RoFormerModel:()=>te,RoFormerPreTrainedModel:()=>ee,RobertaForMaskedLM:()=>Nt,RobertaForQuestionAnswering:()=>qt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Rt,RobertaPreTrainedModel:()=>Dt,SamImageSegmentationOutput:()=>Cs,SamModel:()=>$s,SamPreTrainedModel:()=>ks,SapiensForDepthEstimation:()=>es,SapiensForNormalEstimation:()=>ts,SapiensForSemanticSegmentation:()=>Ja,SapiensPreTrainedModel:()=>Za,SegformerForImageClassification:()=>Li,SegformerForSemanticSegmentation:()=>Di,SegformerModel:()=>Bi,SegformerPreTrainedModel:()=>Oi,Seq2SeqLMOutput:()=>wl,SequenceClassifierOutput:()=>yl,SiglipModel:()=>xn,SiglipPreTrainedModel:()=>vn,SiglipTextModel:()=>Mn,SiglipVisionM
odel:()=>Tn,SpeechT5ForSpeechToText:()=>fi,SpeechT5ForTextToSpeech:()=>gi,SpeechT5HifiGan:()=>_i,SpeechT5Model:()=>mi,SpeechT5PreTrainedModel:()=>hi,SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>lt,SqueezeBertForSequenceClassification:()=>ot,SqueezeBertModel:()=>st,SqueezeBertPreTrainedModel:()=>at,StableLmForCausalLM:()=>Vi,StableLmModel:()=>Ni,StableLmPreTrainedModel:()=>Ri,Starcoder2ForCausalLM:()=>ki,Starcoder2Model:()=>Ti,Starcoder2PreTrainedModel:()=>Mi,Swin2SRForImageSuperResolution:()=>Wa,Swin2SRModel:()=>Ua,Swin2SRPreTrainedModel:()=>Ga,SwinForImageClassification:()=>qa,SwinModel:()=>ja,SwinPreTrainedModel:()=>Va,T5ForConditionalGeneration:()=>gt,T5Model:()=>ft,T5PreTrainedModel:()=>mt,TableTransformerForObjectDetection:()=>Ea,TableTransformerModel:()=>Pa,TableTransformerObjectDetectionOutput:()=>Fa,TableTransformerPreTrainedModel:()=>Sa,TokenClassifierOutput:()=>vl,TrOCRForCausalLM:()=>yi,TrOCRPreTrainedModel:()=>wi,UniSpeechForCTC:()=>Ws,UniSpeechForSequenceClassification:()=>Hs,UniSpeechModel:()=>Us,UniSpeechPreTrainedModel:()=>Gs,UniSpeechSatForAudioFrameClassification:()=>Zs,UniSpeechSatForCTC:()=>Qs,UniSpeechSatForSequenceClassification:()=>Ys,UniSpeechSatModel:()=>Ks,UniSpeechSatPreTrainedModel:()=>Xs,ViTForImageClassification:()=>Vr,ViTMAEModel:()=>Wr,ViTMAEPreTrainedModel:()=>Ur,ViTMSNForImageClassification:()=>Kr,ViTMSNModel:()=>Xr,ViTMSNPreTrainedModel:()=>Hr,ViTModel:()=>Nr,ViTPreTrainedModel:()=>Rr,VisionEncoderDecoderModel:()=>un,VitMatteForImageMatting:()=>na,VitMattePreTrainedModel:()=>ta,VitsModel:()=>zi,VitsModelOutput:()=>Cl,VitsPreTrainedModel:()=>Ii,Wav2Vec2BertForCTC:()=>ti,Wav2Vec2BertForSequenceClassification:()=>ni,Wav2Vec2BertModel:()=>ei,Wav2Vec2BertPreTrainedModel:()=>Js,Wav2Vec2ForAudioFrameClassification:()=>Ds,Wav2Vec2ForCTC:()=>Bs,Wav2Vec2ForSequenceClassification:()=>Ls,Wav2Vec2Model:()=>Os,Wav2Vec2PreTrainedModel:()=>zs,WavLMForAudioFrameClassification:()=>pi,WavLMForCTC:()=>ui,WavLMForSequenceClassifica
tion:()=>di,WavLMForXVector:()=>ci,WavLMModel:()=>li,WavLMPreTrainedModel:()=>oi,WeSpeakerResNetModel:()=>qs,WeSpeakerResNetPreTrainedModel:()=>js,WhisperForConditionalGeneration:()=>ln,WhisperModel:()=>on,WhisperPreTrainedModel:()=>sn,XLMForQuestionAnswering:()=>Kt,XLMForSequenceClassification:()=>Ht,XLMForTokenClassification:()=>Xt,XLMModel:()=>Ut,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Zt,XLMRobertaForQuestionAnswering:()=>tn,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>en,XLMRobertaModel:()=>Yt,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>Wt,XVectorOutput:()=>bl,YolosForObjectDetection:()=>Ms,YolosModel:()=>xs,YolosObjectDetectionOutput:()=>Ts,YolosPreTrainedModel:()=>vs});var r=n(/*! ./configs.js */"./src/configs.js"),a=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),u=n(/*! ./utils/constants.js */"./src/utils/constants.js"),d=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),c=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),p=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),h=n(/*! ./utils/maths.js */"./src/utils/maths.js"),m=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),f=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),g=n(/*! ./env.js */"./src/env.js"),_=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),w=n(/*! 
/**
 * Creates one inference session per entry of `t`, resolving the device, dtype
 * and session options for each file and downloading the model (plus optional
 * external data) through `getModelFile`.
 *
 * @param {string} e Model id or path, forwarded to `getModelFile`.
 * @param {Record<string, string>} t Maps session names to model file names
 *   (without dtype suffix or `.onnx` extension).
 * @param {Object} n Loading options; the fields read here are `config`,
 *   `device`, `dtype`, `subfolder`, `session_options` and
 *   `use_external_data_format`.
 * @returns {Promise<Record<string, any>>} Created sessions keyed like `t`.
 * @throws {Error} For an unknown dtype or kv_cache_dtype, fp16 on a WebGPU
 *   device without fp16 support, or external data requested under Node.js.
 *   Download failures (model file or external data) reject the returned promise.
 */
async function E(e, t, n) {
  // Safe own-property check: also works for objects without a prototype.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  // Resolves everything needed to create the session for one model file.
  const resolveSession = async (fileName) => {
    const customConfig = n.config?.["transformers.js_config"] ?? {};

    // `device` may be a single string or a per-file mapping.
    let device = n.device ?? customConfig.device;
    if (device && typeof device !== "string") {
      if (hasOwn(device, fileName)) {
        device = device[fileName];
      } else {
        console.warn(`device not specified for "${fileName}". Using the default device.`);
        device = null;
      }
    }
    const selectedDevice = device ?? (g.apis.IS_NODE_ENV ? "cpu" : "wasm");
    const executionProviders = (0, a.deviceToExecutionProviders)(selectedDevice);

    // `dtype` may be a single string or a per-file mapping.
    let dtype = n.dtype ?? customConfig.dtype;
    if (typeof dtype !== "string") {
      if (dtype && hasOwn(dtype, fileName)) {
        dtype = dtype[fileName];
      } else {
        dtype = s.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? s.DATA_TYPES.fp32;
        console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
      }
    }
    if (!hasOwn(s.DEFAULT_DTYPE_SUFFIX_MAPPING, dtype)) {
      throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);
    }
    if (dtype === s.DATA_TYPES.fp16 && selectedDevice === "webgpu" && !(await (0, s.isWebGpuFp16Supported)())) {
      throw new Error(`The device (${selectedDevice}) does not support fp16.`);
    }

    // `kv_cache_dtype` is either a string or a mapping keyed by model dtype.
    const kvCacheSetting = customConfig.kv_cache_dtype;
    let kvCacheDtype;
    if (kvCacheSetting) {
      kvCacheDtype = typeof kvCacheSetting === "string" ? kvCacheSetting : kvCacheSetting[dtype] ?? "float32";
    }
    if (kvCacheDtype && !["float32", "float16"].includes(kvCacheDtype)) {
      throw new Error(`Invalid kv_cache_dtype: ${kvCacheDtype}. Should be one of: float32, float16`);
    }
    const sessionConfig = { dtype, kv_cache_dtype: kvCacheDtype };

    const suffix = s.DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
    const modelFileName = `${n.subfolder ?? ""}/${fileName}${suffix}.onnx`;

    // Copy so the caller's `session_options` object is never mutated.
    const sessionOptions = { ...n.session_options };
    sessionOptions.executionProviders ??= executionProviders;

    const freeDimensionOverrides = customConfig.free_dimension_overrides;
    if (freeDimensionOverrides) {
      sessionOptions.freeDimensionOverrides ??= freeDimensionOverrides;
    } else if (selectedDevice.startsWith("webnn") && !sessionOptions.freeDimensionOverrides) {
      console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');
    }

    // Start the model download now so it overlaps with the external-data work.
    const bufferPromise = Promise.resolve((0, l.getModelFile)(e, modelFileName, true, n));
    // Mark the promise as observed: if a later step throws first, a failing
    // download must not additionally surface as an unhandled rejection. The
    // `await bufferPromise` below still rethrows on the normal path.
    bufferPromise.catch(() => {});

    const useExternalData = n.use_external_data_format ?? customConfig.use_external_data_format;
    let externalDataPromises = [];
    const externalDataRequested =
      useExternalData &&
      (useExternalData === true ||
        (typeof useExternalData === "object" && hasOwn(useExternalData, fileName) && useExternalData[fileName] === true));

    if (externalDataRequested) {
      if (g.apis.IS_NODE_ENV) {
        throw new Error("External data format is not yet supported in Node.js");
      }
      const path = `${fileName}${suffix}.onnx_data`;
      const fullPath = `${n.subfolder ?? ""}/${path}`;
      // An async IIFE (instead of `new Promise(async ...)`) so that a failed
      // download rejects this promise rather than leaving it pending forever.
      externalDataPromises.push(
        (async () => ({ path, data: await (0, l.getModelFile)(e, fullPath, true, n) }))(),
      );
    } else if (sessionOptions.externalData !== undefined) {
      // Entries whose `data` is a string are treated as file names to fetch.
      externalDataPromises = sessionOptions.externalData.map(async (entry) => {
        if (typeof entry.data === "string") {
          const data = await (0, l.getModelFile)(e, entry.data, true, n);
          return { ...entry, data };
        }
        return entry;
      });
    }
    if (externalDataPromises.length > 0) {
      sessionOptions.externalData = await Promise.all(externalDataPromises);
    }

    if (selectedDevice === "webgpu") {
      const shapes = (0, r.getKeyValueShapes)(n.config, { prefix: "present" });
      if (Object.keys(shapes).length > 0 && !(0, a.isONNXProxy)()) {
        // Ask the runtime to keep every `present*` output in a GPU buffer.
        const preferredOutputLocation = {};
        for (const key in shapes) {
          preferredOutputLocation[key] = "gpu-buffer";
        }
        sessionOptions.preferredOutputLocation = preferredOutputLocation;
      }
    }

    return { buffer: await bufferPromise, session_options: sessionOptions, session_config: sessionConfig };
  };

  const entries = await Promise.all(
    Object.keys(t).map(async (name) => {
      const { buffer, session_options, session_config } = await resolveSession(t[name]);
      return [name, await (0, a.createInferenceSession)(buffer, session_options, session_config)];
    }),
  );
  return Object.fromEntries(entries);
}
/**
 * Runs the encoder session (`sessions.model`) of a model.
 *
 * Only the inputs named by the session are forwarded. Two inputs are filled in
 * when the session expects them but they were not supplied:
 *  - `inputs_embeds`, computed from `input_ids` via `e.encode_text`;
 *  - `token_type_ids`, an all-zero int64 tensor shaped like `input_ids`.
 *
 * @param {Object} e Model instance exposing `sessions.model` (and `encode_text`
 *   when embeddings have to be computed).
 * @param {Object} t Model inputs.
 * @returns {Promise<Object>} Whatever the session run (`A`) resolves to.
 * @throws {Error} If a required input can be neither found nor derived.
 */
async function L(e, t) {
  const session = e.sessions.model;
  const feeds = (0, o.pick)(t, session.inputNames);
  const expects = (name) => session.inputNames.includes(name);

  if (expects("inputs_embeds") && !feeds.inputs_embeds) {
    if (!t.input_ids) {
      throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");
    }
    feeds.inputs_embeds = await e.encode_text({ input_ids: t.input_ids });
  }

  if (expects("token_type_ids") && !feeds.token_type_ids) {
    // The default is shaped after `input_ids`; without them (e.g. when only
    // `inputs_embeds` were given) fail with a clear message instead of a
    // TypeError on `undefined.data`.
    if (!feeds.input_ids) {
      throw new Error("Both `input_ids` and `token_type_ids` are missing in the model inputs.");
    }
    // A freshly allocated BigInt64Array is zero-filled.
    feeds.token_type_ids = new p.Tensor(
      "int64",
      new BigInt64Array(feeds.input_ids.data.length),
      feeds.input_ids.dims,
    );
  }

  return await A(session, feeds);
}
/**
 * Adjusts the inputs of a decoder-only model before a generation step when a
 * key/value cache is already present.
 *
 * With a cache, `input_ids` is reduced to the tokens the cache does not cover
 * yet. For models that define `image_token_index` and whose ids still contain
 * that token, the ids are cut relative to `num_image_tokens` and the attention
 * mask is replaced by an all-ones mask. `n` is modified in place.
 *
 * @param {Object} e Model instance (only `config` is read).
 * @param {*} t Unused.
 * @param {Object} n Model inputs for the next step.
 * @param {*} r Unused.
 * @returns {Object} The same `n` object.
 * @throws {Error} If the image-token branch is taken but `num_image_tokens` is not configured.
 */
function N(e, t, n, r) {
  const cache = n.past_key_values;
  if (!cache) {
    return n;
  }

  // The cached length is read from the second-to-last dimension of the first entry.
  const [firstEntry] = Object.values(cache);
  const pastLength = firstEntry.dims.at(-2);
  const { input_ids: inputIds, attention_mask: attentionMask } = n;
  const idsLength = inputIds.dims[1];

  // A mask longer than the ids: leave the inputs untouched.
  if (attentionMask && attentionMask.dims[1] > idsLength) {
    return n;
  }

  // The cache covers only a prefix of the ids: keep the uncovered tail.
  if (pastLength < idsLength) {
    n.input_ids = inputIds.slice(null, [pastLength, null]);
    return n;
  }

  const imageTokenIndex = e.config.image_token_index;
  if (imageTokenIndex == null) {
    return n;
  }
  // Loose equality on purpose: it is how the id data is compared with the configured index.
  const containsImageToken = inputIds.data.some((token) => token == imageTokenIndex);
  if (!containsImageToken) {
    return n;
  }

  const imageTokenCount = e.config.num_image_tokens;
  if (!imageTokenCount) {
    throw new Error("`num_image_tokens` is missing in the model configuration.");
  }
  const keep = idsLength - (pastLength - imageTokenCount);
  n.input_ids = inputIds.slice(null, [-keep, null]);
  n.attention_mask = (0, p.ones)([1, pastLength + keep]);
  return n;
}
/**
 * Base class of the models defined in this module.
 *
 * An instance holds the model configuration and its inference sessions, picks
 * the forward / input-preparation functions that match the model type
 * registered for its class, and implements loading (`from_pretrained`) and the
 * token-by-token `generate` loop.
 */
class q extends i.Callable {
  main_input_name = "input_ids";
  forward_params = ["input_ids", "attention_mask"];

  /**
   * @param {Object} e Model configuration.
   * @param {Record<string, any>} t Inference sessions keyed by name.
   * @param {Record<string, Object>} n Additional configs (e.g. `generation_config`).
   */
  constructor(e, t, n) {
    super();
    this.config = e;
    this.sessions = t;
    this.configs = n;

    const modelName = P.get(this.constructor);
    const modelType = C.get(modelName);

    this.can_generate = false;
    this._forward = null;
    this._prepare_inputs_for_generation = null;

    switch (modelType) {
      case M:
        this.can_generate = true;
        this._forward = D;
        this._prepare_inputs_for_generation = N;
        break;
      case v:
      case x:
      case $:
        this.can_generate = true;
        this._forward = B;
        this._prepare_inputs_for_generation = V;
        break;
      case b:
        this._forward = B;
        break;
      case k:
        this.can_generate = true;
        this._forward = R;
        this._prepare_inputs_for_generation = j;
        break;
      default:
        // Everything else is run through the encoder-only forward.
        this._forward = L;
    }

    if (this.can_generate) {
      this.forward_params.push("past_key_values");
    }
    this.custom_config = this.config["transformers.js_config"] ?? {};
  }

  /**
   * Disposes every session that exposes `handler.dispose`.
   * @returns {Promise<unknown[]>} Resolves once all dispose calls have settled successfully.
   */
  async dispose() {
    const pending = [];
    for (const session of Object.values(this.sessions)) {
      if (session?.handler?.dispose) {
        pending.push(session.handler.dispose());
      }
    }
    return await Promise.all(pending);
  }

  /**
   * Loads the configuration, the sessions required by the model type of this
   * class and, where applicable, `generation_config.json`, then instantiates
   * the class.
   *
   * @param {string} e Model id or path.
   * @param {Object} [options] Loading options; all of them are forwarded to the
   *   config / session / JSON loaders.
   * @returns {Promise<q>} A new instance of the class this was called on.
   */
  static async from_pretrained(e, {
    progress_callback = null,
    config = null,
    cache_dir = null,
    local_files_only = false,
    revision = "main",
    model_file_name = null,
    subfolder = "onnx",
    device = null,
    dtype = null,
    use_external_data_format = null,
    session_options = {},
  } = {}) {
    const options = {
      progress_callback,
      config,
      cache_dir,
      local_files_only,
      revision,
      model_file_name,
      subfolder,
      device,
      dtype,
      use_external_data_format,
      session_options,
    };

    const modelName = P.get(this);
    const modelType = C.get(modelName);

    config = options.config = await r.AutoConfig.from_pretrained(e, options);

    let info;
    if (modelType === M) {
      info = await Promise.all([
        E(e, { model: options.model_file_name ?? "model" }, options),
        F(e, { generation_config: "generation_config.json" }, options),
      ]);
    } else if (modelType === v || modelType === x) {
      info = await Promise.all([
        E(e, { model: "encoder_model", decoder_model_merged: "decoder_model_merged" }, options),
        F(e, { generation_config: "generation_config.json" }, options),
      ]);
    } else if (modelType === T) {
      info = await Promise.all([
        E(e, { model: "vision_encoder", prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder" }, options),
      ]);
    } else if (modelType === b) {
      info = await Promise.all([
        E(e, { model: "encoder_model", decoder_model_merged: "decoder_model_merged" }, options),
      ]);
    } else if (modelType === k) {
      const sessionFiles = {
        embed_tokens: "embed_tokens",
        vision_encoder: "vision_encoder",
        decoder_model_merged: "decoder_model_merged",
      };
      if (config.is_encoder_decoder) {
        sessionFiles.model = "encoder_model";
      }
      info = await Promise.all([
        E(e, sessionFiles, options),
        F(e, { generation_config: "generation_config.json" }, options),
      ]);
    } else if (modelType === $) {
      info = await Promise.all([
        E(e, { model: "text_encoder", decoder_model_merged: "decoder_model_merged", encodec_decode: "encodec_decode" }, options),
        F(e, { generation_config: "generation_config.json" }, options),
      ]);
    } else {
      if (modelType !== y) {
        console.warn(`Model type for '${modelName ?? config?.model_type}' not found, assuming encoder-only architecture. Please report this at ${u.GITHUB_ISSUE_URL}.`);
      }
      info = await Promise.all([
        E(e, { model: options.model_file_name ?? "model" }, options),
      ]);
    }

    return new this(config, ...info);
  }

  /** Calling the model is the same as calling `forward`. */
  async _call(e) {
    return await this.forward(e);
  }

  /** Runs the forward function selected in the constructor. */
  async forward(e) {
    return await this._forward(this, e);
  }

  /** The loaded generation config, or `null` when none was loaded. */
  get generation_config() {
    return this.configs?.generation_config ?? null;
  }

  /**
   * Builds the list of logits warpers (temperature, top-k, top-p) enabled by `e`.
   * @param {Object} e Generation config.
   */
  _get_logits_warper(e) {
    const warpers = new d.LogitsProcessorList();
    if (e.temperature !== null && e.temperature !== 1) {
      warpers.push(new d.TemperatureLogitsWarper(e.temperature));
    }
    if (e.top_k !== null && e.top_k !== 0) {
      warpers.push(new d.TopKLogitsWarper(e.top_k));
    }
    if (e.top_p !== null && e.top_p < 1) {
      warpers.push(new d.TopPLogitsWarper(e.top_p));
    }
    return warpers;
  }

  /**
   * Builds the list of logits processors enabled by the generation config,
   * followed by any caller-supplied processors.
   *
   * @param {Object} e Generation config.
   * @param {number} t Length of the initial input sequence.
   * @param {Iterable|null} [n=null] Extra processors appended at the end.
   */
  _get_logits_processor(e, t, n = null) {
    const processors = new d.LogitsProcessorList();

    if (e.repetition_penalty !== null && e.repetition_penalty !== 1) {
      processors.push(new d.RepetitionPenaltyLogitsProcessor(e.repetition_penalty));
    }
    if (e.no_repeat_ngram_size !== null && e.no_repeat_ngram_size > 0) {
      processors.push(new d.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size));
    }
    if (e.bad_words_ids !== null) {
      processors.push(new d.NoBadWordsLogitsProcessor(e.bad_words_ids, e.eos_token_id));
    }
    if (e.min_length !== null && e.eos_token_id !== null && e.min_length > 0) {
      processors.push(new d.MinLengthLogitsProcessor(e.min_length, e.eos_token_id));
    }
    if (e.min_new_tokens !== null && e.eos_token_id !== null && e.min_new_tokens > 0) {
      processors.push(new d.MinNewTokensLengthLogitsProcessor(t, e.min_new_tokens, e.eos_token_id));
    }
    if (e.forced_bos_token_id !== null) {
      processors.push(new d.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id));
    }
    if (e.forced_eos_token_id !== null) {
      processors.push(new d.ForcedEOSTokenLogitsProcessor(e.max_length, e.forced_eos_token_id));
    }
    if (e.begin_suppress_tokens !== null) {
      // Suppression starts one position later when a BOS token is forced onto
      // a single-token prompt.
      const beginIndex = t > 1 || e.forced_bos_token_id === null ? t : t + 1;
      processors.push(new d.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens, beginIndex));
    }
    if (e.guidance_scale !== null && e.guidance_scale > 1) {
      processors.push(new d.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale));
    }
    if (n !== null) {
      processors.extend(n);
    }
    return processors;
  }

  /**
   * Merges, in increasing priority: the model config (with its `decoder`,
   * `generator` and `text_config` sub-configs flattened in), the loaded
   * generation config, the `e` argument, and those entries of `t` whose keys
   * are already properties of the resulting config.
   *
   * @param {Object|null} e Generation config passed by the caller.
   * @param {Object} t Remaining keyword arguments of `generate`.
   * @param {Function} [n] Config class to instantiate.
   */
  _prepare_generation_config(e, t, n = c.GenerationConfig) {
    const flattened = { ...this.config };
    for (const key of ["decoder", "generator", "text_config"]) {
      if (key in flattened) {
        Object.assign(flattened, flattened[key]);
      }
    }

    const generationConfig = new n(flattened);
    Object.assign(generationConfig, this.generation_config ?? {});
    if (e) {
      Object.assign(generationConfig, e);
    }
    if (t) {
      Object.assign(generationConfig, (0, o.pick)(t, Object.getOwnPropertyNames(generationConfig)));
    }
    return generationConfig;
  }

  /**
   * Builds the stopping criteria (max length, EOS token) plus caller extras.
   * @param {Object} e Generation config.
   * @param {Iterable|null} [t=null] Extra criteria appended at the end.
   */
  _get_stopping_criteria(e, t = null) {
    const criteria = new m.StoppingCriteriaList();
    if (e.max_length !== null) {
      criteria.push(new m.MaxLengthCriteria(e.max_length, this.config.max_position_embeddings ?? null));
    }
    if (e.eos_token_id !== null) {
      criteria.push(new m.EosTokenCriteria(e.eos_token_id));
    }
    if (t) {
      criteria.extend(t);
    }
    return criteria;
  }

  /**
   * Throws when this class cannot generate, suggesting generation-capable
   * classes registered for the same `model_type` when there are any.
   * @throws {Error}
   */
  _validate_model_class() {
    if (this.can_generate) {
      return;
    }

    const generationMappings = [bo, Mo, yo, mo];
    const modelName = P.get(this.constructor);
    const suggestions = new Set();
    const modelType = this.config.model_type;
    for (const mapping of generationMappings) {
      const entry = mapping.get(modelType);
      if (entry) {
        suggestions.add(entry[0]);
      }
    }

    let message = `The current model class (${modelName}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;
    if (suggestions.size > 0) {
      message += ` Please use the following class instead: ${[...suggestions].join(", ")}`;
    }
    throw Error(message);
  }

  /** Delegates to the input-preparation function selected in the constructor. */
  prepare_inputs_for_generation(...e) {
    return this._prepare_inputs_for_generation(this, ...e);
  }

  /**
   * Prepares `model_inputs` for the next step: stores the new cache, feeds
   * only the newly generated ids, extends the attention mask by one column
   * (decoder-only models) and clears `position_ids`. Mutates and returns
   * `model_inputs`.
   */
  _update_model_kwargs_for_generation({ generated_input_ids, outputs, model_inputs, is_encoder_decoder }) {
    model_inputs.past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values);
    model_inputs.input_ids = new p.Tensor("int64", generated_input_ids.flat(), [generated_input_ids.length, 1]);
    if (!is_encoder_decoder) {
      model_inputs.attention_mask = (0, p.cat)(
        [model_inputs.attention_mask, (0, p.ones)([model_inputs.attention_mask.dims[0], 1])],
        1,
      );
    }
    model_inputs.position_ids = null;
    return model_inputs;
  }

  /**
   * Selects the forward parameters from `model_kwargs` and stores `inputs`
   * under the model's main input name.
   *
   * @throws {Error} If `inputs` is given although the main input is already
   *   present in `model_kwargs`.
   */
  _prepare_model_inputs({ inputs, bos_token_id, model_kwargs }) {
    const model_inputs = (0, o.pick)(model_kwargs, this.forward_params);
    const input_name = this.main_input_name;

    if (input_name in model_inputs) {
      if (inputs) {
        throw new Error(`\`inputs\` were passed alongside \`${input_name}\`, which is not allowed. Make sure to either pass \`inputs\` or \`${input_name}=...\`.`);
      }
    } else {
      model_inputs[input_name] = inputs;
    }

    return { inputs_tensor: model_inputs[input_name], model_inputs, model_input_name: input_name };
  }

  /**
   * Runs the encoder once and stores its last hidden state as
   * `encoder_outputs` on the returned inputs.
   *
   * @throws {Error} If the encoder batch size differs from the decoder batch
   *   size and is not 1.
   */
  async _prepare_encoder_decoder_kwargs_for_generation({ inputs_tensor, model_inputs, model_input_name, generation_config }) {
    if (
      this.sessions.model.inputNames.includes("inputs_embeds") &&
      !model_inputs.inputs_embeds &&
      "_prepare_inputs_embeds" in this
    ) {
      // The encoder consumes embeddings: replace ids / pixels / mask with the
      // embeddings (and mask) computed by the subclass.
      const { input_ids, pixel_values, attention_mask, ...rest } = model_inputs;
      const prepared = await this._prepare_inputs_embeds(model_inputs);
      model_inputs = { ...rest, ...(0, o.pick)(prepared, ["inputs_embeds", "attention_mask"]) };
    }

    let { last_hidden_state } = await L(this, model_inputs);

    if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
      // Append an all-zero copy of the encoder output (and a zero mask) along
      // the batch dimension.
      last_hidden_state = (0, p.cat)([last_hidden_state, (0, p.full_like)(last_hidden_state, 0)], 0);
      if ("attention_mask" in model_inputs) {
        model_inputs.attention_mask = (0, p.cat)(
          [model_inputs.attention_mask, (0, p.zeros_like)(model_inputs.attention_mask)],
          0,
        );
      }
    } else if (model_inputs.decoder_input_ids) {
      const decoderBatchSize = z(model_inputs.decoder_input_ids).dims[0];
      if (decoderBatchSize !== last_hidden_state.dims[0]) {
        if (last_hidden_state.dims[0] !== 1) {
          throw new Error(`The encoder outputs have a different batch size (${last_hidden_state.dims[0]}) than the decoder inputs (${decoderBatchSize}).`);
        }
        // Repeat the single encoder output once per decoder sequence.
        const single = last_hidden_state;
        last_hidden_state = (0, p.cat)(Array.from({ length: decoderBatchSize }, () => single), 0);
      }
    }

    model_inputs.encoder_outputs = last_hidden_state;
    return model_inputs;
  }

  /**
   * Determines the initial decoder input ids: those supplied by the caller, or
   * one start token per sequence otherwise.
   *
   * @returns {{input_ids: any, model_inputs: Object}} `model_inputs` is
   *   `model_kwargs` without `decoder_input_ids`.
   * @throws {Error} If an array `decoder_start_token_id` does not match the batch size.
   */
  _prepare_decoder_input_ids_for_generation({ batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, generation_config }) {
    let { decoder_input_ids, ...model_inputs } = model_kwargs;

    if (!(decoder_input_ids instanceof p.Tensor)) {
      if (decoder_input_ids) {
        if (!Array.isArray(decoder_input_ids[0])) {
          // A flat list of ids is used for every sequence of the batch.
          const row = decoder_input_ids;
          decoder_input_ids = Array.from({ length: batch_size }, () => row);
        }
      } else {
        decoder_start_token_id ??= bos_token_id;
        if (this.config.model_type === "musicgen") {
          decoder_input_ids = Array.from(
            { length: batch_size * this.config.decoder.num_codebooks },
            () => [decoder_start_token_id],
          );
        } else if (Array.isArray(decoder_start_token_id)) {
          if (decoder_start_token_id.length !== batch_size) {
            throw new Error(`\`decoder_start_token_id\` expected to have length ${batch_size} but got ${decoder_start_token_id.length}`);
          }
          decoder_input_ids = decoder_start_token_id;
        } else {
          decoder_input_ids = Array.from({ length: batch_size }, () => [decoder_start_token_id]);
        }
      }
      decoder_input_ids = z(decoder_input_ids);
    }

    // NOTE(review): the mask is written to `model_kwargs`, not to the returned
    // `model_inputs` copy — confirm this is intended before relying on it.
    model_kwargs.decoder_attention_mask = (0, p.ones_like)(decoder_input_ids);

    return { input_ids: decoder_input_ids, model_inputs };
  }

  /**
   * Generates token ids autoregressively until every sequence satisfies the
   * stopping criteria.
   *
   * @param {Object} options
   * @param {*} [options.inputs=null] Main model input.
   * @param {Object|null} [options.generation_config=null]
   * @param {Iterable|null} [options.logits_processor=null] Extra logits processors.
   * @param {Iterable|null} [options.stopping_criteria=null] Extra stopping criteria.
   * @param {Object|null} [options.streamer=null] Receives tokens via `put` and a final `end`.
   * @returns {Promise<any>} The generated sequences as an int64 tensor, or an
   *   object with `sequences`, `past_key_values` and collected attentions when
   *   `return_dict_in_generate` is set.
   */
  async generate({
    inputs = null,
    generation_config = null,
    logits_processor = null,
    stopping_criteria = null,
    streamer = null,
    ...kwargs
  }) {
    this._validate_model_class();

    generation_config = this._prepare_generation_config(generation_config, kwargs);

    let { inputs_tensor, model_inputs, model_input_name } = this._prepare_model_inputs({
      inputs,
      model_kwargs: kwargs,
    });

    const is_encoder_decoder = this.config.is_encoder_decoder;

    if (is_encoder_decoder && !("encoder_outputs" in model_inputs)) {
      model_inputs = await this._prepare_encoder_decoder_kwargs_for_generation({
        inputs_tensor,
        model_inputs,
        model_input_name,
        generation_config,
      });
    }

    let input_ids;
    if (is_encoder_decoder) {
      ({ input_ids, model_inputs } = this._prepare_decoder_input_ids_for_generation({
        batch_size: model_inputs[model_input_name].dims.at(0),
        model_input_name,
        model_kwargs: model_inputs,
        decoder_start_token_id: generation_config.decoder_start_token_id,
        bos_token_id: generation_config.bos_token_id,
        generation_config,
      }));
    } else {
      input_ids = model_inputs[model_input_name];
    }

    const inputIdsLength = input_ids.dims.at(-1);
    if (generation_config.max_new_tokens !== null) {
      generation_config.max_length = inputIdsLength + generation_config.max_new_tokens;
    }

    const preparedLogitsProcessor = this._get_logits_processor(generation_config, inputIdsLength, logits_processor);
    const preparedStoppingCriteria = this._get_stopping_criteria(generation_config, stopping_criteria);

    const numInputs = model_inputs[model_input_name].dims.at(0);
    const sampler = f.LogitsSampler.getSampler(generation_config);

    // Accumulated per-sequence scores; updated below but not returned.
    const scores = new Array(numInputs).fill(0);
    const allInputIds = input_ids.tolist();
    if (streamer) {
      streamer.put(allInputIds);
    }

    let outputs;
    const attentions = {};
    for (;;) {
      model_inputs = this.prepare_inputs_for_generation(allInputIds, model_inputs, generation_config);
      outputs = await this.forward(model_inputs);

      if (generation_config.output_attentions && generation_config.return_dict_in_generate) {
        const stepAttentions = this.getAttentions(outputs);
        for (const key in stepAttentions) {
          if (!(key in attentions)) {
            attentions[key] = [];
          }
          attentions[key].push(stepAttentions[key]);
        }
      }

      // Only the logits of the last position are used to pick the next token.
      const nextTokenLogits = preparedLogitsProcessor(allInputIds, outputs.logits.slice(null, -1, null));

      const generatedInputIds = [];
      for (let batchIndex = 0; batchIndex < nextTokenLogits.dims.at(0); ++batchIndex) {
        const sampled = await sampler(nextTokenLogits[batchIndex]);
        // Only the first sampled (token, score) pair is consumed.
        for (const [tokenId, score] of sampled) {
          const token = BigInt(tokenId);
          scores[batchIndex] += score;
          allInputIds[batchIndex].push(token);
          generatedInputIds.push([token]);
          break;
        }
      }
      if (streamer) {
        streamer.put(generatedInputIds);
      }

      if (preparedStoppingCriteria(allInputIds).every((done) => done)) {
        break;
      }

      model_inputs = this._update_model_kwargs_for_generation({
        generated_input_ids: generatedInputIds,
        outputs,
        model_inputs,
        is_encoder_decoder,
      });
    }

    if (streamer) {
      streamer.end();
    }

    const past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values, true);
    const sequences = new p.Tensor("int64", allInputIds.flat(), [allInputIds.length, allInputIds[0].length]);

    if (generation_config.return_dict_in_generate) {
      return { sequences, past_key_values, ...attentions };
    }

    // The outputs are not handed back to the caller: release GPU buffers.
    for (const tensor of Object.values(outputs)) {
      if (tensor.location === "gpu-buffer") {
        tensor.dispose();
      }
    }
    return sequences;
  }

  /**
   * Builds the next `past_key_values` from the `present*` outputs of a step.
   * Encoder entries are carried over from the previous cache when there is
   * one; replaced entries located in a GPU buffer are disposed.
   *
   * @param {Object} e Outputs of the last forward pass.
   * @param {Object|null|undefined} t Cache used for that forward pass.
   * @param {boolean} [n=false] Also dispose the previous encoder entries.
   * @returns {Object} Prototype-less object keyed `past_key_values*`.
   */
  getPastKeyValues(e, t, n = false) {
    const pkvs = Object.create(null);
    for (const name in e) {
      if (!name.startsWith("present")) {
        continue;
      }
      const newName = name.replace("present", "past_key_values");
      const isEncoderEntry = name.includes("encoder");

      pkvs[newName] = isEncoderEntry && t ? t[newName] : e[name];

      if (t && (!isEncoderEntry || n)) {
        const previous = t[newName];
        if (previous.location === "gpu-buffer") {
          previous.dispose();
        }
      }
    }
    return pkvs;
  }

  /**
   * Groups output tensors by attention kind, matching on the key prefix.
   * @param {Object} e Model outputs.
   * @returns {Object} Arrays of tensors keyed by `cross_attentions`,
   *   `encoder_attentions` and `decoder_attentions` (only kinds that occur).
   */
  getAttentions(e) {
    const grouped = {};
    for (const kind of ["cross_attentions", "encoder_attentions", "decoder_attentions"]) {
      for (const name in e) {
        if (name.startsWith(kind)) {
          if (!(kind in grouped)) {
            grouped[kind] = [];
          }
          grouped[kind].push(e[name]);
        }
      }
    }
    return grouped;
  }

  /**
   * Adds cache tensors to the decoder feeds `e`: the given cache `t`, or empty
   * tensors with the shapes reported by `getKeyValueShapes` when there is none.
   */
  addPastKeyValues(e, t) {
    if (t) {
      Object.assign(e, t);
      return;
    }

    const session = this.sessions.decoder_model_merged ?? this.sessions.model;
    const kvDtype = session?.config?.kv_cache_dtype ?? "float32";
    // The same empty buffer backs every placeholder tensor.
    const emptyData = kvDtype === "float16" ? new Uint16Array() : [];
    const shapes = (0, r.getKeyValueShapes)(this.config);
    for (const name in shapes) {
      e[name] = new p.Tensor(kvDtype, emptyData, shapes[name]);
    }
  }

  /**
   * Runs the `vision_encoder` session and returns its `image_features`. When
   * `config.num_image_tokens` is unset it is taken from the features' second
   * dimension (with a warning).
   */
  async encode_image({ pixel_values }) {
    const features = (await A(this.sessions.vision_encoder, { pixel_values })).image_features;
    if (!this.config.num_image_tokens) {
      console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`);
      this.config.num_image_tokens = features.dims[1];
    }
    return features;
  }

  /** Runs the `embed_tokens` session and returns its `inputs_embeds`. */
  async encode_text({ input_ids }) {
    return (await A(this.sessions.embed_tokens, { input_ids })).inputs_embeds;
  }
}
extends ee{async _call(e){return new Ml(await super._call(e))}}class ie extends q{}class oe extends ie{}class le extends ie{async _call(e){return new xl(await super._call(e))}}class ue extends ie{async _call(e){return new yl(await super._call(e))}}class de extends ie{async _call(e){return new vl(await super._call(e))}}class ce extends ie{async _call(e){return new Ml(await super._call(e))}}class pe extends q{}class he extends pe{}class me extends pe{async _call(e){return new xl(await super._call(e))}}class fe extends pe{async _call(e){return new yl(await super._call(e))}}class ge extends pe{async _call(e){return new vl(await super._call(e))}}class _e extends pe{async _call(e){return new Ml(await super._call(e))}}class we extends q{}class ye extends we{}class be extends we{async _call(e){return new xl(await super._call(e))}}class ve extends we{async _call(e){return new yl(await super._call(e))}}class xe extends we{async _call(e){return new vl(await super._call(e))}}class Me extends we{async _call(e){return new Ml(await super._call(e))}}class Te extends q{}class ke extends Te{}class $e extends Te{async _call(e){return new xl(await super._call(e))}}class Ce extends Te{async _call(e){return new yl(await super._call(e))}}class Se extends Te{async _call(e){return new vl(await super._call(e))}}class Pe extends Te{async _call(e){return new Ml(await super._call(e))}}class Ee extends q{}class Fe extends Ee{}class Ae extends Ee{async _call(e){return new xl(await super._call(e))}}class Ie extends Ee{async _call(e){return new yl(await super._call(e))}}class ze extends Ee{async _call(e){return new vl(await super._call(e))}}class Oe extends Ee{async _call(e){return new Ml(await super._call(e))}}class Be extends q{}class Le extends Be{}class De extends Be{async _call(e){return new yl(await super._call(e))}}class Re extends Be{async _call(e){return new vl(await super._call(e))}}class Ne extends Be{async _call(e){return new Ml(await super._call(e))}}class Ve extends Be{async 
_call(e){return new xl(await super._call(e))}}class je extends q{}class qe extends je{}class Ge extends je{async _call(e){return new xl(await super._call(e))}}class Ue extends je{async _call(e){return new yl(await super._call(e))}}class We extends je{async _call(e){return new vl(await super._call(e))}}class He extends q{}class Xe extends He{}class Ke extends He{async _call(e){return new xl(await super._call(e))}}class Qe extends He{async _call(e){return new yl(await super._call(e))}}class Ye extends He{async _call(e){return new Ml(await super._call(e))}}class Ze extends q{}class Je extends Ze{}class et extends Ze{async _call(e){return new xl(await super._call(e))}}class tt extends Ze{async _call(e){return new yl(await super._call(e))}}class nt extends Ze{async _call(e){return new vl(await super._call(e))}}class rt extends Ze{async _call(e){return new Ml(await super._call(e))}}class at extends q{}class st extends at{}class it extends at{async _call(e){return new xl(await super._call(e))}}class ot extends at{async _call(e){return new yl(await super._call(e))}}class lt extends at{async _call(e){return new Ml(await super._call(e))}}class ut extends q{}class dt extends ut{}class ct extends ut{async _call(e){return new yl(await super._call(e))}}class pt extends ut{async _call(e){return new Ml(await super._call(e))}}class ht extends ut{async _call(e){return new xl(await super._call(e))}}class mt extends q{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"]}class ft extends mt{}class gt extends mt{}class _t extends q{}class wt extends _t{}class yt extends _t{}class bt extends q{}class vt extends bt{}class xt extends bt{}class Mt extends q{}class Tt extends Mt{}class kt extends Mt{}class $t extends Mt{async _call(e){return new yl(await super._call(e))}}class Ct extends q{}class St extends Ct{}class Pt extends Ct{}class Et extends Ct{async _call(e){return new yl(await super._call(e))}}class Ft extends 
/**
 * Whisper model with a language-modelling head: adds Whisper-specific prompt
 * tokens and logits processors to `generate`, and can derive token-level
 * timestamps from the decoder's cross-attentions.
 */
class ln extends sn {
  /** Same as the base implementation, but builds a `WhisperGenerationConfig`. */
  _prepare_generation_config(e, t) {
    return super._prepare_generation_config(e, t, _.WhisperGenerationConfig);
  }

  /**
   * Builds the initial decoder prompt from the generation config.
   *
   * @param {Object} e Generation config.
   * @returns {number[]} Start token, then (multilingual models) language and
   *   task tokens, then optionally the no-timestamps token; nullish entries
   *   are dropped.
   * @throws {Error} If `task` or `language` is set for a non-multilingual model.
   */
  _retrieve_init_tokens(e) {
    const initTokens = [e.decoder_start_token_id];

    let language = e.language;
    const task = e.task;
    if (e.is_multilingual) {
      if (!language) {
        console.warn("No language specified - defaulting to English (en).");
        language = "en";
      }
      const languageToken = `<|${(0, w.whisper_language_to_code)(language)}|>`;
      initTokens.push(e.lang_to_id[languageToken]);
      initTokens.push(e.task_to_id[task ?? "transcribe"]);
    } else if (language || task) {
      throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");
    }

    if (!e.return_timestamps && e.no_timestamps_token_id && initTokens.at(-1) !== e.no_timestamps_token_id) {
      initTokens.push(e.no_timestamps_token_id);
    } else if (e.return_timestamps && initTokens.at(-1) === e.no_timestamps_token_id) {
      console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`.");
      initTokens.pop();
    }

    return initTokens.filter((token) => token != null);
  }

  /**
   * Generates a transcription. Accepts the options of the base `generate`
   * plus Whisper-specific ones, which are read from the merged generation
   * config.
   *
   * @returns {Promise<any>} The base `generate` result; with
   *   `return_token_timestamps` it additionally carries `token_timestamps`.
   * @throws {Error} If token timestamps are requested without `alignment_heads`.
   */
  async generate({
    inputs = null,
    generation_config = null,
    logits_processor = null,
    stopping_criteria = null,
    ...kwargs
  }) {
    generation_config = this._prepare_generation_config(generation_config, kwargs);

    const init_tokens = kwargs.decoder_input_ids ?? this._retrieve_init_tokens(generation_config);

    // Whisper-specific processors are collected first and then combined with
    // the caller's list in a NEW list, so the caller's object is never
    // mutated (otherwise they would pile up across repeated calls).
    const whisperProcessors = [];
    if (generation_config.return_timestamps) {
      whisperProcessors.push(new d.WhisperTimeStampLogitsProcessor(generation_config, init_tokens));
    }
    if (generation_config.begin_suppress_tokens) {
      whisperProcessors.push(
        new d.SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, init_tokens.length),
      );
    }
    let processors = logits_processor;
    if (whisperProcessors.length > 0) {
      processors = new d.LogitsProcessorList();
      if (logits_processor != null) {
        processors.extend(logits_processor);
      }
      for (const processor of whisperProcessors) {
        processors.push(processor);
      }
    }

    if (generation_config.return_token_timestamps) {
      if (!generation_config.alignment_heads) {
        throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");
      }
      if (generation_config.task === "translate") {
        console.warn("Token-level timestamps may not be reliable for task 'translate'.");
      }
      // The cross-attentions are needed to compute the timestamps.
      generation_config.output_attentions = true;
      generation_config.return_dict_in_generate = true;
    }

    const outputs = await super.generate({
      inputs,
      generation_config,
      logits_processor: processors,
      // Previously dropped: caller-supplied criteria never reached the base loop.
      stopping_criteria,
      decoder_input_ids: init_tokens,
      ...kwargs,
    });

    if (generation_config.return_token_timestamps) {
      outputs.token_timestamps = this._extract_token_timestamps(
        outputs,
        generation_config.alignment_heads,
        generation_config.num_frames,
      );
    }
    return outputs;
  }

  /**
   * Computes token-level timestamps from the cross-attentions of the selected
   * alignment heads using dynamic time warping.
   *
   * @param {Object} e Generation outputs with `cross_attentions` and `sequences`.
   * @param {Array<[number, number]>} t `[layer, head]` pairs to use.
   * @param {number|null} [n=null] Number of audio frames to consider.
   * @param {number} [r=0.02] Multiplier converting a frame index to a time.
   * @returns {any} float32 tensor with the dims of `e.sequences`.
   * @throws {Error} If cross-attentions are missing or a layer index is out of range.
   */
  _extract_token_timestamps(e, t, n = null, r = 0.02) {
    if (!e.cross_attentions) {
      throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");
    }
    if (n == null) {
      console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");
    }

    let medianFilterWidth = this.config.median_filter_width;
    if (medianFilterWidth === undefined) {
      console.warn("Model config has no `median_filter_width`, using default value of 7.");
      medianFilterWidth = 7;
    }

    // One tensor per decoder layer: the per-step attentions joined along dim 2.
    const stepAttentions = e.cross_attentions;
    const layerAttentions = Array.from({ length: this.config.decoder_layers }, (_unused, layerIndex) =>
      (0, p.cat)(stepAttentions.map((step) => step[layerIndex]), 2),
    );

    const weights = (0, p.stack)(
      t.map(([layerIndex, headIndex]) => {
        if (layerIndex >= layerAttentions.length) {
          throw new Error(`Layer index ${layerIndex} is out of bounds for cross attentions (length ${layerAttentions.length}).`);
        }
        return n
          ? layerAttentions[layerIndex].slice(null, headIndex, null, [0, n])
          : layerAttentions[layerIndex].slice(null, headIndex);
      }),
    ).transpose(1, 0, 2, 3);

    const [std, mean] = (0, p.std_mean)(weights, -2, 0, true);

    // Normalise and median-filter a copy of the weights, row by row.
    const smoothed = weights.clone();
    for (let a = 0; a < smoothed.dims[0]; ++a) {
      const perHead = smoothed[a];
      for (let b = 0; b < perHead.dims[0]; ++b) {
        const rows = perHead[b];
        const stdData = std[a][b][0].data;
        const meanData = mean[a][b][0].data;
        for (let c = 0; c < rows.dims[0]; ++c) {
          const row = rows[c].data;
          for (let k = 0; k < row.length; ++k) {
            row[k] = (row[k] - meanData[k]) / stdData[k];
          }
          row.set((0, h.medianFilter)(row, medianFilterWidth));
        }
      }
    }

    // Average over the selected heads.
    // NOTE(review): only index 0 of this array is populated while the loop
    // below runs over `sequences.dims[0]` — confirm batch sizes > 1 are not expected.
    const batchedMatrices = [(0, p.mean)(smoothed, 1)];

    const timestampsShape = e.sequences.dims;
    const timestamps = new p.Tensor(
      "float32",
      new Float32Array(timestampsShape[0] * timestampsShape[1]),
      timestampsShape,
    );

    for (let batchIndex = 0; batchIndex < timestampsShape[0]; ++batchIndex) {
      const matrix = batchedMatrices[batchIndex].neg().squeeze_(0);
      const [textIndices, timeIndices] = (0, h.dynamic_time_warping)(matrix.tolist());

      // A jump marks the first path position of each new token.
      const diffs = Array.from({ length: textIndices.length - 1 }, (_unused, idx) => textIndices[idx + 1] - textIndices[idx]);
      const jumps = (0, o.mergeArrays)([1], diffs).map((diff) => !!diff);

      const jumpTimes = [];
      for (let idx = 0; idx < jumps.length; ++idx) {
        if (jumps[idx]) {
          jumpTimes.push(timeIndices[idx] * r);
        }
      }
      // Written from offset 1; position 0 keeps its initial 0.
      timestamps[batchIndex].data.set(jumpTimes, 1);
    }

    return timestamps;
  }
}
a=s[n],i=e[n],o=t[n],d=r[n];l.push((0,p.cat)([i.slice([0,a]),o,i.slice([a+1,i.dims[0]])],0)),u.push((0,p.cat)([d.slice([0,a]),(0,p.ones)([o.dims[0]]),d.slice([a+1,d.dims[0]])],0))}return{inputs_embeds:(0,p.stack)(l,0),attention_mask:(0,p.stack)(u,0)}}}class pn extends cn{}class hn extends q{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class mn extends hn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,p.cat)([t,e],1),attention_mask:(0,p.cat)([(0,p.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let a,s;return e&&(a=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),a&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:a,image_features:s,input_ids:e,attention_mask:r})):n=a||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:a,encoder_outputs:s,past_key_values:i,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!s){let{last_hidden_state:e}=await L(this,{inputs_embeds:o,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:a,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:i};return await D(this,u,!0)}}class fn extends q{}class gn extends fn{}class _n extends fn{static async 
from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class wn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class yn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class bn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class vn extends q{}class xn extends vn{}class Mn extends vn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Tn extends fn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class kn extends q{}class $n extends kn{}class Cn extends q{}class Sn extends Cn{}class Pn extends Cn{}class En extends q{}class Fn extends En{}class An extends En{}class In extends q{}class zn extends In{}class On extends In{}class Bn extends q{}class Ln extends Bn{}class Dn extends Bn{}class Rn extends q{}class Nn extends Rn{}class Vn extends Rn{}class jn extends q{}class qn extends jn{}class Gn extends jn{}class Un extends q{}class Wn extends Un{}class Hn extends Un{}class Xn extends q{}class Kn extends Xn{}class Qn extends Xn{}class Yn extends q{}class Zn extends Yn{}class Jn extends Yn{}class er extends q{}class tr extends er{}class nr extends er{}class rr extends q{}class ar extends rr{}class sr extends rr{}class ir extends q{}class or extends ir{}class lr extends ir{}class ur extends q{}class dr extends ur{}class cr extends ur{}class pr extends q{}class hr extends pr{}class mr extends pr{}class fr extends q{}class gr extends fr{}class _r extends fr{}class wr extends q{}class yr extends wr{}class br extends wr{}class vr extends q{}class xr extends vr{}class Mr extends vr{}class Tr extends q{}class kr extends Tr{}class $r extends Tr{}class Cr extends q{}class Sr extends Cr{}class Pr extends Cr{}class Er extends 
q{}class Fr extends Er{}class Ar extends Er{}class Ir extends q{}class zr extends Ir{}class Or extends Ir{}class Br extends q{}class Lr extends Br{}class Dr extends Br{}class Rr extends q{}class Nr extends Rr{}class Vr extends Rr{async _call(e){return new yl(await super._call(e))}}class jr extends q{}class qr extends jr{}class Gr extends jr{async _call(e){return new yl(await super._call(e))}}class Ur extends q{}class Wr extends Ur{}class Hr extends q{}class Xr extends Hr{}class Kr extends Hr{async _call(e){return new yl(await super._call(e))}}class Qr extends q{}class Yr extends Qr{}class Zr extends q{}class Jr extends Zr{}class ea extends Zr{async _call(e){return new yl(await super._call(e))}}class ta extends q{}class na extends ta{async _call(e){return new $l(await super._call(e))}}class ra extends q{}class aa extends ra{}class sa extends ra{async _call(e){return new yl(await super._call(e))}}class ia extends q{}class oa extends ia{}class la extends ia{async _call(e){return new yl(await super._call(e))}}class ua extends q{}class da extends ua{}class ca extends ua{}class pa extends q{}class ha extends pa{}class ma extends pa{}class fa extends q{}class ga extends fa{}class _a extends fa{async _call(e){return new yl(await super._call(e))}}class wa extends q{}class ya extends wa{}class ba extends wa{async _call(e){return new xa(await super._call(e))}}class va extends wa{async _call(e){return new Ma(await super._call(e))}}class xa extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ma extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Ta extends q{}class ka extends Ta{}class $a extends Ta{async _call(e){return new Ca(await super._call(e))}}class Ca extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Sa extends q{}class Pa extends Sa{}class Ea extends Sa{async _call(e){return new Fa(await super._call(e))}}class Fa 
extends xa{}class Aa extends q{}class Ia extends Aa{}class za extends Aa{async _call(e){return new yl(await super._call(e))}}class Oa extends q{}class Ba extends Oa{}class La extends Oa{async _call(e){return new yl(await super._call(e))}}class Da extends q{}class Ra extends Da{}class Na extends Da{async _call(e){return new yl(await super._call(e))}}class Va extends q{}class ja extends Va{}class qa extends Va{async _call(e){return new yl(await super._call(e))}}class Ga extends q{}class Ua extends Ga{}class Wa extends Ga{}class Ha extends q{}class Xa extends Ha{}class Ka extends Ha{}class Qa extends q{}class Ya extends Qa{}class Za extends q{}class Ja extends Za{}class es extends Za{}class ts extends Za{}class ns extends q{}class rs extends ns{}class as extends q{}class ss extends as{}class is extends as{}class os extends q{}class ls extends os{}class us extends os{}class ds extends q{}class cs extends ds{}class ps extends q{}class hs extends ps{}class ms extends ps{async _call(e){return new yl(await super._call(e))}}class fs extends q{}class gs extends fs{}class _s extends fs{async _call(e){return new yl(await super._call(e))}}class ws extends q{}class ys extends ws{}class bs extends ws{async _call(e){return new yl(await super._call(e))}}class vs extends q{}class xs extends vs{}class Ms extends vs{async _call(e){return new Ts(await super._call(e))}}class Ts extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ks extends q{}class $s extends ks{async get_image_embeddings({pixel_values:e}){return await L(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new p.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return 
e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new Cs(await super._call(e))}}class Cs extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Ss extends q{}class Ps extends Ss{}class Es extends Ss{}class Fs extends q{}class As extends Fs{}class Is extends Fs{}class zs extends q{}class Os extends zs{}class Bs extends zs{async _call(e){return new Tl(await super._call(e))}}class Ls extends zs{async _call(e){return new yl(await super._call(e))}}class Ds extends zs{async _call(e){return new vl(await super._call(e))}}class Rs extends q{}class Ns extends Rs{}class Vs extends Rs{async _call(e){return new vl(await super._call(e))}}class js extends q{}class qs extends js{}class Gs extends q{}class Us extends Gs{}class Ws extends Gs{async _call(e){return new Tl(await super._call(e))}}class Hs extends Gs{async _call(e){return new yl(await super._call(e))}}class Xs extends q{}class Ks extends Xs{}class Qs extends Xs{async _call(e){return new Tl(await super._call(e))}}class Ys extends Xs{async _call(e){return new yl(await super._call(e))}}class Zs extends Xs{async _call(e){return new vl(await super._call(e))}}class Js extends q{}class ei extends Js{}class ti extends Js{async _call(e){return new Tl(await super._call(e))}}class ni extends Js{async _call(e){return new yl(await super._call(e))}}class ri extends q{}class ai extends zs{}class si extends zs{async _call(e){return new Tl(await super._call(e))}}class ii extends zs{async _call(e){return new yl(await super._call(e))}}class oi extends q{}class li extends oi{}class ui extends oi{async _call(e){return new Tl(await super._call(e))}}class di extends oi{async _call(e){return new yl(await super._call(e))}}class ci extends oi{async _call(e){return new bl(await super._call(e))}}class pi extends oi{async _call(e){return 
new vl(await super._call(e))}}class hi extends q{}class mi extends hi{}class fi extends hi{}class gi extends hi{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:a=20,vocoder:s=null}={}){const i={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await L(this,i),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*a),c=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=O(!!g);let r;r=g?g.output_sequence_out:new p.Tensor("float32",new Float32Array(h),[1,1,h]);let a={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(a,f),g=await A(this.sessions.decoder_model_merged,a),f=this.getPastKeyValues(g,f);const{prob:s,spectrum:i}=g;if(m.push(i),_>=c&&(Array.from(s.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,p.cat)(m),{waveform:y}=await A(s.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class _i extends q{main_input_name="spectrogram"}class wi extends q{}class yi extends wi{}class bi extends q{}class vi extends bi{}class xi extends bi{}class Mi extends q{}class Ti extends Mi{}class ki extends Mi{}class $i extends q{}class Ci extends $i{}class Si extends $i{}class Pi extends q{}class Ei extends Pi{}class Fi extends Pi{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ai extends Pi{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ii extends q{}class zi extends Ii{async _call(e){return new Cl(await super._call(e))}}class Oi extends q{}class Bi extends Oi{}class Li extends Oi{}class Di extends Oi{}class Ri extends q{}class Ni extends Ri{}class Vi extends Ri{}class ji extends q{}class qi extends ji{}class Gi extends ji{async _call(e){return new yl(await super._call(e))}}class Ui extends q{}class Wi extends Ui{}class Hi extends Ui{}class Xi extends 
q{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,a=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%r;i>0&&i<=a&&(e.data[s++]=e.data[t])}const i=Math.floor(t/r),o=s/(i*r);return new p.Tensor(e.type,e.data.slice(0,s),[i,r,o])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class Ki extends q{}class Qi extends Ki{}class Yi extends Ki{async _call(e){return new yl(await super._call(e))}}class Zi extends q{}class Ji extends Zi{}class eo extends Zi{async _call(e){return new yl(await super._call(e))}}class to extends q{}class no extends to{}class ro extends to{async _call(e){return new yl(await super._call(e))}}class ao extends q{}class so extends ao{}class io extends ao{async _call(e){return new yl(await super._call(e))}}class oo extends q{}class lo extends oo{}class uo{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:a=null,local_files_only:s=!1,revision:i="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:a,local_files_only:s,revision:i,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await 
r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await q.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const co=new Map([["bert",["BertModel",H]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",te]],["electra",["ElectraModel",he]],["esm",["EsmModel",qe]],["convbert",["ConvBertModel",oe]],["camembert",["CamembertModel",ye]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Fe]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",dt]],["distilbert",["DistilBertModel",Le]],["roberta",["RobertaModel",Rt]],["xlm",["XLMModel",Ut]],["xlm-roberta",["XLMRobertaModel",Yt]],["clap",["ClapModel",Ei]],["clip",["CLIPModel",gn]],["clipseg",["CLIPSegModel",Sn]],["chinese_clip",["ChineseCLIPModel",$n]],["siglip",["SiglipModel",xn]],["mobilebert",["MobileBertModel",Xe]],["squeezebert",["SqueezeBertModel",st]],["wav2vec2",["Wav2Vec2Model",Os]],["wav2vec2-bert",["Wav2Vec2BertModel",ei]],["unispeech",["UniSpeechModel",Us]],["unispeech-sat",["UniSpeechSatModel",Ks]],["hubert",["HubertModel",ai]],["wavlm",["WavLMModel",li]],["audio-spectrogram-transformer",["ASTModel",rn]],["vits",["VitsModel",zi]],["pyannote",["PyAnnoteModel",Ns]],["wespeaker-resnet",["WeSpeakerResNetModel",qs]],["detr",["DetrModel",ya]],["rt_detr",["RTDetrModel",ka]],["table-transformer",["TableTransformerModel",Pa]],["vit",["ViTModel",Nr]],["pvt",["PvtModel",qr]],["vit_msn",["ViTMSNModel",Xr]],["vit_mae",["ViTMAEModel",Wr]],["groupvit",["GroupViTModel",Yr]],["fastvit",["FastViTModel",Jr]],["mobilevit",["MobileViTModel",aa]],["mobilevitv2",["MobileViTV2Model",oa]],["owlvit",["OwlViTModel",da]],[
"owlv2",["Owlv2Model",ha]],["beit",["BeitModel",ga]],["deit",["DeiTModel",Ia]],["hiera",["HieraModel",Ba]],["convnext",["ConvNextModel",hs]],["convnextv2",["ConvNextV2Model",gs]],["dinov2",["Dinov2Model",ys]],["resnet",["ResNetModel",Ra]],["swin",["SwinModel",ja]],["swin2sr",["Swin2SRModel",Ua]],["donut-swin",["DonutSwinModel",cs]],["yolos",["YolosModel",xs]],["dpt",["DPTModel",Xa]],["glpn",["GLPNModel",ls]],["hifigan",["SpeechT5HifiGan",_i]],["efficientnet",["EfficientNetModel",qi]],["decision_transformer",["DecisionTransformerModel",lo]],["mobilenet_v1",["MobileNetV1Model",Qi]],["mobilenet_v2",["MobileNetV2Model",Ji]],["mobilenet_v3",["MobileNetV3Model",no]],["mobilenet_v4",["MobileNetV4Model",so]],["maskformer",["MaskFormerModel",ss]]]),po=new Map([["t5",["T5Model",ft]],["longt5",["LongT5Model",wt]],["mt5",["MT5Model",vt]],["bart",["BartModel",Tt]],["mbart",["MBartModel",St]],["marian",["MarianModel",Ps]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",As]],["blenderbot",["BlenderbotModel",It]],["blenderbot-small",["BlenderbotSmallModel",Bt]]]),ho=new Map([["bloom",["BloomModel",Fr]],["jais",["JAISModel",zn]],["gpt2",["GPT2Model",Fn]],["gptj",["GPTJModel",qn]],["gpt_bigcode",["GPTBigCodeModel",Wn]],["gpt_neo",["GPTNeoModel",Ln]],["gpt_neox",["GPTNeoXModel",Nn]],["codegen",["CodeGenModel",Kn]],["llama",["LlamaModel",Zn]],["olmo",["OlmoModel",ar]],["mobilellm",["MobileLLMModel",tr]],["granite",["GraniteModel",or]],["cohere",["CohereModel",dr]],["gemma",["GemmaModel",hr]],["gemma2",["Gemma2Model",gr]],["openelm",["OpenELMModel",yr]],["qwen2",["Qwen2Model",xr]],["phi",["PhiModel",kr]],["phi3",["Phi3Model",Sr]],["mpt",["MptModel",zr]],["opt",["OPTModel",Lr]],["mistral",["MistralModel",vi]],["starcoder2",["Starcoder2Model",Ti]],["falcon",["FalconModel",Ci]],["stablelm",["StableLmModel",Ni]]]),mo=new Map([["speecht5",["SpeechT5ForSpeechToText",fi]],["whisper",["WhisperForConditionalGeneration",ln]]]),fo=new 
Map([["speecht5",["SpeechT5ForTextToSpeech",gi]]]),go=new Map([["vits",["VitsModel",zi]],["musicgen",["MusicgenForConditionalGeneration",Xi]]]),_o=new Map([["bert",["BertForSequenceClassification",K]],["roformer",["RoFormerForSequenceClassification",re]],["electra",["ElectraForSequenceClassification",fe]],["esm",["EsmForSequenceClassification",Ue]],["convbert",["ConvBertForSequenceClassification",ue]],["camembert",["CamembertForSequenceClassification",ve]],["deberta",["DebertaForSequenceClassification",Ce]],["deberta-v2",["DebertaV2ForSequenceClassification",Ie]],["mpnet",["MPNetForSequenceClassification",tt]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",De]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Ht]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",$t]],["mbart",["MBartForSequenceClassification",Et]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",ot]]]),wo=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ae]],["electra",["ElectraForTokenClassification",ge]],["esm",["EsmForTokenClassification",We]],["convbert",["ConvBertForTokenClassification",de]],["camembert",["CamembertForTokenClassification",xe]],["deberta",["DebertaForTokenClassification",Se]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",nt]],["distilbert",["DistilBertForTokenClassification",Re]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Xt]],["xlm-roberta",["XLMRobertaForTokenClassification",en]]]),yo=new 
Map([["t5",["T5ForConditionalGeneration",gt]],["longt5",["LongT5ForConditionalGeneration",yt]],["mt5",["MT5ForConditionalGeneration",xt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",Es]],["m2m_100",["M2M100ForConditionalGeneration",Is]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Lt]]]),bo=new Map([["bloom",["BloomForCausalLM",Ar]],["gpt2",["GPT2LMHeadModel",An]],["jais",["JAISLMHeadModel",On]],["gptj",["GPTJForCausalLM",Gn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Hn]],["gpt_neo",["GPTNeoForCausalLM",Dn]],["gpt_neox",["GPTNeoXForCausalLM",Vn]],["codegen",["CodeGenForCausalLM",Qn]],["llama",["LlamaForCausalLM",Jn]],["olmo",["OlmoForCausalLM",sr]],["mobilellm",["MobileLLMForCausalLM",nr]],["granite",["GraniteForCausalLM",lr]],["cohere",["CohereForCausalLM",cr]],["gemma",["GemmaForCausalLM",mr]],["gemma2",["Gemma2ForCausalLM",_r]],["openelm",["OpenELMForCausalLM",br]],["qwen2",["Qwen2ForCausalLM",Mr]],["phi",["PhiForCausalLM",$r]],["phi3",["Phi3ForCausalLM",Pr]],["mpt",["MptForCausalLM",Or]],["opt",["OPTForCausalLM",Dr]],["mbart",["MBartForCausalLM",Ft]],["mistral",["MistralForCausalLM",xi]],["starcoder2",["Starcoder2ForCausalLM",ki]],["falcon",["FalconForCausalLM",Si]],["trocr",["TrOCRForCausalLM",yi]],["stablelm",["StableLmForCausalLM",Vi]]]),vo=new 
Map([["bert",["BertForMaskedLM",X]],["roformer",["RoFormerForMaskedLM",ne]],["electra",["ElectraForMaskedLM",me]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",le]],["camembert",["CamembertForMaskedLM",be]],["deberta",["DebertaForMaskedLM",$e]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",et]],["albert",["AlbertForMaskedLM",ht]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Nt]],["xlm",["XLMWithLMHeadModel",Wt]],["xlm-roberta",["XLMRobertaForMaskedLM",Zt]],["mobilebert",["MobileBertForMaskedLM",Ke]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),xo=new Map([["bert",["BertForQuestionAnswering",Y]],["roformer",["RoFormerForQuestionAnswering",se]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Me]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Oe]],["mpnet",["MPNetForQuestionAnswering",rt]],["albert",["AlbertForQuestionAnswering",pt]],["distilbert",["DistilBertForQuestionAnswering",Ne]],["roberta",["RobertaForQuestionAnswering",qt]],["xlm",["XLMForQuestionAnswering",Kt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",tn]],["mobilebert",["MobileBertForQuestionAnswering",Ye]],["squeezebert",["SqueezeBertForQuestionAnswering",lt]]]),Mo=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",un]]]),To=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",pn]],["florence2",["Florence2ForConditionalGeneration",mn]]]),ko=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",un]]]),$o=new 
Map([["vit",["ViTForImageClassification",Vr]],["pvt",["PvtForImageClassification",Gr]],["vit_msn",["ViTMSNForImageClassification",Kr]],["fastvit",["FastViTForImageClassification",ea]],["mobilevit",["MobileViTForImageClassification",sa]],["mobilevitv2",["MobileViTV2ForImageClassification",la]],["beit",["BeitForImageClassification",_a]],["deit",["DeiTForImageClassification",za]],["hiera",["HieraForImageClassification",La]],["convnext",["ConvNextForImageClassification",ms]],["convnextv2",["ConvNextV2ForImageClassification",_s]],["dinov2",["Dinov2ForImageClassification",bs]],["resnet",["ResNetForImageClassification",Na]],["swin",["SwinForImageClassification",qa]],["segformer",["SegformerForImageClassification",Li]],["efficientnet",["EfficientNetForImageClassification",Gi]],["mobilenet_v1",["MobileNetV1ForImageClassification",Yi]],["mobilenet_v2",["MobileNetV2ForImageClassification",eo]],["mobilenet_v3",["MobileNetV3ForImageClassification",ro]],["mobilenet_v4",["MobileNetV4ForImageClassification",io]]]),Co=new Map([["detr",["DetrForObjectDetection",ba]],["rt_detr",["RTDetrForObjectDetection",$a]],["table-transformer",["TableTransformerForObjectDetection",Ea]],["yolos",["YolosForObjectDetection",Ms]]]),So=new Map([["owlvit",["OwlViTForObjectDetection",ca]],["owlv2",["Owlv2ForObjectDetection",ma]]]),Po=new Map([["detr",["DetrForSegmentation",va]],["clipseg",["CLIPSegForImageSegmentation",Pn]]]),Eo=new Map([["segformer",["SegformerForSemanticSegmentation",Di]],["sapiens",["SapiensForSemanticSegmentation",Ja]]]),Fo=new Map([["detr",["DetrForSegmentation",va]],["maskformer",["MaskFormerForInstanceSegmentation",is]]]),Ao=new Map([["sam",["SamModel",$s]]]),Io=new Map([["wav2vec2",["Wav2Vec2ForCTC",Bs]],["wav2vec2-bert",["Wav2Vec2BertForCTC",ti]],["unispeech",["UniSpeechForCTC",Ws]],["unispeech-sat",["UniSpeechSatForCTC",Qs]],["wavlm",["WavLMForCTC",ui]],["hubert",["HubertForCTC",si]]]),zo=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Ls]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",ni]],["unispeech",["UniSpeechForSequenceClassification",Hs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Ys]],["wavlm",["WavLMForSequenceClassification",di]],["hubert",["HubertForSequenceClassification",ii]],["audio-spectrogram-transformer",["ASTForAudioClassification",an]]]),Oo=new Map([["wavlm",["WavLMForXVector",ci]]]),Bo=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Zs]],["wavlm",["WavLMForAudioFrameClassification",pi]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Ds]],["pyannote",["PyAnnoteForAudioFrameClassification",Vs]]]),Lo=new Map([["vitmatte",["VitMatteForImageMatting",na]]]),Do=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Wa]]]),Ro=new Map([["dpt",["DPTForDepthEstimation",Ka]],["depth_anything",["DepthAnythingForDepthEstimation",Ya]],["glpn",["GLPNForDepthEstimation",us]],["sapiens",["SapiensForDepthEstimation",es]],["depth_pro",["DepthProForDepthEstimation",rs]]]),No=new Map([["sapiens",["SapiensForNormalEstimation",ts]]]),Vo=new Map([["clip",["CLIPVisionModelWithProjection",bn]],["siglip",["SiglipVisionModel",Tn]]]),jo=[[co,y],[po,b],[ho,M],[_o,y],[wo,y],[yo,v],[mo,v],[bo,M],[vo,y],[xo,y],[Mo,x],[To,k],[$o,y],[Po,y],[Fo,y],[Eo,y],[Lo,y],[Do,y],[Ro,y],[No,y],[Co,y],[So,y],[Ao,T],[Io,y],[zo,y],[fo,v],[go,y],[Oo,y],[Bo,y],[Vo,y]];for(const[e,t]of jo)for(const[n,r]of e.values())C.set(n,t),P.set(r,n),S.set(n,r);const qo=[["MusicgenForConditionalGeneration",Xi,$],["CLIPTextModelWithProjection",wn,y],["SiglipTextModel",Mn,y],["ClapTextModelWithProjection",Fi,y],["ClapAudioModelWithProjection",Ai,y]];for(const[e,t,n]of qo)C.set(e,n),P.set(t,e),S.set(e,t);class Go extends uo{static MODEL_CLASS_MAPPINGS=jo.map((e=>e[0]));static BASE_IF_FAIL=!0}class Uo extends uo{static MODEL_CLASS_MAPPINGS=[_o]}class Wo extends uo{static MODEL_CLASS_MAPPINGS=[wo]}class Ho extends uo{static 
MODEL_CLASS_MAPPINGS=[yo]}class Xo extends uo{static MODEL_CLASS_MAPPINGS=[mo]}class Ko extends uo{static MODEL_CLASS_MAPPINGS=[fo]}class Qo extends uo{static MODEL_CLASS_MAPPINGS=[go]}class Yo extends uo{static MODEL_CLASS_MAPPINGS=[bo]}class Zo extends uo{static MODEL_CLASS_MAPPINGS=[vo]}class Jo extends uo{static MODEL_CLASS_MAPPINGS=[xo]}class el extends uo{static MODEL_CLASS_MAPPINGS=[Mo]}class tl extends uo{static MODEL_CLASS_MAPPINGS=[$o]}class nl extends uo{static MODEL_CLASS_MAPPINGS=[Po]}class rl extends uo{static MODEL_CLASS_MAPPINGS=[Eo]}class al extends uo{static MODEL_CLASS_MAPPINGS=[Fo]}class sl extends uo{static MODEL_CLASS_MAPPINGS=[Co]}class il extends uo{static MODEL_CLASS_MAPPINGS=[So]}class ol extends uo{static MODEL_CLASS_MAPPINGS=[Ao]}class ll extends uo{static MODEL_CLASS_MAPPINGS=[Io]}class ul extends uo{static MODEL_CLASS_MAPPINGS=[zo]}class dl extends uo{static MODEL_CLASS_MAPPINGS=[Oo]}class cl extends uo{static MODEL_CLASS_MAPPINGS=[Bo]}class pl extends uo{static MODEL_CLASS_MAPPINGS=[ko]}class hl extends uo{static MODEL_CLASS_MAPPINGS=[Lo]}class ml extends uo{static MODEL_CLASS_MAPPINGS=[Do]}class fl extends uo{static MODEL_CLASS_MAPPINGS=[Ro]}class gl extends uo{static MODEL_CLASS_MAPPINGS=[No]}class _l extends uo{static MODEL_CLASS_MAPPINGS=[Vo]}class wl extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:a=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=a}}class yl extends G{constructor({logits:e}){super(),this.logits=e}}class bl extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class vl extends G{constructor({logits:e}){super(),this.logits=e}}class xl extends G{constructor({logits:e}){super(),this.logits=e}}class Ml extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Tl extends 
G{constructor({logits:e}){super(),this.logits=e}}class kl extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class $l extends G{constructor({alphas:e}){super(),this.alphas=e}}class Cl extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
176
176
  /*!**********************************************!*\
177
177
  !*** ./src/models/whisper/common_whisper.js ***!
178
178
  \**********************************************/(e,t,n)=>{n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>a,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>s,whisper_language_to_code:()=>i});const r=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],a=new Map(r),s=new 
Map([...r.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function i(e){e=e.toLowerCase();let t=s.get(e);if(void 0===t){if(!a.has(e)){const t=2===e.length?a.keys():a.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -226,5 +226,5 @@ var r,a,s,i,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,S,P,E,F,A,I,z,O,B,L=Object
226
226
  \*****************************/(e,t,n)=>{/* Module body for the tensor utilities: n.r and n.d register the exports listed below, then ./maths.js, ../backends/onnx.js and ../ops/registry.js are required as r, a and s. */n.r(t),n.d(t,{Tensor:()=>o,cat:()=>y,full:()=>T,full_like:()=>k,interpolate:()=>u,interpolate_4d:()=>d,layer_norm:()=>f,matmul:()=>c,mean:()=>x,mean_pooling:()=>m,ones:()=>$,ones_like:()=>C,permute:()=>l,quantize_embeddings:()=>E,rfft:()=>p,stack:()=>b,std_mean:()=>v,topk:()=>h,zeros:()=>S,zeros_like:()=>P});var r=n(/*! ./maths.js */"./src/utils/maths.js"),a=n(/*! ../backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ../ops/registry.js */"./src/ops/registry.js");/* Dtype name to typed-array constructor table; note float16 maps to Uint16Array and bool to Uint8Array. */const i=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});/* Tensor class (exported as Tensor): a wrapper around ort_tensor whose getters forward dims, type, data, size and location. The constructor returns a Proxy, so string keys that parse to an integer are served by _getitem() instead of a normal property read. */class o{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return(0,a.isONNXTensor)(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new a.Tensor(e[0],e[1],e[2]),new Proxy(this,{get:(e,t)=>{if("string"==typeof t){let n=Number(t);if(Number.isInteger(n))return e._getitem(n)}return e[t]},set:(e,t,n)=>e[t]=n})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...t]=this.dims;if(t.length>0){const n=t.reduce(((e,t)=>e*t));for(let r=0;r<e;++r)yield this._subarray(r,n,t)}else yield*this.data}_getitem(e){const[t,...n]=this.dims;if(e=w(e,t),n.length>0){const t=n.reduce(((e,t)=>e*t));return this._subarray(e,t,n)}return new o(this.type,[this.data[e]],n)}indexOf(e){const t=this.data;for(let n=0;n<t.length;++n)if(t[n]==e)return n;return-1}_subarray(e,t,n){const r=e*t,a=(e+1)*t,s="subarray"in this.data?this.data.subarray(r,a):this.data.slice(r,a);return new o(this.type,s,n)}item(){const e=this.data;if(1!==e.length)throw new Error(`a Tensor with ${e.length} elements cannot be converted to 
Scalar`);return e[0]}/* tolist(): rebuilds nested arrays from the flat data using dims and throws when the element count does not match. NOTE(review): the dims product is computed with reduce() and no initial value, which throws on an empty dims array - confirm whether 0-d tensors can reach this. */tolist(){return function(e,t){const n=e.length,r=t.reduce(((e,t)=>e*t));if(n!==r)throw Error(`cannot reshape array of size ${n} into shape (${t})`);let a=e;for(let e=t.length-1;e>=0;e--)a=a.reduce(((n,r)=>{let a=n[n.length-1];return a.length<t[e]?a.push(r):n.push([r]),n}),[[]]);return a[0]}(this.data,this.dims)}sigmoid(){return this.clone().sigmoid_()}sigmoid_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=1/(1+Math.exp(-e[t]));return this}map(e){return this.clone().map_(e)}map_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]=e(t[n],n,t);return this}mul(e){return this.clone().mul_(e)}mul_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]*=e;return this}div(e){return this.clone().div_(e)}div_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]/=e;return this}add(e){return this.clone().add_(e)}add_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]+=e;return this}sub(e){return this.clone().sub_(e)}sub_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]-=e;return this}clone(){return new o(this.type,this.data.slice(),this.dims.slice())}slice(...e){const t=[],n=[];for(let r=0;r<this.dims.length;++r){let a=e[r];if(null==a)n.push([0,this.dims[r]]),t.push(this.dims[r]);else if("number"==typeof a)a=w(a,this.dims[r],r),n.push([a,a+1]);else{if(!Array.isArray(a)||2!==a.length)throw new Error(`Invalid slice: ${a}`);{let[e,s]=a;if(e=null===e?0:w(e,this.dims[r],r,!1),s=null===s?this.dims[r]:w(s,this.dims[r],r,!1),e>s)throw new Error(`Invalid slice: ${a}`);const i=[Math.max(e,0),Math.min(s,this.dims[r])];n.push(i),t.push(i[1]-i[0])}}}const r=n.map((([e,t])=>t-e)),a=r.reduce(((e,t)=>e*t)),s=this.data,i=new s.constructor(a),l=this.stride();for(let e=0;e<a;++e){let t=0;for(let a=r.length-1,s=e;a>=0;--a){const e=r[a];t+=(s%e+n[a][0])*l[a],s=Math.floor(s/e)}i[e]=s[t]}return new o(this.type,i,t)}permute(...e){return l(this,e)}transpose(...e){return this.permute(...e)}/* sum(dim, keepdim): implemented as norm(1, dim, keepdim); norm() applies no absolute value, so p = 1 is a plain sum. */sum(e=null,t=!1){return 
this.norm(1,e,t)}/* norm(p, dim, keepdim): the string fro selects p = 2, any other string throws; dim = null reduces over every element. */norm(e="fro",t=null,n=!1){if("fro"===e)e=2;else if("string"==typeof e)throw Error(`Unsupported norm: ${e}`);const r=this.data;if(null===t){let t=r.reduce(((t,n)=>t+n**e),0)**(1/e);return new o(this.type,[t],[])}t=w(t,this.dims.length);const a=this.dims.slice();a[t]=1;const s=new r.constructor(r.length/this.dims[t]);for(let n=0;n<r.length;++n){let i=0;for(let e=this.dims.length-1,r=n,s=1;e>=0;--e){const n=this.dims[e];if(e!==t){i+=r%n*s,s*=a[e]}r=Math.floor(r/n)}s[i]+=r[n]**e}if(1!==e)for(let t=0;t<s.length;++t)s[t]=s[t]**(1/e);return n||a.splice(t,1),new o(this.type,s,a)}normalize_(e=2,t=1){t=w(t,this.dims.length);const n=this.norm(e,t,!0),r=this.data,a=n.data;for(let e=0;e<r.length;++e){let n=0;for(let r=this.dims.length-1,a=e,s=1;r>=0;--r){const e=this.dims[r];if(r!==t){n+=a%e*s,s*=this.dims[r]}a=Math.floor(a/e)}r[e]/=a[n]}return this}normalize(e=2,t=1){return this.clone().normalize_(e,t)}stride(){return function(e){const t=new Array(e.length);for(let n=e.length-1,r=1;n>=0;--n)t[n]=r,r*=e[n];return t}(this.dims)}squeeze(e=null){return new o(this.type,this.data,g(this.dims,e))}squeeze_(e=null){return this.dims=g(this.dims,e),this}unsqueeze(e=null){return new o(this.type,this.data,_(this.dims,e))}unsqueeze_(e=null){return this.dims=_(this.dims,e),this}flatten_(e=0,t=-1){t=(t+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),r=this.dims.slice(e,t+1),a=this.dims.slice(t+1);return this.dims=[...n,r.reduce(((e,t)=>e*t),1),...a],this}flatten(e=0,t=-1){return this.clone().flatten_(e,t)}view(...e){let t=-1;for(let n=0;n<e.length;++n)if(-1===e[n]){if(-1!==t)throw new Error("Only one dimension can be inferred");t=n}const n=this.data;if(-1!==t){const r=e.reduce(((e,n,r)=>r!==t?e*n:e),1);e[t]=n.length/r}return new o(this.type,n,e)}neg_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=-e[t];return this}neg(){return this.clone().neg_()}clamp_(e,t){const n=this.data;for(let r=0;r<n.length;++r)n[r]=Math.min(Math.max(n[r],e),t);return 
this}clamp(e,t){return this.clone().clamp_(e,t)}round_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=Math.round(e[t]);return this}round(){return this.clone().round_()}mean(e=null,t=!1){return x(this,e,t)}to(e){if(this.type===e)return this;if(!i.hasOwnProperty(e))throw new Error(`Unsupported type: ${e}`);return new o(e,i[e].from(this.data),this.dims)}}function l(e,t){const[n,a]=(0,r.permute_data)(e.data,e.dims,t);return new o(e.type,n,a)}function u(e,[t,n],a="bilinear",s=!1){const i=e.dims.at(-3)??1,l=e.dims.at(-2),u=e.dims.at(-1);let d=(0,r.interpolate_data)(e.data,[i,l,u],[t,n],a,s);return new o(e.type,d,[i,t,n])}async function d(e,{size:t=null,mode:n="bilinear"}={}){if(4!==e.dims.length)throw new Error("`interpolate_4d` currently only supports 4D input.");if(!t)throw new Error("`interpolate_4d` requires a `size` argument.");let r,a;if(2===t.length)r=[...e.dims.slice(0,2),...t];else if(3===t.length)r=[e.dims[0],...t];else{if(4!==t.length)throw new Error("`size` must be of length 2, 3, or 4.");r=t}if("bilinear"===n)a=await s.TensorOpRegistry.bilinear_interpolate_4d;else{if("bicubic"!==n)throw new Error(`Unsupported mode: ${n}`);a=await s.TensorOpRegistry.bicubic_interpolate_4d}const i=new o("int64",new BigInt64Array(r.map(BigInt)),[r.length]);return await a({x:e,s:i})}async function c(e,t){const n=await s.TensorOpRegistry.matmul;return await n({a:e,b:t})}async function p(e,t){const n=await s.TensorOpRegistry.rfft;return await n({x:e,a:t})}async function h(e,t){const n=await s.TensorOpRegistry.top_k;return t=null===t?e.dims.at(-1):Math.min(t,e.dims.at(-1)),await n({x:e,k:new o("int64",[BigInt(t)],[1])})}function m(e,t){const n=e.data,r=t.data,a=[e.dims[0],e.dims[2]],s=new n.constructor(a[0]*a[1]),[i,l,u]=e.dims;let d=0;for(let e=0;e<i;++e){const t=e*u*l;for(let a=0;a<u;++a){let i=0,o=0;const c=e*l,p=t+a;for(let e=0;e<l;++e){const t=Number(r[c+e]);o+=t,i+=n[p+e*u]*t}const h=i/o;s[d++]=h}}return new o(e.type,s,a)}/* layer_norm(input, normalized_shape, {eps}): 2D input only; each row is centered on its mean and divided by (std + eps), using std_mean with correction 0. NOTE(review): the normalized_shape guard joins its two checks with &&, so it throws only when both fail; || looks intended - confirm. */function 
f(e,t,{eps:n=1e-5}={}){if(2!==e.dims.length)throw new Error("`layer_norm` currently only supports 2D input.");const[r,a]=e.dims;if(1!==t.length&&t[0]!==a)throw new Error("`normalized_shape` must be a 1D array with shape `[input.dims[1]]`.");const[s,i]=v(e,1,0,!0),l=s.data,u=i.data,d=e.data,c=new d.constructor(d.length);for(let e=0;e<r;++e){const t=e*a;for(let r=0;r<a;++r){const a=t+r;c[a]=(d[a]-u[e])/(l[e]+n)}}return new o(e.type,c,e.dims)}function g(e,t){return e=e.slice(),null===t?e=e.filter((e=>1!==e)):"number"==typeof t?1===e[t]&&e.splice(t,1):Array.isArray(t)&&(e=e.filter(((e,n)=>1!==e||!t.includes(n)))),e}function _(e,t){return t=w(t,e.length+1),(e=e.slice()).splice(t,0,1),e}/* Index helper: when r is true, throws for e outside [-t, t); negative e is then wrapped into [0, t). n only labels the dimension in the error text. */function w(e,t,n=null,r=!0){if(r&&(e<-t||e>=t))throw new Error(`IndexError: index ${e} is out of bounds for dimension${null===n?"":" "+n} with size ${t}`);return e<0&&(e=(e%t+t)%t),e}/* cat(tensors, dim): output type and buffer class come from the first tensor; dim 0 copies each data buffer back to back, any other dim maps every source element to its destination offset. */function y(e,t=0){t=w(t,e[0].dims.length);const n=e[0].dims.slice();n[t]=e.reduce(((e,n)=>e+n.dims[t]),0);const r=n.reduce(((e,t)=>e*t),1),a=new e[0].data.constructor(r),s=e[0].type;if(0===t){let t=0;for(const n of e){const e=n.data;a.set(e,t),t+=e.length}}else{let r=0;for(let s=0;s<e.length;++s){const{data:i,dims:o}=e[s];for(let e=0;e<i.length;++e){let s=0;for(let a=o.length-1,i=e,l=1;a>=0;--a){const e=o[a];let u=i%e;a===t&&(u+=r),s+=u*l,l*=n[a],i=Math.floor(i/e)}a[s]=i[e]}r+=o[t]}}return new o(s,a,n)}function b(e,t=0){return y(e.map((e=>e.unsqueeze(t))),t)}function v(e,t=null,n=1,r=!1){const a=e.data,s=e.dims;if(null===t){const t=a.reduce(((e,t)=>e+t),0)/a.length,r=Math.sqrt(a.reduce(((e,n)=>e+(n-t)**2),0)/(a.length-n)),s=new o(e.type,[t],[]);return[new o(e.type,[r],[]),s]}const i=x(e,t=w(t,s.length),r),l=i.data,u=s.slice();u[t]=1;const d=new a.constructor(a.length/s[t]);for(let e=0;e<a.length;++e){let n=0;for(let r=s.length-1,a=e,i=1;r>=0;--r){const e=s[r];if(r!==t){n+=a%e*i,i*=u[r]}a=Math.floor(a/e)}d[n]+=(a[e]-l[n])**2}for(let e=0;e<d.length;++e)d[e]=Math.sqrt(d[e]/(s[t]-n));r||u.splice(t,1);return[new 
o(e.type,d,u),i]}function x(e,t=null,n=!1){const r=e.data;if(null===t){const t=r.reduce(((e,t)=>e+t),0);return new o(e.type,[t/r.length],[])}const a=e.dims;t=w(t,a.length);const s=a.slice();s[t]=1;const i=new r.constructor(r.length/a[t]);for(let e=0;e<r.length;++e){let n=0;for(let r=a.length-1,i=e,o=1;r>=0;--r){const e=a[r];if(r!==t){n+=i%e*o,o*=s[r]}i=Math.floor(i/e)}i[n]+=r[e]}if(1!==a[t])for(let e=0;e<i.length;++e)i[e]=i[e]/a[t];return n||s.splice(t,1),new o(e.type,i,s)}function M(e,t,n,r){const a=e.reduce(((e,t)=>e*t),1);return new o(n,new r(a).fill(t),e)}function T(e,t){let n,r;if("number"==typeof t)n="float32",r=Float32Array;else{if("bigint"!=typeof t)throw new Error("Unsupported data type: "+typeof t);n="int64",r=BigInt64Array}return M(e,t,n,r)}function k(e,t){return T(e.dims,t)}/* ones, ones_like, zeros and zeros_like all build int64 tensors backed by BigInt64Array. */function $(e){return M(e,1n,"int64",BigInt64Array)}function C(e){return $(e.dims)}function S(e){return M(e,0n,"int64",BigInt64Array)}function P(e){return S(e.dims)}/* quantize_embeddings(tensor, precision): packs eight (value > 0) bits per byte, first element in the highest bit; for binary precision 128 is subtracted from each byte and the result is int8, otherwise uint8. */function E(e,t){if(2!==e.dims.length)throw new Error("The tensor must have 2 dimensions");if(e.dims.at(-1)%8!=0)throw new Error("The last dimension of the tensor must be a multiple of 8");if(!["binary","ubinary"].includes(t))throw new Error("The precision must be either 'binary' or 'ubinary'");const n="binary"===t,r=n?"int8":"uint8",a=n?Int8Array:Uint8Array,s=e.data,i=new a(s.length/8);for(let e=0;e<s.length;++e){const t=s[e]>0?1:0,r=Math.floor(e/8),a=e%8;i[r]|=t<<7-a,n&&0===a&&(i[r]-=128)}return new o(r,i,[e.dims[0],e.dims[1]/8])}}},r={};/* Module loader: returns cached exports from r when present, otherwise creates the cache entry, runs the module factory n[e] and returns its exports. */function a(e){var t=r[e];if(void 0!==t)return t.exports;var s=r[e]={exports:{}};return n[e](s,s.exports,a),s.exports}a.m=n,t=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,a.t=function(n,r){if(1&r&&(n=this(n)),8&r)return n;if("object"==typeof n&&n){if(4&r&&n.__esModule)return n;if(16&r&&"function"==typeof n.then)return n}var s=Object.create(null);a.r(s);var i={};e=e||[null,t({}),t([]),t(t)];for(var o=2&r&&n;"object"==typeof 
o&&!~e.indexOf(o);o=t(o))Object.getOwnPropertyNames(o).forEach((e=>i[e]=()=>n[e]));return i.default=()=>n,a.d(s,i),s},a.d=(e,t)=>{for(var n in t)a.o(t,n)&&!a.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},a.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),a.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},/* Sets a.p from import.meta.url with the hash, query string and last path segment removed; throws when no URL is available. */(()=>{var e;if("string"==typeof import.meta.url&&(e=import.meta.url),!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),a.p=e})(),a.b=new URL("./",import.meta.url);var s={};
227
227
  /*!*****************************!*\
228
228
  !*** ./src/transformers.js ***!
229
- \*****************************/a.r(s),a.d(s,{ASTFeatureExtractor:()=>d.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>u.AlbertTokenizer,AudioClassificationPipeline:()=>o.AudioClassificationPipeline,AutoConfig:()=>c.AutoConfig,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForText
ToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForUniversalSegmentation:()=>l.AutoModelForUniversalSegmentation,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>d.AutoProcessor,AutoTokenizer:()=>u.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>o.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>u.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>g.BaseStreamer,BeitFeatureExtractor:()=>d.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>u.BertTokenizer,BitImageProcessor:()=>d.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallModel,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>u.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>u.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,BloomModel:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>u.BloomTokenizer,CLIPFeatureExtractor:()=>d.CLIPFeatureExtra
ctor,CLIPImageProcessor:()=>d.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModel:()=>l.CLIPTextModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>u.CLIPTokenizer,CLIPVisionModel:()=>l.CLIPVisionModel,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>u.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>d.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>d.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>u.CodeGenTokenizer,CodeLlamaTokenizer:()=>u.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>u.CohereTokenizer,ConvBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTok
enClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>u.ConvBertTokenizer,ConvNextFeatureExtractor:()=>d.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>d.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>d.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>d.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>u.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionAnswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>u.DebertaV2Tokenizer,DecisionTransformerModel:()=>l.DecisionTransformerModel,DecisionTransformerPreTrainedModel:()=>l.DecisionTransformerPreTrainedModel,DeiTFeatureExtractor:()=>d.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTForImageClassification,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,Depth
EstimationPipeline:()=>o.DepthEstimationPipeline,DepthProForDepthEstimation:()=>l.DepthProForDepthEstimation,DepthProPreTrainedModel:()=>l.DepthProPreTrainedModel,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>u.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>o.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>d.DonutFeatureExtractor,DonutImageProcessor:()=>d.DonutImageProcessor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>d.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>u.ElectraTokenizer,EosTokenCriteria:()=>_.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSeq
uenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>u.EsmTokenizer,FFT:()=>f.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>u.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>o.FeatureExtractionPipeline,FeatureExtractor:()=>d.FeatureExtractor,FillMaskPipeline:()=>o.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>d.Florence2Processor,GLPNFeatureExtractor:()=>d.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>u.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>u.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>u.GemmaTokenizer,GraniteForCausa
lLM:()=>l.GraniteForCausalLM,GraniteModel:()=>l.GraniteModel,GranitePreTrainedModel:()=>l.GranitePreTrainedModel,Grok1Tokenizer:()=>u.Grok1Tokenizer,GroupViTModel:()=>l.GroupViTModel,GroupViTPreTrainedModel:()=>l.GroupViTPreTrainedModel,HerbertTokenizer:()=>u.HerbertTokenizer,HieraForImageClassification:()=>l.HieraForImageClassification,HieraModel:()=>l.HieraModel,HieraPreTrainedModel:()=>l.HieraPreTrainedModel,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,ImageClassificationPipeline:()=>o.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>o.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>d.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageSegmentationPipeline:()=>o.ImageSegmentationPipeline,ImageToImagePipeline:()=>o.ImageToImagePipeline,ImageToTextPipeline:()=>o.ImageToTextPipeline,InterruptableStoppingCriteria:()=>_.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>l.LlamaPreTrainedModel,LlamaTokenizer:()=>u.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>u.M2M100Tokenizer,MBart50Tokenizer:()=>u.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreT
rainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>u.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>u.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>u.MarianTokenizer,MaskFormerFeatureExtractor:()=>d.MaskFormerFeatureExtractor,MaskFormerForInstanceSegmentation:()=>l.MaskFormerForInstanceSegmentation,MaskFormerModel:()=>l.MaskFormerModel,MaskFormerPreTrainedModel:()=>l.MaskFormerPreTrainedModel,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>_.MaxLengthCriteria,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>u.MobileBertTokenizer,MobileNetV1FeatureExtractor:()=>d.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>d.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2Model:()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>d.MobileNetV3FeatureExtra
ctor,MobileNetV3ForImageClassification:()=>l.MobileNetV3ForImageClassification,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>d.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>d.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>d.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>u.NllbTokenizer,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>d.NougatImageProcessor,NougatTokenizer:()=>u.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>o.ObjectDetectionPipeline,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()=>l.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>d.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>d.OwlViTProcess
or,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>d.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>o.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>u.PreTrainedTokenizer,PretrainedConfig:()=>c.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>d.Processor,PvtForImageClassification:()=>l.PvtForImageClassification,PvtImageProcessor:()=>d.PvtImageProcessor,PvtModel:()=>l.PvtModel,PvtPreTrainedModel:()=>l.PvtPreTrainedModel,PyAnnoteFeatureExtractor:()=>d.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>d.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>o.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>u.Qwen2Tokenizer,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>d.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>h.RawImage,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoF
ormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>u.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>u.RobertaTokenizer,SamImageProcessor:()=>d.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>d.SamProcessor,SapiensFeatureExtractor:()=>d.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>d.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>d.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.SegformerForSemanticSegmentation,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>d.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>u.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>d.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>d.SpeechT5Processor,Speec
hT5Tokenizer:()=>u.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>u.SqueezeBertTokenizer,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>_.StoppingCriteria,StoppingCriteriaList:()=>_.StoppingCriteriaList,SummarizationPipeline:()=>o.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>l.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>d.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,SwinModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>u.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,Tensor:()=>m.Tensor,Text2TextGenerationPipeline:()=>o.Text2TextGenerationPipeline,TextClassificationPipeline:()=>o.TextClassificationPipeline,TextGenerationPipeline:()=>o.TextGenerationPipeline,TextStreamer:()=>g.TextStreamer,TextToAudioPipeline:()=>o.TextToAudioPipeline,TokenClassificationPipeline:()=>o.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>u.TokenizerModel,TrOCRForC
ausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>o.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>d.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>d.ViTImageProcessor,ViTMAEModel:()=>l.ViTMAEModel,ViTMAEPreTrainedModel:()=>l.ViTMAEPreTrainedModel,ViTMSNForImageClassification:()=>l.ViTMSNForImageClassification,ViTMSNModel:()=>l.ViTMSNModel,ViTMSNPreTrainedModel:()=>l.ViTMSNPreTrainedModel,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>d.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>u.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>u.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>d.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Mod
el,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>d.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudioFrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>d.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>d.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,WhisperModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>d.WhisperProcessor,WhisperTextStreamer:()=>g.WhisperTextStreamer,WhisperTokenizer:()=>u.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>u.XLMRobertaTokenizer,XLMTokenizer:()=>u.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>d.YolosFeatureExtractor,YolosForObjectDetection:()=>l.YolosForObjectDetection,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>o.ZeroShotAudioClassificationPipeline,ZeroShotClassif
icationPipeline:()=>o.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>o.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>o.ZeroShotObjectDetectionPipeline,bankers_round:()=>f.bankers_round,cat:()=>m.cat,cos_sim:()=>f.cos_sim,dot:()=>f.dot,dynamic_time_warping:()=>f.dynamic_time_warping,env:()=>i.env,full:()=>m.full,full_like:()=>m.full_like,getKeyValueShapes:()=>c.getKeyValueShapes,hamming:()=>p.hamming,hanning:()=>p.hanning,interpolate:()=>m.interpolate,interpolate_4d:()=>m.interpolate_4d,interpolate_data:()=>f.interpolate_data,is_chinese_char:()=>u.is_chinese_char,layer_norm:()=>m.layer_norm,log_softmax:()=>f.log_softmax,magnitude:()=>f.magnitude,matmul:()=>m.matmul,max:()=>f.max,mean:()=>m.mean,mean_pooling:()=>m.mean_pooling,medianFilter:()=>f.medianFilter,mel_filter_bank:()=>p.mel_filter_bank,min:()=>f.min,ones:()=>m.ones,ones_like:()=>m.ones_like,permute:()=>m.permute,permute_data:()=>f.permute_data,pipeline:()=>o.pipeline,quantize_embeddings:()=>m.quantize_embeddings,read_audio:()=>p.read_audio,rfft:()=>m.rfft,round:()=>f.round,softmax:()=>f.softmax,spectrogram:()=>p.spectrogram,stack:()=>m.stack,std_mean:()=>m.std_mean,topk:()=>m.topk,window_function:()=>p.window_function,zeros:()=>m.zeros,zeros_like:()=>m.zeros_like});var i=a(/*! ./env.js */"./src/env.js"),o=a(/*! ./pipelines.js */"./src/pipelines.js"),l=a(/*! ./models.js */"./src/models.js"),u=a(/*! ./tokenizers.js */"./src/tokenizers.js"),d=a(/*! ./processors.js */"./src/processors.js"),c=a(/*! ./configs.js */"./src/configs.js"),p=a(/*! ./utils/audio.js */"./src/utils/audio.js"),h=a(/*! ./utils/image.js */"./src/utils/image.js"),m=a(/*! ./utils/tensor.js */"./src/utils/tensor.js"),f=a(/*! ./utils/maths.js */"./src/utils/maths.js"),g=a(/*! ./generation/streamers.js */"./src/generation/streamers.js"),_=a(/*! 
./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),w=s.ASTFeatureExtractor,y=s.ASTForAudioClassification,b=s.ASTModel,v=s.ASTPreTrainedModel,x=s.AlbertForMaskedLM,M=s.AlbertForQuestionAnswering,T=s.AlbertForSequenceClassification,k=s.AlbertModel,$=s.AlbertPreTrainedModel,C=s.AlbertTokenizer,S=s.AudioClassificationPipeline,P=s.AutoConfig,E=s.AutoModel,F=s.AutoModelForAudioClassification,A=s.AutoModelForAudioFrameClassification,I=s.AutoModelForCTC,z=s.AutoModelForCausalLM,O=s.AutoModelForDepthEstimation,B=s.AutoModelForDocumentQuestionAnswering,L=s.AutoModelForImageClassification,D=s.AutoModelForImageFeatureExtraction,R=s.AutoModelForImageMatting,N=s.AutoModelForImageSegmentation,V=s.AutoModelForImageToImage,j=s.AutoModelForMaskGeneration,q=s.AutoModelForMaskedLM,G=s.AutoModelForNormalEstimation,U=s.AutoModelForObjectDetection,W=s.AutoModelForQuestionAnswering,H=s.AutoModelForSemanticSegmentation,X=s.AutoModelForSeq2SeqLM,K=s.AutoModelForSequenceClassification,Q=s.AutoModelForSpeechSeq2Seq,Y=s.AutoModelForTextToSpectrogram,Z=s.AutoModelForTextToWaveform,J=s.AutoModelForTokenClassification,ee=s.AutoModelForUniversalSegmentation,te=s.AutoModelForVision2Seq,ne=s.AutoModelForXVector,re=s.AutoModelForZeroShotObjectDetection,ae=s.AutoProcessor,se=s.AutoTokenizer,ie=s.AutomaticSpeechRecognitionPipeline,oe=s.BartForConditionalGeneration,le=s.BartForSequenceClassification,ue=s.BartModel,de=s.BartPretrainedModel,ce=s.BartTokenizer,pe=s.BaseModelOutput,he=s.BaseStreamer,me=s.BeitFeatureExtractor,fe=s.BeitForImageClassification,ge=s.BeitModel,_e=s.BeitPreTrainedModel,we=s.BertForMaskedLM,ye=s.BertForQuestionAnswering,be=s.BertForSequenceClassification,ve=s.BertForTokenClassification,xe=s.BertModel,Me=s.BertPreTrainedModel,Te=s.BertTokenizer,ke=s.BitImageProcessor,$e=s.BlenderbotForConditionalGeneration,Ce=s.BlenderbotModel,Se=s.BlenderbotPreTrainedModel,Pe=s.BlenderbotSmallForConditionalGeneration,Ee=s.BlenderbotSmallModel,Fe=s.BlenderbotSmallPreTrainedM
odel,Ae=s.BlenderbotSmallTokenizer,Ie=s.BlenderbotTokenizer,ze=s.BloomForCausalLM,Oe=s.BloomModel,Be=s.BloomPreTrainedModel,Le=s.BloomTokenizer,De=s.CLIPFeatureExtractor,Re=s.CLIPImageProcessor,Ne=s.CLIPModel,Ve=s.CLIPPreTrainedModel,je=s.CLIPSegForImageSegmentation,qe=s.CLIPSegModel,Ge=s.CLIPSegPreTrainedModel,Ue=s.CLIPTextModel,We=s.CLIPTextModelWithProjection,He=s.CLIPTokenizer,Xe=s.CLIPVisionModel,Ke=s.CLIPVisionModelWithProjection,Qe=s.CamembertForMaskedLM,Ye=s.CamembertForQuestionAnswering,Ze=s.CamembertForSequenceClassification,Je=s.CamembertForTokenClassification,et=s.CamembertModel,tt=s.CamembertPreTrainedModel,nt=s.CamembertTokenizer,rt=s.CausalLMOutput,at=s.CausalLMOutputWithPast,st=s.ChineseCLIPFeatureExtractor,it=s.ChineseCLIPModel,ot=s.ChineseCLIPPreTrainedModel,lt=s.ClapAudioModelWithProjection,ut=s.ClapFeatureExtractor,dt=s.ClapModel,ct=s.ClapPreTrainedModel,pt=s.ClapTextModelWithProjection,ht=s.CodeGenForCausalLM,mt=s.CodeGenModel,ft=s.CodeGenPreTrainedModel,gt=s.CodeGenTokenizer,_t=s.CodeLlamaTokenizer,wt=s.CohereForCausalLM,yt=s.CohereModel,bt=s.CoherePreTrainedModel,vt=s.CohereTokenizer,xt=s.ConvBertForMaskedLM,Mt=s.ConvBertForQuestionAnswering,Tt=s.ConvBertForSequenceClassification,kt=s.ConvBertForTokenClassification,$t=s.ConvBertModel,Ct=s.ConvBertPreTrainedModel,St=s.ConvBertTokenizer,Pt=s.ConvNextFeatureExtractor,Et=s.ConvNextForImageClassification,Ft=s.ConvNextImageProcessor,At=s.ConvNextModel,It=s.ConvNextPreTrainedModel,zt=s.ConvNextV2ForImageClassification,Ot=s.ConvNextV2Model,Bt=s.ConvNextV2PreTrainedModel,Lt=s.DPTFeatureExtractor,Dt=s.DPTForDepthEstimation,Rt=s.DPTImageProcessor,Nt=s.DPTModel,Vt=s.DPTPreTrainedModel,jt=s.DebertaForMaskedLM,qt=s.DebertaForQuestionAnswering,Gt=s.DebertaForSequenceClassification,Ut=s.DebertaForTokenClassification,Wt=s.DebertaModel,Ht=s.DebertaPreTrainedModel,Xt=s.DebertaTokenizer,Kt=s.DebertaV2ForMaskedLM,Qt=s.DebertaV2ForQuestionAnswering,Yt=s.DebertaV2ForSequenceClassification,Zt=s.DebertaV2ForTokenClass
ification,Jt=s.DebertaV2Model,en=s.DebertaV2PreTrainedModel,tn=s.DebertaV2Tokenizer,nn=s.DecisionTransformerModel,rn=s.DecisionTransformerPreTrainedModel,an=s.DeiTFeatureExtractor,sn=s.DeiTForImageClassification,on=s.DeiTModel,ln=s.DeiTPreTrainedModel,un=s.DepthAnythingForDepthEstimation,dn=s.DepthAnythingPreTrainedModel,cn=s.DepthEstimationPipeline,pn=s.DepthProForDepthEstimation,hn=s.DepthProPreTrainedModel,mn=s.DetrFeatureExtractor,fn=s.DetrForObjectDetection,gn=s.DetrForSegmentation,_n=s.DetrModel,wn=s.DetrObjectDetectionOutput,yn=s.DetrPreTrainedModel,bn=s.DetrSegmentationOutput,vn=s.Dinov2ForImageClassification,xn=s.Dinov2Model,Mn=s.Dinov2PreTrainedModel,Tn=s.DistilBertForMaskedLM,kn=s.DistilBertForQuestionAnswering,$n=s.DistilBertForSequenceClassification,Cn=s.DistilBertForTokenClassification,Sn=s.DistilBertModel,Pn=s.DistilBertPreTrainedModel,En=s.DistilBertTokenizer,Fn=s.DocumentQuestionAnsweringPipeline,An=s.DonutFeatureExtractor,In=s.DonutImageProcessor,zn=s.DonutSwinModel,On=s.DonutSwinPreTrainedModel,Bn=s.EfficientNetForImageClassification,Ln=s.EfficientNetImageProcessor,Dn=s.EfficientNetModel,Rn=s.EfficientNetPreTrainedModel,Nn=s.ElectraForMaskedLM,Vn=s.ElectraForQuestionAnswering,jn=s.ElectraForSequenceClassification,qn=s.ElectraForTokenClassification,Gn=s.ElectraModel,Un=s.ElectraPreTrainedModel,Wn=s.ElectraTokenizer,Hn=s.EosTokenCriteria,Xn=s.EsmForMaskedLM,Kn=s.EsmForSequenceClassification,Qn=s.EsmForTokenClassification,Yn=s.EsmModel,Zn=s.EsmPreTrainedModel,Jn=s.EsmTokenizer,er=s.FFT,tr=s.FalconForCausalLM,nr=s.FalconModel,rr=s.FalconPreTrainedModel,ar=s.FalconTokenizer,sr=s.FastViTForImageClassification,ir=s.FastViTModel,or=s.FastViTPreTrainedModel,lr=s.FeatureExtractionPipeline,ur=s.FeatureExtractor,dr=s.FillMaskPipeline,cr=s.Florence2ForConditionalGeneration,pr=s.Florence2PreTrainedModel,hr=s.Florence2Processor,mr=s.GLPNFeatureExtractor,fr=s.GLPNForDepthEstimation,gr=s.GLPNModel,_r=s.GLPNPreTrainedModel,wr=s.GPT2LMHeadModel,yr=s.GPT2Model,br=s.G
PT2PreTrainedModel,vr=s.GPT2Tokenizer,xr=s.GPTBigCodeForCausalLM,Mr=s.GPTBigCodeModel,Tr=s.GPTBigCodePreTrainedModel,kr=s.GPTJForCausalLM,$r=s.GPTJModel,Cr=s.GPTJPreTrainedModel,Sr=s.GPTNeoForCausalLM,Pr=s.GPTNeoModel,Er=s.GPTNeoPreTrainedModel,Fr=s.GPTNeoXForCausalLM,Ar=s.GPTNeoXModel,Ir=s.GPTNeoXPreTrainedModel,zr=s.GPTNeoXTokenizer,Or=s.Gemma2ForCausalLM,Br=s.Gemma2Model,Lr=s.Gemma2PreTrainedModel,Dr=s.GemmaForCausalLM,Rr=s.GemmaModel,Nr=s.GemmaPreTrainedModel,Vr=s.GemmaTokenizer,jr=s.GraniteForCausalLM,qr=s.GraniteModel,Gr=s.GranitePreTrainedModel,Ur=s.Grok1Tokenizer,Wr=s.GroupViTModel,Hr=s.GroupViTPreTrainedModel,Xr=s.HerbertTokenizer,Kr=s.HieraForImageClassification,Qr=s.HieraModel,Yr=s.HieraPreTrainedModel,Zr=s.HubertForCTC,Jr=s.HubertForSequenceClassification,ea=s.HubertModel,ta=s.HubertPreTrainedModel,na=s.ImageClassificationPipeline,ra=s.ImageFeatureExtractionPipeline,aa=s.ImageFeatureExtractor,sa=s.ImageMattingOutput,ia=s.ImageSegmentationPipeline,oa=s.ImageToImagePipeline,la=s.ImageToTextPipeline,ua=s.InterruptableStoppingCriteria,da=s.JAISLMHeadModel,ca=s.JAISModel,pa=s.JAISPreTrainedModel,ha=s.LlamaForCausalLM,ma=s.LlamaModel,fa=s.LlamaPreTrainedModel,ga=s.LlamaTokenizer,_a=s.LlavaForConditionalGeneration,wa=s.LlavaPreTrainedModel,ya=s.LongT5ForConditionalGeneration,ba=s.LongT5Model,va=s.LongT5PreTrainedModel,xa=s.M2M100ForConditionalGeneration,Ma=s.M2M100Model,Ta=s.M2M100PreTrainedModel,ka=s.M2M100Tokenizer,$a=s.MBart50Tokenizer,Ca=s.MBartForCausalLM,Sa=s.MBartForConditionalGeneration,Pa=s.MBartForSequenceClassification,Ea=s.MBartModel,Fa=s.MBartPreTrainedModel,Aa=s.MBartTokenizer,Ia=s.MPNetForMaskedLM,za=s.MPNetForQuestionAnswering,Oa=s.MPNetForSequenceClassification,Ba=s.MPNetForTokenClassification,La=s.MPNetModel,Da=s.MPNetPreTrainedModel,Ra=s.MPNetTokenizer,Na=s.MT5ForConditionalGeneration,Va=s.MT5Model,ja=s.MT5PreTrainedModel,qa=s.MarianMTModel,Ga=s.MarianModel,Ua=s.MarianPreTrainedModel,Wa=s.MarianTokenizer,Ha=s.MaskFormerFeatureExtractor,Xa=s.M
askFormerForInstanceSegmentation,Ka=s.MaskFormerModel,Qa=s.MaskFormerPreTrainedModel,Ya=s.MaskedLMOutput,Za=s.MaxLengthCriteria,Ja=s.MistralForCausalLM,es=s.MistralModel,ts=s.MistralPreTrainedModel,ns=s.MobileBertForMaskedLM,rs=s.MobileBertForQuestionAnswering,as=s.MobileBertForSequenceClassification,ss=s.MobileBertModel,is=s.MobileBertPreTrainedModel,os=s.MobileBertTokenizer,ls=s.MobileNetV1FeatureExtractor,us=s.MobileNetV1ForImageClassification,ds=s.MobileNetV1Model,cs=s.MobileNetV1PreTrainedModel,ps=s.MobileNetV2FeatureExtractor,hs=s.MobileNetV2ForImageClassification,ms=s.MobileNetV2Model,fs=s.MobileNetV2PreTrainedModel,gs=s.MobileNetV3FeatureExtractor,_s=s.MobileNetV3ForImageClassification,ws=s.MobileNetV3Model,ys=s.MobileNetV3PreTrainedModel,bs=s.MobileNetV4FeatureExtractor,vs=s.MobileNetV4ForImageClassification,xs=s.MobileNetV4Model,Ms=s.MobileNetV4PreTrainedModel,Ts=s.MobileViTFeatureExtractor,ks=s.MobileViTForImageClassification,$s=s.MobileViTImageProcessor,Cs=s.MobileViTModel,Ss=s.MobileViTPreTrainedModel,Ps=s.MobileViTV2ForImageClassification,Es=s.MobileViTV2Model,Fs=s.MobileViTV2PreTrainedModel,As=s.ModelOutput,Is=s.Moondream1ForConditionalGeneration,zs=s.MptForCausalLM,Os=s.MptModel,Bs=s.MptPreTrainedModel,Ls=s.MusicgenForCausalLM,Ds=s.MusicgenForConditionalGeneration,Rs=s.MusicgenModel,Ns=s.MusicgenPreTrainedModel,Vs=s.NllbTokenizer,js=s.NomicBertModel,qs=s.NomicBertPreTrainedModel,Gs=s.NougatImageProcessor,Us=s.NougatTokenizer,Ws=s.OPTForCausalLM,Hs=s.OPTModel,Xs=s.OPTPreTrainedModel,Ks=s.ObjectDetectionPipeline,Qs=s.OpenELMForCausalLM,Ys=s.OpenELMModel,Zs=s.OpenELMPreTrainedModel,Js=s.OwlViTFeatureExtractor,ei=s.OwlViTForObjectDetection,ti=s.OwlViTModel,ni=s.OwlViTPreTrainedModel,ri=s.OwlViTProcessor,ai=s.Owlv2ForObjectDetection,si=s.Owlv2ImageProcessor,ii=s.Owlv2Model,oi=s.Owlv2PreTrainedModel,li=s.Phi3ForCausalLM,ui=s.Phi3Model,di=s.Phi3PreTrainedModel,ci=s.PhiForCausalLM,pi=s.PhiModel,hi=s.PhiPreTrainedModel,mi=s.Pipeline,fi=s.PreTrainedModel,gi=s.
PreTrainedTokenizer,_i=s.PretrainedConfig,wi=s.PretrainedMixin,yi=s.Processor,bi=s.PvtForImageClassification,vi=s.PvtImageProcessor,xi=s.PvtModel,Mi=s.PvtPreTrainedModel,Ti=s.PyAnnoteFeatureExtractor,ki=s.PyAnnoteForAudioFrameClassification,$i=s.PyAnnoteModel,Ci=s.PyAnnotePreTrainedModel,Si=s.PyAnnoteProcessor,Pi=s.QuestionAnsweringModelOutput,Ei=s.QuestionAnsweringPipeline,Fi=s.Qwen2ForCausalLM,Ai=s.Qwen2Model,Ii=s.Qwen2PreTrainedModel,zi=s.Qwen2Tokenizer,Oi=s.RTDetrForObjectDetection,Bi=s.RTDetrImageProcessor,Li=s.RTDetrModel,Di=s.RTDetrObjectDetectionOutput,Ri=s.RTDetrPreTrainedModel,Ni=s.RawImage,Vi=s.ResNetForImageClassification,ji=s.ResNetModel,qi=s.ResNetPreTrainedModel,Gi=s.RoFormerForMaskedLM,Ui=s.RoFormerForQuestionAnswering,Wi=s.RoFormerForSequenceClassification,Hi=s.RoFormerForTokenClassification,Xi=s.RoFormerModel,Ki=s.RoFormerPreTrainedModel,Qi=s.RoFormerTokenizer,Yi=s.RobertaForMaskedLM,Zi=s.RobertaForQuestionAnswering,Ji=s.RobertaForSequenceClassification,eo=s.RobertaForTokenClassification,to=s.RobertaModel,no=s.RobertaPreTrainedModel,ro=s.RobertaTokenizer,ao=s.SamImageProcessor,so=s.SamImageSegmentationOutput,io=s.SamModel,oo=s.SamPreTrainedModel,lo=s.SamProcessor,uo=s.SapiensFeatureExtractor,co=s.SapiensForDepthEstimation,po=s.SapiensForNormalEstimation,ho=s.SapiensForSemanticSegmentation,mo=s.SapiensPreTrainedModel,fo=s.SeamlessM4TFeatureExtractor,go=s.SegformerFeatureExtractor,_o=s.SegformerForImageClassification,wo=s.SegformerForSemanticSegmentation,yo=s.SegformerModel,bo=s.SegformerPreTrainedModel,vo=s.Seq2SeqLMOutput,xo=s.SequenceClassifierOutput,Mo=s.SiglipImageProcessor,To=s.SiglipModel,ko=s.SiglipPreTrainedModel,$o=s.SiglipTextModel,Co=s.SiglipTokenizer,So=s.SiglipVisionModel,Po=s.SpeechT5FeatureExtractor,Eo=s.SpeechT5ForSpeechToText,Fo=s.SpeechT5ForTextToSpeech,Ao=s.SpeechT5HifiGan,Io=s.SpeechT5Model,zo=s.SpeechT5PreTrainedModel,Oo=s.SpeechT5Processor,Bo=s.SpeechT5Tokenizer,Lo=s.SqueezeBertForMaskedLM,Do=s.SqueezeBertForQuestionAnswering,R
o=s.SqueezeBertForSequenceClassification,No=s.SqueezeBertModel,Vo=s.SqueezeBertPreTrainedModel,jo=s.SqueezeBertTokenizer,qo=s.StableLmForCausalLM,Go=s.StableLmModel,Uo=s.StableLmPreTrainedModel,Wo=s.Starcoder2ForCausalLM,Ho=s.Starcoder2Model,Xo=s.Starcoder2PreTrainedModel,Ko=s.StoppingCriteria,Qo=s.StoppingCriteriaList,Yo=s.SummarizationPipeline,Zo=s.Swin2SRForImageSuperResolution,Jo=s.Swin2SRImageProcessor,el=s.Swin2SRModel,tl=s.Swin2SRPreTrainedModel,nl=s.SwinForImageClassification,rl=s.SwinModel,al=s.SwinPreTrainedModel,sl=s.T5ForConditionalGeneration,il=s.T5Model,ol=s.T5PreTrainedModel,ll=s.T5Tokenizer,ul=s.TableTransformerForObjectDetection,dl=s.TableTransformerModel,cl=s.TableTransformerObjectDetectionOutput,pl=s.TableTransformerPreTrainedModel,hl=s.Tensor,ml=s.Text2TextGenerationPipeline,fl=s.TextClassificationPipeline,gl=s.TextGenerationPipeline,_l=s.TextStreamer,wl=s.TextToAudioPipeline,yl=s.TokenClassificationPipeline,bl=s.TokenClassifierOutput,vl=s.TokenizerModel,xl=s.TrOCRForCausalLM,Ml=s.TrOCRPreTrainedModel,Tl=s.TranslationPipeline,kl=s.UniSpeechForCTC,$l=s.UniSpeechForSequenceClassification,Cl=s.UniSpeechModel,Sl=s.UniSpeechPreTrainedModel,Pl=s.UniSpeechSatForAudioFrameClassification,El=s.UniSpeechSatForCTC,Fl=s.UniSpeechSatForSequenceClassification,Al=s.UniSpeechSatModel,Il=s.UniSpeechSatPreTrainedModel,zl=s.ViTFeatureExtractor,Ol=s.ViTForImageClassification,Bl=s.ViTImageProcessor,Ll=s.ViTMAEModel,Dl=s.ViTMAEPreTrainedModel,Rl=s.ViTMSNForImageClassification,Nl=s.ViTMSNModel,Vl=s.ViTMSNPreTrainedModel,jl=s.ViTModel,ql=s.ViTPreTrainedModel,Gl=s.VisionEncoderDecoderModel,Ul=s.VitMatteForImageMatting,Wl=s.VitMatteImageProcessor,Hl=s.VitMattePreTrainedModel,Xl=s.VitsModel,Kl=s.VitsModelOutput,Ql=s.VitsPreTrainedModel,Yl=s.VitsTokenizer,Zl=s.Wav2Vec2BertForCTC,Jl=s.Wav2Vec2BertForSequenceClassification,eu=s.Wav2Vec2BertModel,tu=s.Wav2Vec2BertPreTrainedModel,nu=s.Wav2Vec2CTCTokenizer,ru=s.Wav2Vec2FeatureExtractor,au=s.Wav2Vec2ForAudioFrameClassification,su=
s.Wav2Vec2ForCTC,iu=s.Wav2Vec2ForSequenceClassification,ou=s.Wav2Vec2Model,lu=s.Wav2Vec2PreTrainedModel,uu=s.Wav2Vec2ProcessorWithLM,du=s.WavLMForAudioFrameClassification,cu=s.WavLMForCTC,pu=s.WavLMForSequenceClassification,hu=s.WavLMForXVector,mu=s.WavLMModel,fu=s.WavLMPreTrainedModel,gu=s.WeSpeakerFeatureExtractor,_u=s.WeSpeakerResNetModel,wu=s.WeSpeakerResNetPreTrainedModel,yu=s.WhisperFeatureExtractor,bu=s.WhisperForConditionalGeneration,vu=s.WhisperModel,xu=s.WhisperPreTrainedModel,Mu=s.WhisperProcessor,Tu=s.WhisperTextStreamer,ku=s.WhisperTokenizer,$u=s.XLMForQuestionAnswering,Cu=s.XLMForSequenceClassification,Su=s.XLMForTokenClassification,Pu=s.XLMModel,Eu=s.XLMPreTrainedModel,Fu=s.XLMRobertaForMaskedLM,Au=s.XLMRobertaForQuestionAnswering,Iu=s.XLMRobertaForSequenceClassification,zu=s.XLMRobertaForTokenClassification,Ou=s.XLMRobertaModel,Bu=s.XLMRobertaPreTrainedModel,Lu=s.XLMRobertaTokenizer,Du=s.XLMTokenizer,Ru=s.XLMWithLMHeadModel,Nu=s.XVectorOutput,Vu=s.YolosFeatureExtractor,ju=s.YolosForObjectDetection,qu=s.YolosModel,Gu=s.YolosObjectDetectionOutput,Uu=s.YolosPreTrainedModel,Wu=s.ZeroShotAudioClassificationPipeline,Hu=s.ZeroShotClassificationPipeline,Xu=s.ZeroShotImageClassificationPipeline,Ku=s.ZeroShotObjectDetectionPipeline,Qu=s.bankers_round,Yu=s.cat,Zu=s.cos_sim,Ju=s.dot,ed=s.dynamic_time_warping,td=s.env,nd=s.full,rd=s.full_like,ad=s.getKeyValueShapes,sd=s.hamming,id=s.hanning,od=s.interpolate,ld=s.interpolate_4d,ud=s.interpolate_data,dd=s.is_chinese_char,cd=s.layer_norm,pd=s.log_softmax,hd=s.magnitude,md=s.matmul,fd=s.max,gd=s.mean,_d=s.mean_pooling,wd=s.medianFilter,yd=s.mel_filter_bank,bd=s.min,vd=s.ones,xd=s.ones_like,Md=s.permute,Td=s.permute_data,kd=s.pipeline,$d=s.quantize_embeddings,Cd=s.read_audio,Sd=s.rfft,Pd=s.round,Ed=s.softmax,Fd=s.spectrogram,Ad=s.stack,Id=s.std_mean,zd=s.topk,Od=s.window_function,Bd=s.zeros,Ld=s.zeros_like;export{w as ASTFeatureExtractor,y as ASTForAudioClassification,b as ASTModel,v as ASTPreTrainedModel,x as 
AlbertForMaskedLM,M as AlbertForQuestionAnswering,T as AlbertForSequenceClassification,k as AlbertModel,$ as AlbertPreTrainedModel,C as AlbertTokenizer,S as AudioClassificationPipeline,P as AutoConfig,E as AutoModel,F as AutoModelForAudioClassification,A as AutoModelForAudioFrameClassification,I as AutoModelForCTC,z as AutoModelForCausalLM,O as AutoModelForDepthEstimation,B as AutoModelForDocumentQuestionAnswering,L as AutoModelForImageClassification,D as AutoModelForImageFeatureExtraction,R as AutoModelForImageMatting,N as AutoModelForImageSegmentation,V as AutoModelForImageToImage,j as AutoModelForMaskGeneration,q as AutoModelForMaskedLM,G as AutoModelForNormalEstimation,U as AutoModelForObjectDetection,W as AutoModelForQuestionAnswering,H as AutoModelForSemanticSegmentation,X as AutoModelForSeq2SeqLM,K as AutoModelForSequenceClassification,Q as AutoModelForSpeechSeq2Seq,Y as AutoModelForTextToSpectrogram,Z as AutoModelForTextToWaveform,J as AutoModelForTokenClassification,ee as AutoModelForUniversalSegmentation,te as AutoModelForVision2Seq,ne as AutoModelForXVector,re as AutoModelForZeroShotObjectDetection,ae as AutoProcessor,se as AutoTokenizer,ie as AutomaticSpeechRecognitionPipeline,oe as BartForConditionalGeneration,le as BartForSequenceClassification,ue as BartModel,de as BartPretrainedModel,ce as BartTokenizer,pe as BaseModelOutput,he as BaseStreamer,me as BeitFeatureExtractor,fe as BeitForImageClassification,ge as BeitModel,_e as BeitPreTrainedModel,we as BertForMaskedLM,ye as BertForQuestionAnswering,be as BertForSequenceClassification,ve as BertForTokenClassification,xe as BertModel,Me as BertPreTrainedModel,Te as BertTokenizer,ke as BitImageProcessor,$e as BlenderbotForConditionalGeneration,Ce as BlenderbotModel,Se as BlenderbotPreTrainedModel,Pe as BlenderbotSmallForConditionalGeneration,Ee as BlenderbotSmallModel,Fe as BlenderbotSmallPreTrainedModel,Ae as BlenderbotSmallTokenizer,Ie as BlenderbotTokenizer,ze as BloomForCausalLM,Oe as BloomModel,Be as 
BloomPreTrainedModel,Le as BloomTokenizer,De as CLIPFeatureExtractor,Re as CLIPImageProcessor,Ne as CLIPModel,Ve as CLIPPreTrainedModel,je as CLIPSegForImageSegmentation,qe as CLIPSegModel,Ge as CLIPSegPreTrainedModel,Ue as CLIPTextModel,We as CLIPTextModelWithProjection,He as CLIPTokenizer,Xe as CLIPVisionModel,Ke as CLIPVisionModelWithProjection,Qe as CamembertForMaskedLM,Ye as CamembertForQuestionAnswering,Ze as CamembertForSequenceClassification,Je as CamembertForTokenClassification,et as CamembertModel,tt as CamembertPreTrainedModel,nt as CamembertTokenizer,rt as CausalLMOutput,at as CausalLMOutputWithPast,st as ChineseCLIPFeatureExtractor,it as ChineseCLIPModel,ot as ChineseCLIPPreTrainedModel,lt as ClapAudioModelWithProjection,ut as ClapFeatureExtractor,dt as ClapModel,ct as ClapPreTrainedModel,pt as ClapTextModelWithProjection,ht as CodeGenForCausalLM,mt as CodeGenModel,ft as CodeGenPreTrainedModel,gt as CodeGenTokenizer,_t as CodeLlamaTokenizer,wt as CohereForCausalLM,yt as CohereModel,bt as CoherePreTrainedModel,vt as CohereTokenizer,xt as ConvBertForMaskedLM,Mt as ConvBertForQuestionAnswering,Tt as ConvBertForSequenceClassification,kt as ConvBertForTokenClassification,$t as ConvBertModel,Ct as ConvBertPreTrainedModel,St as ConvBertTokenizer,Pt as ConvNextFeatureExtractor,Et as ConvNextForImageClassification,Ft as ConvNextImageProcessor,At as ConvNextModel,It as ConvNextPreTrainedModel,zt as ConvNextV2ForImageClassification,Ot as ConvNextV2Model,Bt as ConvNextV2PreTrainedModel,Lt as DPTFeatureExtractor,Dt as DPTForDepthEstimation,Rt as DPTImageProcessor,Nt as DPTModel,Vt as DPTPreTrainedModel,jt as DebertaForMaskedLM,qt as DebertaForQuestionAnswering,Gt as DebertaForSequenceClassification,Ut as DebertaForTokenClassification,Wt as DebertaModel,Ht as DebertaPreTrainedModel,Xt as DebertaTokenizer,Kt as DebertaV2ForMaskedLM,Qt as DebertaV2ForQuestionAnswering,Yt as DebertaV2ForSequenceClassification,Zt as DebertaV2ForTokenClassification,Jt as 
DebertaV2Model,en as DebertaV2PreTrainedModel,tn as DebertaV2Tokenizer,nn as DecisionTransformerModel,rn as DecisionTransformerPreTrainedModel,an as DeiTFeatureExtractor,sn as DeiTForImageClassification,on as DeiTModel,ln as DeiTPreTrainedModel,un as DepthAnythingForDepthEstimation,dn as DepthAnythingPreTrainedModel,cn as DepthEstimationPipeline,pn as DepthProForDepthEstimation,hn as DepthProPreTrainedModel,mn as DetrFeatureExtractor,fn as DetrForObjectDetection,gn as DetrForSegmentation,_n as DetrModel,wn as DetrObjectDetectionOutput,yn as DetrPreTrainedModel,bn as DetrSegmentationOutput,vn as Dinov2ForImageClassification,xn as Dinov2Model,Mn as Dinov2PreTrainedModel,Tn as DistilBertForMaskedLM,kn as DistilBertForQuestionAnswering,$n as DistilBertForSequenceClassification,Cn as DistilBertForTokenClassification,Sn as DistilBertModel,Pn as DistilBertPreTrainedModel,En as DistilBertTokenizer,Fn as DocumentQuestionAnsweringPipeline,An as DonutFeatureExtractor,In as DonutImageProcessor,zn as DonutSwinModel,On as DonutSwinPreTrainedModel,Bn as EfficientNetForImageClassification,Ln as EfficientNetImageProcessor,Dn as EfficientNetModel,Rn as EfficientNetPreTrainedModel,Nn as ElectraForMaskedLM,Vn as ElectraForQuestionAnswering,jn as ElectraForSequenceClassification,qn as ElectraForTokenClassification,Gn as ElectraModel,Un as ElectraPreTrainedModel,Wn as ElectraTokenizer,Hn as EosTokenCriteria,Xn as EsmForMaskedLM,Kn as EsmForSequenceClassification,Qn as EsmForTokenClassification,Yn as EsmModel,Zn as EsmPreTrainedModel,Jn as EsmTokenizer,er as FFT,tr as FalconForCausalLM,nr as FalconModel,rr as FalconPreTrainedModel,ar as FalconTokenizer,sr as FastViTForImageClassification,ir as FastViTModel,or as FastViTPreTrainedModel,lr as FeatureExtractionPipeline,ur as FeatureExtractor,dr as FillMaskPipeline,cr as Florence2ForConditionalGeneration,pr as Florence2PreTrainedModel,hr as Florence2Processor,mr as GLPNFeatureExtractor,fr as GLPNForDepthEstimation,gr as GLPNModel,_r as 
GLPNPreTrainedModel,wr as GPT2LMHeadModel,yr as GPT2Model,br as GPT2PreTrainedModel,vr as GPT2Tokenizer,xr as GPTBigCodeForCausalLM,Mr as GPTBigCodeModel,Tr as GPTBigCodePreTrainedModel,kr as GPTJForCausalLM,$r as GPTJModel,Cr as GPTJPreTrainedModel,Sr as GPTNeoForCausalLM,Pr as GPTNeoModel,Er as GPTNeoPreTrainedModel,Fr as GPTNeoXForCausalLM,Ar as GPTNeoXModel,Ir as GPTNeoXPreTrainedModel,zr as GPTNeoXTokenizer,Or as Gemma2ForCausalLM,Br as Gemma2Model,Lr as Gemma2PreTrainedModel,Dr as GemmaForCausalLM,Rr as GemmaModel,Nr as GemmaPreTrainedModel,Vr as GemmaTokenizer,jr as GraniteForCausalLM,qr as GraniteModel,Gr as GranitePreTrainedModel,Ur as Grok1Tokenizer,Wr as GroupViTModel,Hr as GroupViTPreTrainedModel,Xr as HerbertTokenizer,Kr as HieraForImageClassification,Qr as HieraModel,Yr as HieraPreTrainedModel,Zr as HubertForCTC,Jr as HubertForSequenceClassification,ea as HubertModel,ta as HubertPreTrainedModel,na as ImageClassificationPipeline,ra as ImageFeatureExtractionPipeline,aa as ImageFeatureExtractor,sa as ImageMattingOutput,ia as ImageSegmentationPipeline,oa as ImageToImagePipeline,la as ImageToTextPipeline,ua as InterruptableStoppingCriteria,da as JAISLMHeadModel,ca as JAISModel,pa as JAISPreTrainedModel,ha as LlamaForCausalLM,ma as LlamaModel,fa as LlamaPreTrainedModel,ga as LlamaTokenizer,_a as LlavaForConditionalGeneration,wa as LlavaPreTrainedModel,ya as LongT5ForConditionalGeneration,ba as LongT5Model,va as LongT5PreTrainedModel,xa as M2M100ForConditionalGeneration,Ma as M2M100Model,Ta as M2M100PreTrainedModel,ka as M2M100Tokenizer,$a as MBart50Tokenizer,Ca as MBartForCausalLM,Sa as MBartForConditionalGeneration,Pa as MBartForSequenceClassification,Ea as MBartModel,Fa as MBartPreTrainedModel,Aa as MBartTokenizer,Ia as MPNetForMaskedLM,za as MPNetForQuestionAnswering,Oa as MPNetForSequenceClassification,Ba as MPNetForTokenClassification,La as MPNetModel,Da as MPNetPreTrainedModel,Ra as MPNetTokenizer,Na as MT5ForConditionalGeneration,Va as MT5Model,ja as 
MT5PreTrainedModel,qa as MarianMTModel,Ga as MarianModel,Ua as MarianPreTrainedModel,Wa as MarianTokenizer,Ha as MaskFormerFeatureExtractor,Xa as MaskFormerForInstanceSegmentation,Ka as MaskFormerModel,Qa as MaskFormerPreTrainedModel,Ya as MaskedLMOutput,Za as MaxLengthCriteria,Ja as MistralForCausalLM,es as MistralModel,ts as MistralPreTrainedModel,ns as MobileBertForMaskedLM,rs as MobileBertForQuestionAnswering,as as MobileBertForSequenceClassification,ss as MobileBertModel,is as MobileBertPreTrainedModel,os as MobileBertTokenizer,ls as MobileNetV1FeatureExtractor,us as MobileNetV1ForImageClassification,ds as MobileNetV1Model,cs as MobileNetV1PreTrainedModel,ps as MobileNetV2FeatureExtractor,hs as MobileNetV2ForImageClassification,ms as MobileNetV2Model,fs as MobileNetV2PreTrainedModel,gs as MobileNetV3FeatureExtractor,_s as MobileNetV3ForImageClassification,ws as MobileNetV3Model,ys as MobileNetV3PreTrainedModel,bs as MobileNetV4FeatureExtractor,vs as MobileNetV4ForImageClassification,xs as MobileNetV4Model,Ms as MobileNetV4PreTrainedModel,Ts as MobileViTFeatureExtractor,ks as MobileViTForImageClassification,$s as MobileViTImageProcessor,Cs as MobileViTModel,Ss as MobileViTPreTrainedModel,Ps as MobileViTV2ForImageClassification,Es as MobileViTV2Model,Fs as MobileViTV2PreTrainedModel,As as ModelOutput,Is as Moondream1ForConditionalGeneration,zs as MptForCausalLM,Os as MptModel,Bs as MptPreTrainedModel,Ls as MusicgenForCausalLM,Ds as MusicgenForConditionalGeneration,Rs as MusicgenModel,Ns as MusicgenPreTrainedModel,Vs as NllbTokenizer,js as NomicBertModel,qs as NomicBertPreTrainedModel,Gs as NougatImageProcessor,Us as NougatTokenizer,Ws as OPTForCausalLM,Hs as OPTModel,Xs as OPTPreTrainedModel,Ks as ObjectDetectionPipeline,Qs as OpenELMForCausalLM,Ys as OpenELMModel,Zs as OpenELMPreTrainedModel,Js as OwlViTFeatureExtractor,ei as OwlViTForObjectDetection,ti as OwlViTModel,ni as OwlViTPreTrainedModel,ri as OwlViTProcessor,ai as Owlv2ForObjectDetection,si as 
Owlv2ImageProcessor,ii as Owlv2Model,oi as Owlv2PreTrainedModel,li as Phi3ForCausalLM,ui as Phi3Model,di as Phi3PreTrainedModel,ci as PhiForCausalLM,pi as PhiModel,hi as PhiPreTrainedModel,mi as Pipeline,fi as PreTrainedModel,gi as PreTrainedTokenizer,_i as PretrainedConfig,wi as PretrainedMixin,yi as Processor,bi as PvtForImageClassification,vi as PvtImageProcessor,xi as PvtModel,Mi as PvtPreTrainedModel,Ti as PyAnnoteFeatureExtractor,ki as PyAnnoteForAudioFrameClassification,$i as PyAnnoteModel,Ci as PyAnnotePreTrainedModel,Si as PyAnnoteProcessor,Pi as QuestionAnsweringModelOutput,Ei as QuestionAnsweringPipeline,Fi as Qwen2ForCausalLM,Ai as Qwen2Model,Ii as Qwen2PreTrainedModel,zi as Qwen2Tokenizer,Oi as RTDetrForObjectDetection,Bi as RTDetrImageProcessor,Li as RTDetrModel,Di as RTDetrObjectDetectionOutput,Ri as RTDetrPreTrainedModel,Ni as RawImage,Vi as ResNetForImageClassification,ji as ResNetModel,qi as ResNetPreTrainedModel,Gi as RoFormerForMaskedLM,Ui as RoFormerForQuestionAnswering,Wi as RoFormerForSequenceClassification,Hi as RoFormerForTokenClassification,Xi as RoFormerModel,Ki as RoFormerPreTrainedModel,Qi as RoFormerTokenizer,Yi as RobertaForMaskedLM,Zi as RobertaForQuestionAnswering,Ji as RobertaForSequenceClassification,eo as RobertaForTokenClassification,to as RobertaModel,no as RobertaPreTrainedModel,ro as RobertaTokenizer,ao as SamImageProcessor,so as SamImageSegmentationOutput,io as SamModel,oo as SamPreTrainedModel,lo as SamProcessor,uo as SapiensFeatureExtractor,co as SapiensForDepthEstimation,po as SapiensForNormalEstimation,ho as SapiensForSemanticSegmentation,mo as SapiensPreTrainedModel,fo as SeamlessM4TFeatureExtractor,go as SegformerFeatureExtractor,_o as SegformerForImageClassification,wo as SegformerForSemanticSegmentation,yo as SegformerModel,bo as SegformerPreTrainedModel,vo as Seq2SeqLMOutput,xo as SequenceClassifierOutput,Mo as SiglipImageProcessor,To as SiglipModel,ko as SiglipPreTrainedModel,$o as SiglipTextModel,Co as 
SiglipTokenizer,So as SiglipVisionModel,Po as SpeechT5FeatureExtractor,Eo as SpeechT5ForSpeechToText,Fo as SpeechT5ForTextToSpeech,Ao as SpeechT5HifiGan,Io as SpeechT5Model,zo as SpeechT5PreTrainedModel,Oo as SpeechT5Processor,Bo as SpeechT5Tokenizer,Lo as SqueezeBertForMaskedLM,Do as SqueezeBertForQuestionAnswering,Ro as SqueezeBertForSequenceClassification,No as SqueezeBertModel,Vo as SqueezeBertPreTrainedModel,jo as SqueezeBertTokenizer,qo as StableLmForCausalLM,Go as StableLmModel,Uo as StableLmPreTrainedModel,Wo as Starcoder2ForCausalLM,Ho as Starcoder2Model,Xo as Starcoder2PreTrainedModel,Ko as StoppingCriteria,Qo as StoppingCriteriaList,Yo as SummarizationPipeline,Zo as Swin2SRForImageSuperResolution,Jo as Swin2SRImageProcessor,el as Swin2SRModel,tl as Swin2SRPreTrainedModel,nl as SwinForImageClassification,rl as SwinModel,al as SwinPreTrainedModel,sl as T5ForConditionalGeneration,il as T5Model,ol as T5PreTrainedModel,ll as T5Tokenizer,ul as TableTransformerForObjectDetection,dl as TableTransformerModel,cl as TableTransformerObjectDetectionOutput,pl as TableTransformerPreTrainedModel,hl as Tensor,ml as Text2TextGenerationPipeline,fl as TextClassificationPipeline,gl as TextGenerationPipeline,_l as TextStreamer,wl as TextToAudioPipeline,yl as TokenClassificationPipeline,bl as TokenClassifierOutput,vl as TokenizerModel,xl as TrOCRForCausalLM,Ml as TrOCRPreTrainedModel,Tl as TranslationPipeline,kl as UniSpeechForCTC,$l as UniSpeechForSequenceClassification,Cl as UniSpeechModel,Sl as UniSpeechPreTrainedModel,Pl as UniSpeechSatForAudioFrameClassification,El as UniSpeechSatForCTC,Fl as UniSpeechSatForSequenceClassification,Al as UniSpeechSatModel,Il as UniSpeechSatPreTrainedModel,zl as ViTFeatureExtractor,Ol as ViTForImageClassification,Bl as ViTImageProcessor,Ll as ViTMAEModel,Dl as ViTMAEPreTrainedModel,Rl as ViTMSNForImageClassification,Nl as ViTMSNModel,Vl as ViTMSNPreTrainedModel,jl as ViTModel,ql as ViTPreTrainedModel,Gl as VisionEncoderDecoderModel,Ul as 
VitMatteForImageMatting,Wl as VitMatteImageProcessor,Hl as VitMattePreTrainedModel,Xl as VitsModel,Kl as VitsModelOutput,Ql as VitsPreTrainedModel,Yl as VitsTokenizer,Zl as Wav2Vec2BertForCTC,Jl as Wav2Vec2BertForSequenceClassification,eu as Wav2Vec2BertModel,tu as Wav2Vec2BertPreTrainedModel,nu as Wav2Vec2CTCTokenizer,ru as Wav2Vec2FeatureExtractor,au as Wav2Vec2ForAudioFrameClassification,su as Wav2Vec2ForCTC,iu as Wav2Vec2ForSequenceClassification,ou as Wav2Vec2Model,lu as Wav2Vec2PreTrainedModel,uu as Wav2Vec2ProcessorWithLM,du as WavLMForAudioFrameClassification,cu as WavLMForCTC,pu as WavLMForSequenceClassification,hu as WavLMForXVector,mu as WavLMModel,fu as WavLMPreTrainedModel,gu as WeSpeakerFeatureExtractor,_u as WeSpeakerResNetModel,wu as WeSpeakerResNetPreTrainedModel,yu as WhisperFeatureExtractor,bu as WhisperForConditionalGeneration,vu as WhisperModel,xu as WhisperPreTrainedModel,Mu as WhisperProcessor,Tu as WhisperTextStreamer,ku as WhisperTokenizer,$u as XLMForQuestionAnswering,Cu as XLMForSequenceClassification,Su as XLMForTokenClassification,Pu as XLMModel,Eu as XLMPreTrainedModel,Fu as XLMRobertaForMaskedLM,Au as XLMRobertaForQuestionAnswering,Iu as XLMRobertaForSequenceClassification,zu as XLMRobertaForTokenClassification,Ou as XLMRobertaModel,Bu as XLMRobertaPreTrainedModel,Lu as XLMRobertaTokenizer,Du as XLMTokenizer,Ru as XLMWithLMHeadModel,Nu as XVectorOutput,Vu as YolosFeatureExtractor,ju as YolosForObjectDetection,qu as YolosModel,Gu as YolosObjectDetectionOutput,Uu as YolosPreTrainedModel,Wu as ZeroShotAudioClassificationPipeline,Hu as ZeroShotClassificationPipeline,Xu as ZeroShotImageClassificationPipeline,Ku as ZeroShotObjectDetectionPipeline,Qu as bankers_round,Yu as cat,Zu as cos_sim,Ju as dot,ed as dynamic_time_warping,td as env,nd as full,rd as full_like,ad as getKeyValueShapes,sd as hamming,id as hanning,od as interpolate,ld as interpolate_4d,ud as interpolate_data,dd as is_chinese_char,cd as layer_norm,pd as log_softmax,hd as 
magnitude,md as matmul,fd as max,gd as mean,_d as mean_pooling,wd as medianFilter,yd as mel_filter_bank,bd as min,vd as ones,xd as ones_like,Md as permute,Td as permute_data,kd as pipeline,$d as quantize_embeddings,Cd as read_audio,Sd as rfft,Pd as round,Ed as softmax,Fd as spectrogram,Ad as stack,Id as std_mean,zd as topk,Od as window_function,Bd as zeros,Ld as zeros_like};
229
+ \*****************************/a.r(s),a.d(s,{ASTFeatureExtractor:()=>d.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>u.AlbertTokenizer,AudioClassificationPipeline:()=>o.AudioClassificationPipeline,AutoConfig:()=>c.AutoConfig,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForText
ToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForUniversalSegmentation:()=>l.AutoModelForUniversalSegmentation,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>d.AutoProcessor,AutoTokenizer:()=>u.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>o.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>u.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>g.BaseStreamer,BeitFeatureExtractor:()=>d.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>u.BertTokenizer,BitImageProcessor:()=>d.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallModel,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>u.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>u.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,BloomModel:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>u.BloomTokenizer,CLIPFeatureExtractor:()=>d.CLIPFeatureExtra
ctor,CLIPImageProcessor:()=>d.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModel:()=>l.CLIPTextModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>u.CLIPTokenizer,CLIPVisionModel:()=>l.CLIPVisionModel,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>u.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>d.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>d.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>u.CodeGenTokenizer,CodeLlamaTokenizer:()=>u.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>u.CohereTokenizer,ConvBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTok
enClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>u.ConvBertTokenizer,ConvNextFeatureExtractor:()=>d.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>d.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>d.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>d.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>u.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionAnswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>u.DebertaV2Tokenizer,DecisionTransformerModel:()=>l.DecisionTransformerModel,DecisionTransformerPreTrainedModel:()=>l.DecisionTransformerPreTrainedModel,DeiTFeatureExtractor:()=>d.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTForImageClassification,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,Depth
EstimationPipeline:()=>o.DepthEstimationPipeline,DepthProForDepthEstimation:()=>l.DepthProForDepthEstimation,DepthProPreTrainedModel:()=>l.DepthProPreTrainedModel,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>u.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>o.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>d.DonutFeatureExtractor,DonutImageProcessor:()=>d.DonutImageProcessor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>d.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>u.ElectraTokenizer,EosTokenCriteria:()=>_.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSeq
uenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>u.EsmTokenizer,FFT:()=>f.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>u.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>o.FeatureExtractionPipeline,FeatureExtractor:()=>d.FeatureExtractor,FillMaskPipeline:()=>o.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>d.Florence2Processor,GLPNFeatureExtractor:()=>d.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>u.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>u.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>u.GemmaTokenizer,GraniteForCausa
lLM:()=>l.GraniteForCausalLM,GraniteModel:()=>l.GraniteModel,GranitePreTrainedModel:()=>l.GranitePreTrainedModel,Grok1Tokenizer:()=>u.Grok1Tokenizer,GroupViTModel:()=>l.GroupViTModel,GroupViTPreTrainedModel:()=>l.GroupViTPreTrainedModel,HerbertTokenizer:()=>u.HerbertTokenizer,HieraForImageClassification:()=>l.HieraForImageClassification,HieraModel:()=>l.HieraModel,HieraPreTrainedModel:()=>l.HieraPreTrainedModel,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,ImageClassificationPipeline:()=>o.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>o.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>d.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageSegmentationPipeline:()=>o.ImageSegmentationPipeline,ImageToImagePipeline:()=>o.ImageToImagePipeline,ImageToTextPipeline:()=>o.ImageToTextPipeline,InterruptableStoppingCriteria:()=>_.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>l.LlamaPreTrainedModel,LlamaTokenizer:()=>u.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>u.M2M100Tokenizer,MBart50Tokenizer:()=>u.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreT
rainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>u.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>u.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>u.MarianTokenizer,MaskFormerFeatureExtractor:()=>d.MaskFormerFeatureExtractor,MaskFormerForInstanceSegmentation:()=>l.MaskFormerForInstanceSegmentation,MaskFormerModel:()=>l.MaskFormerModel,MaskFormerPreTrainedModel:()=>l.MaskFormerPreTrainedModel,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>_.MaxLengthCriteria,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>u.MobileBertTokenizer,MobileLLMForCausalLM:()=>l.MobileLLMForCausalLM,MobileLLMModel:()=>l.MobileLLMModel,MobileLLMPreTrainedModel:()=>l.MobileLLMPreTrainedModel,MobileNetV1FeatureExtractor:()=>d.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>d.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2Model:
()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>d.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>l.MobileNetV3ForImageClassification,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>d.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>d.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>d.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>u.NllbTokenizer,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>d.NougatImageProcessor,NougatTokenizer:()=>u.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>o.ObjectDetectionPipeline,OlmoForCausalLM:()=>l.OlmoForCausalLM,OlmoModel:()=>l.OlmoModel,OlmoPreTrainedModel:()=>l.OlmoPreTrainedModel,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()
=>l.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>d.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>d.OwlViTProcessor,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>d.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>o.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>u.PreTrainedTokenizer,PretrainedConfig:()=>c.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>d.Processor,PvtForImageClassification:()=>l.PvtForImageClassification,PvtImageProcessor:()=>d.PvtImageProcessor,PvtModel:()=>l.PvtModel,PvtPreTrainedModel:()=>l.PvtPreTrainedModel,PyAnnoteFeatureExtractor:()=>d.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>d.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>o.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>u.Qwen2Tokenizer,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>d.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>h.RawImage,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l
.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoFormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>u.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>u.RobertaTokenizer,SamImageProcessor:()=>d.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>d.SamProcessor,SapiensFeatureExtractor:()=>d.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>d.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>d.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.SegformerForSemanticSegmentation,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>d.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>u.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>d.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.S
peechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>d.SpeechT5Processor,SpeechT5Tokenizer:()=>u.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>u.SqueezeBertTokenizer,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>_.StoppingCriteria,StoppingCriteriaList:()=>_.StoppingCriteriaList,SummarizationPipeline:()=>o.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>l.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>d.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,SwinModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>u.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,Tensor:()=>m.Tensor,Text2TextGenerationPipeline:()=>o.Text2TextGenerationPipeline,TextClassificationPipeline:()=>o.TextClassificationPipeline,TextGenerationPipeline:()=>o.TextGene
rationPipeline,TextStreamer:()=>g.TextStreamer,TextToAudioPipeline:()=>o.TextToAudioPipeline,TokenClassificationPipeline:()=>o.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>u.TokenizerModel,TrOCRForCausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>o.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>d.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>d.ViTImageProcessor,ViTMAEModel:()=>l.ViTMAEModel,ViTMAEPreTrainedModel:()=>l.ViTMAEPreTrainedModel,ViTMSNForImageClassification:()=>l.ViTMSNForImageClassification,ViTMSNModel:()=>l.ViTMSNModel,ViTMSNPreTrainedModel:()=>l.ViTMSNPreTrainedModel,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>d.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>u.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>u.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:
()=>d.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>d.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudioFrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>d.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>d.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,WhisperModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>d.WhisperProcessor,WhisperTextStreamer:()=>g.WhisperTextStreamer,WhisperTokenizer:()=>u.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>u.XLMRobertaTokenizer,XLMTokenizer:()=>u.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>d.YolosFeatureExtractor,YolosForObjectDetection:()=>l.Yol
osForObjectDetection,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>o.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>o.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>o.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>o.ZeroShotObjectDetectionPipeline,bankers_round:()=>f.bankers_round,cat:()=>m.cat,cos_sim:()=>f.cos_sim,dot:()=>f.dot,dynamic_time_warping:()=>f.dynamic_time_warping,env:()=>i.env,full:()=>m.full,full_like:()=>m.full_like,getKeyValueShapes:()=>c.getKeyValueShapes,hamming:()=>p.hamming,hanning:()=>p.hanning,interpolate:()=>m.interpolate,interpolate_4d:()=>m.interpolate_4d,interpolate_data:()=>f.interpolate_data,is_chinese_char:()=>u.is_chinese_char,layer_norm:()=>m.layer_norm,log_softmax:()=>f.log_softmax,magnitude:()=>f.magnitude,matmul:()=>m.matmul,max:()=>f.max,mean:()=>m.mean,mean_pooling:()=>m.mean_pooling,medianFilter:()=>f.medianFilter,mel_filter_bank:()=>p.mel_filter_bank,min:()=>f.min,ones:()=>m.ones,ones_like:()=>m.ones_like,permute:()=>m.permute,permute_data:()=>f.permute_data,pipeline:()=>o.pipeline,quantize_embeddings:()=>m.quantize_embeddings,read_audio:()=>p.read_audio,rfft:()=>m.rfft,round:()=>f.round,softmax:()=>f.softmax,spectrogram:()=>p.spectrogram,stack:()=>m.stack,std_mean:()=>m.std_mean,topk:()=>m.topk,window_function:()=>p.window_function,zeros:()=>m.zeros,zeros_like:()=>m.zeros_like});var i=a(/*! ./env.js */"./src/env.js"),o=a(/*! ./pipelines.js */"./src/pipelines.js"),l=a(/*! ./models.js */"./src/models.js"),u=a(/*! ./tokenizers.js */"./src/tokenizers.js"),d=a(/*! ./processors.js */"./src/processors.js"),c=a(/*! ./configs.js */"./src/configs.js"),p=a(/*! ./utils/audio.js */"./src/utils/audio.js"),h=a(/*! ./utils/image.js */"./src/utils/image.js"),m=a(/*! ./utils/tensor.js */"./src/utils/tensor.js"),f=a(/*! 
./utils/maths.js */"./src/utils/maths.js"),g=a(/*! ./generation/streamers.js */"./src/generation/streamers.js"),_=a(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),w=s.ASTFeatureExtractor,y=s.ASTForAudioClassification,b=s.ASTModel,v=s.ASTPreTrainedModel,x=s.AlbertForMaskedLM,M=s.AlbertForQuestionAnswering,T=s.AlbertForSequenceClassification,k=s.AlbertModel,$=s.AlbertPreTrainedModel,C=s.AlbertTokenizer,S=s.AudioClassificationPipeline,P=s.AutoConfig,E=s.AutoModel,F=s.AutoModelForAudioClassification,A=s.AutoModelForAudioFrameClassification,I=s.AutoModelForCTC,z=s.AutoModelForCausalLM,O=s.AutoModelForDepthEstimation,B=s.AutoModelForDocumentQuestionAnswering,L=s.AutoModelForImageClassification,D=s.AutoModelForImageFeatureExtraction,R=s.AutoModelForImageMatting,N=s.AutoModelForImageSegmentation,V=s.AutoModelForImageToImage,j=s.AutoModelForMaskGeneration,q=s.AutoModelForMaskedLM,G=s.AutoModelForNormalEstimation,U=s.AutoModelForObjectDetection,W=s.AutoModelForQuestionAnswering,H=s.AutoModelForSemanticSegmentation,X=s.AutoModelForSeq2SeqLM,K=s.AutoModelForSequenceClassification,Q=s.AutoModelForSpeechSeq2Seq,Y=s.AutoModelForTextToSpectrogram,Z=s.AutoModelForTextToWaveform,J=s.AutoModelForTokenClassification,ee=s.AutoModelForUniversalSegmentation,te=s.AutoModelForVision2Seq,ne=s.AutoModelForXVector,re=s.AutoModelForZeroShotObjectDetection,ae=s.AutoProcessor,se=s.AutoTokenizer,ie=s.AutomaticSpeechRecognitionPipeline,oe=s.BartForConditionalGeneration,le=s.BartForSequenceClassification,ue=s.BartModel,de=s.BartPretrainedModel,ce=s.BartTokenizer,pe=s.BaseModelOutput,he=s.BaseStreamer,me=s.BeitFeatureExtractor,fe=s.BeitForImageClassification,ge=s.BeitModel,_e=s.BeitPreTrainedModel,we=s.BertForMaskedLM,ye=s.BertForQuestionAnswering,be=s.BertForSequenceClassification,ve=s.BertForTokenClassification,xe=s.BertModel,Me=s.BertPreTrainedModel,Te=s.BertTokenizer,ke=s.BitImageProcessor,$e=s.BlenderbotForConditionalGeneration,Ce=s.BlenderbotModel,Se=s.Blenderb
otPreTrainedModel,Pe=s.BlenderbotSmallForConditionalGeneration,Ee=s.BlenderbotSmallModel,Fe=s.BlenderbotSmallPreTrainedModel,Ae=s.BlenderbotSmallTokenizer,Ie=s.BlenderbotTokenizer,ze=s.BloomForCausalLM,Oe=s.BloomModel,Be=s.BloomPreTrainedModel,Le=s.BloomTokenizer,De=s.CLIPFeatureExtractor,Re=s.CLIPImageProcessor,Ne=s.CLIPModel,Ve=s.CLIPPreTrainedModel,je=s.CLIPSegForImageSegmentation,qe=s.CLIPSegModel,Ge=s.CLIPSegPreTrainedModel,Ue=s.CLIPTextModel,We=s.CLIPTextModelWithProjection,He=s.CLIPTokenizer,Xe=s.CLIPVisionModel,Ke=s.CLIPVisionModelWithProjection,Qe=s.CamembertForMaskedLM,Ye=s.CamembertForQuestionAnswering,Ze=s.CamembertForSequenceClassification,Je=s.CamembertForTokenClassification,et=s.CamembertModel,tt=s.CamembertPreTrainedModel,nt=s.CamembertTokenizer,rt=s.CausalLMOutput,at=s.CausalLMOutputWithPast,st=s.ChineseCLIPFeatureExtractor,it=s.ChineseCLIPModel,ot=s.ChineseCLIPPreTrainedModel,lt=s.ClapAudioModelWithProjection,ut=s.ClapFeatureExtractor,dt=s.ClapModel,ct=s.ClapPreTrainedModel,pt=s.ClapTextModelWithProjection,ht=s.CodeGenForCausalLM,mt=s.CodeGenModel,ft=s.CodeGenPreTrainedModel,gt=s.CodeGenTokenizer,_t=s.CodeLlamaTokenizer,wt=s.CohereForCausalLM,yt=s.CohereModel,bt=s.CoherePreTrainedModel,vt=s.CohereTokenizer,xt=s.ConvBertForMaskedLM,Mt=s.ConvBertForQuestionAnswering,Tt=s.ConvBertForSequenceClassification,kt=s.ConvBertForTokenClassification,$t=s.ConvBertModel,Ct=s.ConvBertPreTrainedModel,St=s.ConvBertTokenizer,Pt=s.ConvNextFeatureExtractor,Et=s.ConvNextForImageClassification,Ft=s.ConvNextImageProcessor,At=s.ConvNextModel,It=s.ConvNextPreTrainedModel,zt=s.ConvNextV2ForImageClassification,Ot=s.ConvNextV2Model,Bt=s.ConvNextV2PreTrainedModel,Lt=s.DPTFeatureExtractor,Dt=s.DPTForDepthEstimation,Rt=s.DPTImageProcessor,Nt=s.DPTModel,Vt=s.DPTPreTrainedModel,jt=s.DebertaForMaskedLM,qt=s.DebertaForQuestionAnswering,Gt=s.DebertaForSequenceClassification,Ut=s.DebertaForTokenClassification,Wt=s.DebertaModel,Ht=s.DebertaPreTrainedModel,Xt=s.DebertaTokenizer,Kt=s.Deb
ertaV2ForMaskedLM,Qt=s.DebertaV2ForQuestionAnswering,Yt=s.DebertaV2ForSequenceClassification,Zt=s.DebertaV2ForTokenClassification,Jt=s.DebertaV2Model,en=s.DebertaV2PreTrainedModel,tn=s.DebertaV2Tokenizer,nn=s.DecisionTransformerModel,rn=s.DecisionTransformerPreTrainedModel,an=s.DeiTFeatureExtractor,sn=s.DeiTForImageClassification,on=s.DeiTModel,ln=s.DeiTPreTrainedModel,un=s.DepthAnythingForDepthEstimation,dn=s.DepthAnythingPreTrainedModel,cn=s.DepthEstimationPipeline,pn=s.DepthProForDepthEstimation,hn=s.DepthProPreTrainedModel,mn=s.DetrFeatureExtractor,fn=s.DetrForObjectDetection,gn=s.DetrForSegmentation,_n=s.DetrModel,wn=s.DetrObjectDetectionOutput,yn=s.DetrPreTrainedModel,bn=s.DetrSegmentationOutput,vn=s.Dinov2ForImageClassification,xn=s.Dinov2Model,Mn=s.Dinov2PreTrainedModel,Tn=s.DistilBertForMaskedLM,kn=s.DistilBertForQuestionAnswering,$n=s.DistilBertForSequenceClassification,Cn=s.DistilBertForTokenClassification,Sn=s.DistilBertModel,Pn=s.DistilBertPreTrainedModel,En=s.DistilBertTokenizer,Fn=s.DocumentQuestionAnsweringPipeline,An=s.DonutFeatureExtractor,In=s.DonutImageProcessor,zn=s.DonutSwinModel,On=s.DonutSwinPreTrainedModel,Bn=s.EfficientNetForImageClassification,Ln=s.EfficientNetImageProcessor,Dn=s.EfficientNetModel,Rn=s.EfficientNetPreTrainedModel,Nn=s.ElectraForMaskedLM,Vn=s.ElectraForQuestionAnswering,jn=s.ElectraForSequenceClassification,qn=s.ElectraForTokenClassification,Gn=s.ElectraModel,Un=s.ElectraPreTrainedModel,Wn=s.ElectraTokenizer,Hn=s.EosTokenCriteria,Xn=s.EsmForMaskedLM,Kn=s.EsmForSequenceClassification,Qn=s.EsmForTokenClassification,Yn=s.EsmModel,Zn=s.EsmPreTrainedModel,Jn=s.EsmTokenizer,er=s.FFT,tr=s.FalconForCausalLM,nr=s.FalconModel,rr=s.FalconPreTrainedModel,ar=s.FalconTokenizer,sr=s.FastViTForImageClassification,ir=s.FastViTModel,or=s.FastViTPreTrainedModel,lr=s.FeatureExtractionPipeline,ur=s.FeatureExtractor,dr=s.FillMaskPipeline,cr=s.Florence2ForConditionalGeneration,pr=s.Florence2PreTrainedModel,hr=s.Florence2Processor,mr=s.GLPNFeature
Extractor,fr=s.GLPNForDepthEstimation,gr=s.GLPNModel,_r=s.GLPNPreTrainedModel,wr=s.GPT2LMHeadModel,yr=s.GPT2Model,br=s.GPT2PreTrainedModel,vr=s.GPT2Tokenizer,xr=s.GPTBigCodeForCausalLM,Mr=s.GPTBigCodeModel,Tr=s.GPTBigCodePreTrainedModel,kr=s.GPTJForCausalLM,$r=s.GPTJModel,Cr=s.GPTJPreTrainedModel,Sr=s.GPTNeoForCausalLM,Pr=s.GPTNeoModel,Er=s.GPTNeoPreTrainedModel,Fr=s.GPTNeoXForCausalLM,Ar=s.GPTNeoXModel,Ir=s.GPTNeoXPreTrainedModel,zr=s.GPTNeoXTokenizer,Or=s.Gemma2ForCausalLM,Br=s.Gemma2Model,Lr=s.Gemma2PreTrainedModel,Dr=s.GemmaForCausalLM,Rr=s.GemmaModel,Nr=s.GemmaPreTrainedModel,Vr=s.GemmaTokenizer,jr=s.GraniteForCausalLM,qr=s.GraniteModel,Gr=s.GranitePreTrainedModel,Ur=s.Grok1Tokenizer,Wr=s.GroupViTModel,Hr=s.GroupViTPreTrainedModel,Xr=s.HerbertTokenizer,Kr=s.HieraForImageClassification,Qr=s.HieraModel,Yr=s.HieraPreTrainedModel,Zr=s.HubertForCTC,Jr=s.HubertForSequenceClassification,ea=s.HubertModel,ta=s.HubertPreTrainedModel,na=s.ImageClassificationPipeline,ra=s.ImageFeatureExtractionPipeline,aa=s.ImageFeatureExtractor,sa=s.ImageMattingOutput,ia=s.ImageSegmentationPipeline,oa=s.ImageToImagePipeline,la=s.ImageToTextPipeline,ua=s.InterruptableStoppingCriteria,da=s.JAISLMHeadModel,ca=s.JAISModel,pa=s.JAISPreTrainedModel,ha=s.LlamaForCausalLM,ma=s.LlamaModel,fa=s.LlamaPreTrainedModel,ga=s.LlamaTokenizer,_a=s.LlavaForConditionalGeneration,wa=s.LlavaPreTrainedModel,ya=s.LongT5ForConditionalGeneration,ba=s.LongT5Model,va=s.LongT5PreTrainedModel,xa=s.M2M100ForConditionalGeneration,Ma=s.M2M100Model,Ta=s.M2M100PreTrainedModel,ka=s.M2M100Tokenizer,$a=s.MBart50Tokenizer,Ca=s.MBartForCausalLM,Sa=s.MBartForConditionalGeneration,Pa=s.MBartForSequenceClassification,Ea=s.MBartModel,Fa=s.MBartPreTrainedModel,Aa=s.MBartTokenizer,Ia=s.MPNetForMaskedLM,za=s.MPNetForQuestionAnswering,Oa=s.MPNetForSequenceClassification,Ba=s.MPNetForTokenClassification,La=s.MPNetModel,Da=s.MPNetPreTrainedModel,Ra=s.MPNetTokenizer,Na=s.MT5ForConditionalGeneration,Va=s.MT5Model,ja=s.MT5PreTrainedModel,qa
=s.MarianMTModel,Ga=s.MarianModel,Ua=s.MarianPreTrainedModel,Wa=s.MarianTokenizer,Ha=s.MaskFormerFeatureExtractor,Xa=s.MaskFormerForInstanceSegmentation,Ka=s.MaskFormerModel,Qa=s.MaskFormerPreTrainedModel,Ya=s.MaskedLMOutput,Za=s.MaxLengthCriteria,Ja=s.MistralForCausalLM,es=s.MistralModel,ts=s.MistralPreTrainedModel,ns=s.MobileBertForMaskedLM,rs=s.MobileBertForQuestionAnswering,as=s.MobileBertForSequenceClassification,ss=s.MobileBertModel,is=s.MobileBertPreTrainedModel,os=s.MobileBertTokenizer,ls=s.MobileLLMForCausalLM,us=s.MobileLLMModel,ds=s.MobileLLMPreTrainedModel,cs=s.MobileNetV1FeatureExtractor,ps=s.MobileNetV1ForImageClassification,hs=s.MobileNetV1Model,ms=s.MobileNetV1PreTrainedModel,fs=s.MobileNetV2FeatureExtractor,gs=s.MobileNetV2ForImageClassification,_s=s.MobileNetV2Model,ws=s.MobileNetV2PreTrainedModel,ys=s.MobileNetV3FeatureExtractor,bs=s.MobileNetV3ForImageClassification,vs=s.MobileNetV3Model,xs=s.MobileNetV3PreTrainedModel,Ms=s.MobileNetV4FeatureExtractor,Ts=s.MobileNetV4ForImageClassification,ks=s.MobileNetV4Model,$s=s.MobileNetV4PreTrainedModel,Cs=s.MobileViTFeatureExtractor,Ss=s.MobileViTForImageClassification,Ps=s.MobileViTImageProcessor,Es=s.MobileViTModel,Fs=s.MobileViTPreTrainedModel,As=s.MobileViTV2ForImageClassification,Is=s.MobileViTV2Model,zs=s.MobileViTV2PreTrainedModel,Os=s.ModelOutput,Bs=s.Moondream1ForConditionalGeneration,Ls=s.MptForCausalLM,Ds=s.MptModel,Rs=s.MptPreTrainedModel,Ns=s.MusicgenForCausalLM,Vs=s.MusicgenForConditionalGeneration,js=s.MusicgenModel,qs=s.MusicgenPreTrainedModel,Gs=s.NllbTokenizer,Us=s.NomicBertModel,Ws=s.NomicBertPreTrainedModel,Hs=s.NougatImageProcessor,Xs=s.NougatTokenizer,Ks=s.OPTForCausalLM,Qs=s.OPTModel,Ys=s.OPTPreTrainedModel,Zs=s.ObjectDetectionPipeline,Js=s.OlmoForCausalLM,ei=s.OlmoModel,ti=s.OlmoPreTrainedModel,ni=s.OpenELMForCausalLM,ri=s.OpenELMModel,ai=s.OpenELMPreTrainedModel,si=s.OwlViTFeatureExtractor,ii=s.OwlViTForObjectDetection,oi=s.OwlViTModel,li=s.OwlViTPreTrainedModel,ui=s.OwlViTProcesso
r,di=s.Owlv2ForObjectDetection,ci=s.Owlv2ImageProcessor,pi=s.Owlv2Model,hi=s.Owlv2PreTrainedModel,mi=s.Phi3ForCausalLM,fi=s.Phi3Model,gi=s.Phi3PreTrainedModel,_i=s.PhiForCausalLM,wi=s.PhiModel,yi=s.PhiPreTrainedModel,bi=s.Pipeline,vi=s.PreTrainedModel,xi=s.PreTrainedTokenizer,Mi=s.PretrainedConfig,Ti=s.PretrainedMixin,ki=s.Processor,$i=s.PvtForImageClassification,Ci=s.PvtImageProcessor,Si=s.PvtModel,Pi=s.PvtPreTrainedModel,Ei=s.PyAnnoteFeatureExtractor,Fi=s.PyAnnoteForAudioFrameClassification,Ai=s.PyAnnoteModel,Ii=s.PyAnnotePreTrainedModel,zi=s.PyAnnoteProcessor,Oi=s.QuestionAnsweringModelOutput,Bi=s.QuestionAnsweringPipeline,Li=s.Qwen2ForCausalLM,Di=s.Qwen2Model,Ri=s.Qwen2PreTrainedModel,Ni=s.Qwen2Tokenizer,Vi=s.RTDetrForObjectDetection,ji=s.RTDetrImageProcessor,qi=s.RTDetrModel,Gi=s.RTDetrObjectDetectionOutput,Ui=s.RTDetrPreTrainedModel,Wi=s.RawImage,Hi=s.ResNetForImageClassification,Xi=s.ResNetModel,Ki=s.ResNetPreTrainedModel,Qi=s.RoFormerForMaskedLM,Yi=s.RoFormerForQuestionAnswering,Zi=s.RoFormerForSequenceClassification,Ji=s.RoFormerForTokenClassification,eo=s.RoFormerModel,to=s.RoFormerPreTrainedModel,no=s.RoFormerTokenizer,ro=s.RobertaForMaskedLM,ao=s.RobertaForQuestionAnswering,so=s.RobertaForSequenceClassification,io=s.RobertaForTokenClassification,oo=s.RobertaModel,lo=s.RobertaPreTrainedModel,uo=s.RobertaTokenizer,co=s.SamImageProcessor,po=s.SamImageSegmentationOutput,ho=s.SamModel,mo=s.SamPreTrainedModel,fo=s.SamProcessor,go=s.SapiensFeatureExtractor,_o=s.SapiensForDepthEstimation,wo=s.SapiensForNormalEstimation,yo=s.SapiensForSemanticSegmentation,bo=s.SapiensPreTrainedModel,vo=s.SeamlessM4TFeatureExtractor,xo=s.SegformerFeatureExtractor,Mo=s.SegformerForImageClassification,To=s.SegformerForSemanticSegmentation,ko=s.SegformerModel,$o=s.SegformerPreTrainedModel,Co=s.Seq2SeqLMOutput,So=s.SequenceClassifierOutput,Po=s.SiglipImageProcessor,Eo=s.SiglipModel,Fo=s.SiglipPreTrainedModel,Ao=s.SiglipTextModel,Io=s.SiglipTokenizer,zo=s.SiglipVisionModel,Oo=s.SpeechT
5FeatureExtractor,Bo=s.SpeechT5ForSpeechToText,Lo=s.SpeechT5ForTextToSpeech,Do=s.SpeechT5HifiGan,Ro=s.SpeechT5Model,No=s.SpeechT5PreTrainedModel,Vo=s.SpeechT5Processor,jo=s.SpeechT5Tokenizer,qo=s.SqueezeBertForMaskedLM,Go=s.SqueezeBertForQuestionAnswering,Uo=s.SqueezeBertForSequenceClassification,Wo=s.SqueezeBertModel,Ho=s.SqueezeBertPreTrainedModel,Xo=s.SqueezeBertTokenizer,Ko=s.StableLmForCausalLM,Qo=s.StableLmModel,Yo=s.StableLmPreTrainedModel,Zo=s.Starcoder2ForCausalLM,Jo=s.Starcoder2Model,el=s.Starcoder2PreTrainedModel,tl=s.StoppingCriteria,nl=s.StoppingCriteriaList,rl=s.SummarizationPipeline,al=s.Swin2SRForImageSuperResolution,sl=s.Swin2SRImageProcessor,il=s.Swin2SRModel,ol=s.Swin2SRPreTrainedModel,ll=s.SwinForImageClassification,ul=s.SwinModel,dl=s.SwinPreTrainedModel,cl=s.T5ForConditionalGeneration,pl=s.T5Model,hl=s.T5PreTrainedModel,ml=s.T5Tokenizer,fl=s.TableTransformerForObjectDetection,gl=s.TableTransformerModel,_l=s.TableTransformerObjectDetectionOutput,wl=s.TableTransformerPreTrainedModel,yl=s.Tensor,bl=s.Text2TextGenerationPipeline,vl=s.TextClassificationPipeline,xl=s.TextGenerationPipeline,Ml=s.TextStreamer,Tl=s.TextToAudioPipeline,kl=s.TokenClassificationPipeline,$l=s.TokenClassifierOutput,Cl=s.TokenizerModel,Sl=s.TrOCRForCausalLM,Pl=s.TrOCRPreTrainedModel,El=s.TranslationPipeline,Fl=s.UniSpeechForCTC,Al=s.UniSpeechForSequenceClassification,Il=s.UniSpeechModel,zl=s.UniSpeechPreTrainedModel,Ol=s.UniSpeechSatForAudioFrameClassification,Bl=s.UniSpeechSatForCTC,Ll=s.UniSpeechSatForSequenceClassification,Dl=s.UniSpeechSatModel,Rl=s.UniSpeechSatPreTrainedModel,Nl=s.ViTFeatureExtractor,Vl=s.ViTForImageClassification,jl=s.ViTImageProcessor,ql=s.ViTMAEModel,Gl=s.ViTMAEPreTrainedModel,Ul=s.ViTMSNForImageClassification,Wl=s.ViTMSNModel,Hl=s.ViTMSNPreTrainedModel,Xl=s.ViTModel,Kl=s.ViTPreTrainedModel,Ql=s.VisionEncoderDecoderModel,Yl=s.VitMatteForImageMatting,Zl=s.VitMatteImageProcessor,Jl=s.VitMattePreTrainedModel,eu=s.VitsModel,tu=s.VitsModelOutput,nu=s.VitsP
reTrainedModel,ru=s.VitsTokenizer,au=s.Wav2Vec2BertForCTC,su=s.Wav2Vec2BertForSequenceClassification,iu=s.Wav2Vec2BertModel,ou=s.Wav2Vec2BertPreTrainedModel,lu=s.Wav2Vec2CTCTokenizer,uu=s.Wav2Vec2FeatureExtractor,du=s.Wav2Vec2ForAudioFrameClassification,cu=s.Wav2Vec2ForCTC,pu=s.Wav2Vec2ForSequenceClassification,hu=s.Wav2Vec2Model,mu=s.Wav2Vec2PreTrainedModel,fu=s.Wav2Vec2ProcessorWithLM,gu=s.WavLMForAudioFrameClassification,_u=s.WavLMForCTC,wu=s.WavLMForSequenceClassification,yu=s.WavLMForXVector,bu=s.WavLMModel,vu=s.WavLMPreTrainedModel,xu=s.WeSpeakerFeatureExtractor,Mu=s.WeSpeakerResNetModel,Tu=s.WeSpeakerResNetPreTrainedModel,ku=s.WhisperFeatureExtractor,$u=s.WhisperForConditionalGeneration,Cu=s.WhisperModel,Su=s.WhisperPreTrainedModel,Pu=s.WhisperProcessor,Eu=s.WhisperTextStreamer,Fu=s.WhisperTokenizer,Au=s.XLMForQuestionAnswering,Iu=s.XLMForSequenceClassification,zu=s.XLMForTokenClassification,Ou=s.XLMModel,Bu=s.XLMPreTrainedModel,Lu=s.XLMRobertaForMaskedLM,Du=s.XLMRobertaForQuestionAnswering,Ru=s.XLMRobertaForSequenceClassification,Nu=s.XLMRobertaForTokenClassification,Vu=s.XLMRobertaModel,ju=s.XLMRobertaPreTrainedModel,qu=s.XLMRobertaTokenizer,Gu=s.XLMTokenizer,Uu=s.XLMWithLMHeadModel,Wu=s.XVectorOutput,Hu=s.YolosFeatureExtractor,Xu=s.YolosForObjectDetection,Ku=s.YolosModel,Qu=s.YolosObjectDetectionOutput,Yu=s.YolosPreTrainedModel,Zu=s.ZeroShotAudioClassificationPipeline,Ju=s.ZeroShotClassificationPipeline,ed=s.ZeroShotImageClassificationPipeline,td=s.ZeroShotObjectDetectionPipeline,nd=s.bankers_round,rd=s.cat,ad=s.cos_sim,sd=s.dot,id=s.dynamic_time_warping,od=s.env,ld=s.full,ud=s.full_like,dd=s.getKeyValueShapes,cd=s.hamming,pd=s.hanning,hd=s.interpolate,md=s.interpolate_4d,fd=s.interpolate_data,gd=s.is_chinese_char,_d=s.layer_norm,wd=s.log_softmax,yd=s.magnitude,bd=s.matmul,vd=s.max,xd=s.mean,Md=s.mean_pooling,Td=s.medianFilter,kd=s.mel_filter_bank,$d=s.min,Cd=s.ones,Sd=s.ones_like,Pd=s.permute,Ed=s.permute_data,Fd=s.pipeline,Ad=s.quantize_embeddings,Id=s.r
ead_audio,zd=s.rfft,Od=s.round,Bd=s.softmax,Ld=s.spectrogram,Dd=s.stack,Rd=s.std_mean,Nd=s.topk,Vd=s.window_function,jd=s.zeros,qd=s.zeros_like;export{w as ASTFeatureExtractor,y as ASTForAudioClassification,b as ASTModel,v as ASTPreTrainedModel,x as AlbertForMaskedLM,M as AlbertForQuestionAnswering,T as AlbertForSequenceClassification,k as AlbertModel,$ as AlbertPreTrainedModel,C as AlbertTokenizer,S as AudioClassificationPipeline,P as AutoConfig,E as AutoModel,F as AutoModelForAudioClassification,A as AutoModelForAudioFrameClassification,I as AutoModelForCTC,z as AutoModelForCausalLM,O as AutoModelForDepthEstimation,B as AutoModelForDocumentQuestionAnswering,L as AutoModelForImageClassification,D as AutoModelForImageFeatureExtraction,R as AutoModelForImageMatting,N as AutoModelForImageSegmentation,V as AutoModelForImageToImage,j as AutoModelForMaskGeneration,q as AutoModelForMaskedLM,G as AutoModelForNormalEstimation,U as AutoModelForObjectDetection,W as AutoModelForQuestionAnswering,H as AutoModelForSemanticSegmentation,X as AutoModelForSeq2SeqLM,K as AutoModelForSequenceClassification,Q as AutoModelForSpeechSeq2Seq,Y as AutoModelForTextToSpectrogram,Z as AutoModelForTextToWaveform,J as AutoModelForTokenClassification,ee as AutoModelForUniversalSegmentation,te as AutoModelForVision2Seq,ne as AutoModelForXVector,re as AutoModelForZeroShotObjectDetection,ae as AutoProcessor,se as AutoTokenizer,ie as AutomaticSpeechRecognitionPipeline,oe as BartForConditionalGeneration,le as BartForSequenceClassification,ue as BartModel,de as BartPretrainedModel,ce as BartTokenizer,pe as BaseModelOutput,he as BaseStreamer,me as BeitFeatureExtractor,fe as BeitForImageClassification,ge as BeitModel,_e as BeitPreTrainedModel,we as BertForMaskedLM,ye as BertForQuestionAnswering,be as BertForSequenceClassification,ve as BertForTokenClassification,xe as BertModel,Me as BertPreTrainedModel,Te as BertTokenizer,ke as BitImageProcessor,$e as BlenderbotForConditionalGeneration,Ce as 
BlenderbotModel,Se as BlenderbotPreTrainedModel,Pe as BlenderbotSmallForConditionalGeneration,Ee as BlenderbotSmallModel,Fe as BlenderbotSmallPreTrainedModel,Ae as BlenderbotSmallTokenizer,Ie as BlenderbotTokenizer,ze as BloomForCausalLM,Oe as BloomModel,Be as BloomPreTrainedModel,Le as BloomTokenizer,De as CLIPFeatureExtractor,Re as CLIPImageProcessor,Ne as CLIPModel,Ve as CLIPPreTrainedModel,je as CLIPSegForImageSegmentation,qe as CLIPSegModel,Ge as CLIPSegPreTrainedModel,Ue as CLIPTextModel,We as CLIPTextModelWithProjection,He as CLIPTokenizer,Xe as CLIPVisionModel,Ke as CLIPVisionModelWithProjection,Qe as CamembertForMaskedLM,Ye as CamembertForQuestionAnswering,Ze as CamembertForSequenceClassification,Je as CamembertForTokenClassification,et as CamembertModel,tt as CamembertPreTrainedModel,nt as CamembertTokenizer,rt as CausalLMOutput,at as CausalLMOutputWithPast,st as ChineseCLIPFeatureExtractor,it as ChineseCLIPModel,ot as ChineseCLIPPreTrainedModel,lt as ClapAudioModelWithProjection,ut as ClapFeatureExtractor,dt as ClapModel,ct as ClapPreTrainedModel,pt as ClapTextModelWithProjection,ht as CodeGenForCausalLM,mt as CodeGenModel,ft as CodeGenPreTrainedModel,gt as CodeGenTokenizer,_t as CodeLlamaTokenizer,wt as CohereForCausalLM,yt as CohereModel,bt as CoherePreTrainedModel,vt as CohereTokenizer,xt as ConvBertForMaskedLM,Mt as ConvBertForQuestionAnswering,Tt as ConvBertForSequenceClassification,kt as ConvBertForTokenClassification,$t as ConvBertModel,Ct as ConvBertPreTrainedModel,St as ConvBertTokenizer,Pt as ConvNextFeatureExtractor,Et as ConvNextForImageClassification,Ft as ConvNextImageProcessor,At as ConvNextModel,It as ConvNextPreTrainedModel,zt as ConvNextV2ForImageClassification,Ot as ConvNextV2Model,Bt as ConvNextV2PreTrainedModel,Lt as DPTFeatureExtractor,Dt as DPTForDepthEstimation,Rt as DPTImageProcessor,Nt as DPTModel,Vt as DPTPreTrainedModel,jt as DebertaForMaskedLM,qt as DebertaForQuestionAnswering,Gt as DebertaForSequenceClassification,Ut as 
DebertaForTokenClassification,Wt as DebertaModel,Ht as DebertaPreTrainedModel,Xt as DebertaTokenizer,Kt as DebertaV2ForMaskedLM,Qt as DebertaV2ForQuestionAnswering,Yt as DebertaV2ForSequenceClassification,Zt as DebertaV2ForTokenClassification,Jt as DebertaV2Model,en as DebertaV2PreTrainedModel,tn as DebertaV2Tokenizer,nn as DecisionTransformerModel,rn as DecisionTransformerPreTrainedModel,an as DeiTFeatureExtractor,sn as DeiTForImageClassification,on as DeiTModel,ln as DeiTPreTrainedModel,un as DepthAnythingForDepthEstimation,dn as DepthAnythingPreTrainedModel,cn as DepthEstimationPipeline,pn as DepthProForDepthEstimation,hn as DepthProPreTrainedModel,mn as DetrFeatureExtractor,fn as DetrForObjectDetection,gn as DetrForSegmentation,_n as DetrModel,wn as DetrObjectDetectionOutput,yn as DetrPreTrainedModel,bn as DetrSegmentationOutput,vn as Dinov2ForImageClassification,xn as Dinov2Model,Mn as Dinov2PreTrainedModel,Tn as DistilBertForMaskedLM,kn as DistilBertForQuestionAnswering,$n as DistilBertForSequenceClassification,Cn as DistilBertForTokenClassification,Sn as DistilBertModel,Pn as DistilBertPreTrainedModel,En as DistilBertTokenizer,Fn as DocumentQuestionAnsweringPipeline,An as DonutFeatureExtractor,In as DonutImageProcessor,zn as DonutSwinModel,On as DonutSwinPreTrainedModel,Bn as EfficientNetForImageClassification,Ln as EfficientNetImageProcessor,Dn as EfficientNetModel,Rn as EfficientNetPreTrainedModel,Nn as ElectraForMaskedLM,Vn as ElectraForQuestionAnswering,jn as ElectraForSequenceClassification,qn as ElectraForTokenClassification,Gn as ElectraModel,Un as ElectraPreTrainedModel,Wn as ElectraTokenizer,Hn as EosTokenCriteria,Xn as EsmForMaskedLM,Kn as EsmForSequenceClassification,Qn as EsmForTokenClassification,Yn as EsmModel,Zn as EsmPreTrainedModel,Jn as EsmTokenizer,er as FFT,tr as FalconForCausalLM,nr as FalconModel,rr as FalconPreTrainedModel,ar as FalconTokenizer,sr as FastViTForImageClassification,ir as FastViTModel,or as FastViTPreTrainedModel,lr as 
FeatureExtractionPipeline,ur as FeatureExtractor,dr as FillMaskPipeline,cr as Florence2ForConditionalGeneration,pr as Florence2PreTrainedModel,hr as Florence2Processor,mr as GLPNFeatureExtractor,fr as GLPNForDepthEstimation,gr as GLPNModel,_r as GLPNPreTrainedModel,wr as GPT2LMHeadModel,yr as GPT2Model,br as GPT2PreTrainedModel,vr as GPT2Tokenizer,xr as GPTBigCodeForCausalLM,Mr as GPTBigCodeModel,Tr as GPTBigCodePreTrainedModel,kr as GPTJForCausalLM,$r as GPTJModel,Cr as GPTJPreTrainedModel,Sr as GPTNeoForCausalLM,Pr as GPTNeoModel,Er as GPTNeoPreTrainedModel,Fr as GPTNeoXForCausalLM,Ar as GPTNeoXModel,Ir as GPTNeoXPreTrainedModel,zr as GPTNeoXTokenizer,Or as Gemma2ForCausalLM,Br as Gemma2Model,Lr as Gemma2PreTrainedModel,Dr as GemmaForCausalLM,Rr as GemmaModel,Nr as GemmaPreTrainedModel,Vr as GemmaTokenizer,jr as GraniteForCausalLM,qr as GraniteModel,Gr as GranitePreTrainedModel,Ur as Grok1Tokenizer,Wr as GroupViTModel,Hr as GroupViTPreTrainedModel,Xr as HerbertTokenizer,Kr as HieraForImageClassification,Qr as HieraModel,Yr as HieraPreTrainedModel,Zr as HubertForCTC,Jr as HubertForSequenceClassification,ea as HubertModel,ta as HubertPreTrainedModel,na as ImageClassificationPipeline,ra as ImageFeatureExtractionPipeline,aa as ImageFeatureExtractor,sa as ImageMattingOutput,ia as ImageSegmentationPipeline,oa as ImageToImagePipeline,la as ImageToTextPipeline,ua as InterruptableStoppingCriteria,da as JAISLMHeadModel,ca as JAISModel,pa as JAISPreTrainedModel,ha as LlamaForCausalLM,ma as LlamaModel,fa as LlamaPreTrainedModel,ga as LlamaTokenizer,_a as LlavaForConditionalGeneration,wa as LlavaPreTrainedModel,ya as LongT5ForConditionalGeneration,ba as LongT5Model,va as LongT5PreTrainedModel,xa as M2M100ForConditionalGeneration,Ma as M2M100Model,Ta as M2M100PreTrainedModel,ka as M2M100Tokenizer,$a as MBart50Tokenizer,Ca as MBartForCausalLM,Sa as MBartForConditionalGeneration,Pa as MBartForSequenceClassification,Ea as MBartModel,Fa as MBartPreTrainedModel,Aa as 
MBartTokenizer,Ia as MPNetForMaskedLM,za as MPNetForQuestionAnswering,Oa as MPNetForSequenceClassification,Ba as MPNetForTokenClassification,La as MPNetModel,Da as MPNetPreTrainedModel,Ra as MPNetTokenizer,Na as MT5ForConditionalGeneration,Va as MT5Model,ja as MT5PreTrainedModel,qa as MarianMTModel,Ga as MarianModel,Ua as MarianPreTrainedModel,Wa as MarianTokenizer,Ha as MaskFormerFeatureExtractor,Xa as MaskFormerForInstanceSegmentation,Ka as MaskFormerModel,Qa as MaskFormerPreTrainedModel,Ya as MaskedLMOutput,Za as MaxLengthCriteria,Ja as MistralForCausalLM,es as MistralModel,ts as MistralPreTrainedModel,ns as MobileBertForMaskedLM,rs as MobileBertForQuestionAnswering,as as MobileBertForSequenceClassification,ss as MobileBertModel,is as MobileBertPreTrainedModel,os as MobileBertTokenizer,ls as MobileLLMForCausalLM,us as MobileLLMModel,ds as MobileLLMPreTrainedModel,cs as MobileNetV1FeatureExtractor,ps as MobileNetV1ForImageClassification,hs as MobileNetV1Model,ms as MobileNetV1PreTrainedModel,fs as MobileNetV2FeatureExtractor,gs as MobileNetV2ForImageClassification,_s as MobileNetV2Model,ws as MobileNetV2PreTrainedModel,ys as MobileNetV3FeatureExtractor,bs as MobileNetV3ForImageClassification,vs as MobileNetV3Model,xs as MobileNetV3PreTrainedModel,Ms as MobileNetV4FeatureExtractor,Ts as MobileNetV4ForImageClassification,ks as MobileNetV4Model,$s as MobileNetV4PreTrainedModel,Cs as MobileViTFeatureExtractor,Ss as MobileViTForImageClassification,Ps as MobileViTImageProcessor,Es as MobileViTModel,Fs as MobileViTPreTrainedModel,As as MobileViTV2ForImageClassification,Is as MobileViTV2Model,zs as MobileViTV2PreTrainedModel,Os as ModelOutput,Bs as Moondream1ForConditionalGeneration,Ls as MptForCausalLM,Ds as MptModel,Rs as MptPreTrainedModel,Ns as MusicgenForCausalLM,Vs as MusicgenForConditionalGeneration,js as MusicgenModel,qs as MusicgenPreTrainedModel,Gs as NllbTokenizer,Us as NomicBertModel,Ws as NomicBertPreTrainedModel,Hs as NougatImageProcessor,Xs as 
NougatTokenizer,Ks as OPTForCausalLM,Qs as OPTModel,Ys as OPTPreTrainedModel,Zs as ObjectDetectionPipeline,Js as OlmoForCausalLM,ei as OlmoModel,ti as OlmoPreTrainedModel,ni as OpenELMForCausalLM,ri as OpenELMModel,ai as OpenELMPreTrainedModel,si as OwlViTFeatureExtractor,ii as OwlViTForObjectDetection,oi as OwlViTModel,li as OwlViTPreTrainedModel,ui as OwlViTProcessor,di as Owlv2ForObjectDetection,ci as Owlv2ImageProcessor,pi as Owlv2Model,hi as Owlv2PreTrainedModel,mi as Phi3ForCausalLM,fi as Phi3Model,gi as Phi3PreTrainedModel,_i as PhiForCausalLM,wi as PhiModel,yi as PhiPreTrainedModel,bi as Pipeline,vi as PreTrainedModel,xi as PreTrainedTokenizer,Mi as PretrainedConfig,Ti as PretrainedMixin,ki as Processor,$i as PvtForImageClassification,Ci as PvtImageProcessor,Si as PvtModel,Pi as PvtPreTrainedModel,Ei as PyAnnoteFeatureExtractor,Fi as PyAnnoteForAudioFrameClassification,Ai as PyAnnoteModel,Ii as PyAnnotePreTrainedModel,zi as PyAnnoteProcessor,Oi as QuestionAnsweringModelOutput,Bi as QuestionAnsweringPipeline,Li as Qwen2ForCausalLM,Di as Qwen2Model,Ri as Qwen2PreTrainedModel,Ni as Qwen2Tokenizer,Vi as RTDetrForObjectDetection,ji as RTDetrImageProcessor,qi as RTDetrModel,Gi as RTDetrObjectDetectionOutput,Ui as RTDetrPreTrainedModel,Wi as RawImage,Hi as ResNetForImageClassification,Xi as ResNetModel,Ki as ResNetPreTrainedModel,Qi as RoFormerForMaskedLM,Yi as RoFormerForQuestionAnswering,Zi as RoFormerForSequenceClassification,Ji as RoFormerForTokenClassification,eo as RoFormerModel,to as RoFormerPreTrainedModel,no as RoFormerTokenizer,ro as RobertaForMaskedLM,ao as RobertaForQuestionAnswering,so as RobertaForSequenceClassification,io as RobertaForTokenClassification,oo as RobertaModel,lo as RobertaPreTrainedModel,uo as RobertaTokenizer,co as SamImageProcessor,po as SamImageSegmentationOutput,ho as SamModel,mo as SamPreTrainedModel,fo as SamProcessor,go as SapiensFeatureExtractor,_o as SapiensForDepthEstimation,wo as SapiensForNormalEstimation,yo as 
SapiensForSemanticSegmentation,bo as SapiensPreTrainedModel,vo as SeamlessM4TFeatureExtractor,xo as SegformerFeatureExtractor,Mo as SegformerForImageClassification,To as SegformerForSemanticSegmentation,ko as SegformerModel,$o as SegformerPreTrainedModel,Co as Seq2SeqLMOutput,So as SequenceClassifierOutput,Po as SiglipImageProcessor,Eo as SiglipModel,Fo as SiglipPreTrainedModel,Ao as SiglipTextModel,Io as SiglipTokenizer,zo as SiglipVisionModel,Oo as SpeechT5FeatureExtractor,Bo as SpeechT5ForSpeechToText,Lo as SpeechT5ForTextToSpeech,Do as SpeechT5HifiGan,Ro as SpeechT5Model,No as SpeechT5PreTrainedModel,Vo as SpeechT5Processor,jo as SpeechT5Tokenizer,qo as SqueezeBertForMaskedLM,Go as SqueezeBertForQuestionAnswering,Uo as SqueezeBertForSequenceClassification,Wo as SqueezeBertModel,Ho as SqueezeBertPreTrainedModel,Xo as SqueezeBertTokenizer,Ko as StableLmForCausalLM,Qo as StableLmModel,Yo as StableLmPreTrainedModel,Zo as Starcoder2ForCausalLM,Jo as Starcoder2Model,el as Starcoder2PreTrainedModel,tl as StoppingCriteria,nl as StoppingCriteriaList,rl as SummarizationPipeline,al as Swin2SRForImageSuperResolution,sl as Swin2SRImageProcessor,il as Swin2SRModel,ol as Swin2SRPreTrainedModel,ll as SwinForImageClassification,ul as SwinModel,dl as SwinPreTrainedModel,cl as T5ForConditionalGeneration,pl as T5Model,hl as T5PreTrainedModel,ml as T5Tokenizer,fl as TableTransformerForObjectDetection,gl as TableTransformerModel,_l as TableTransformerObjectDetectionOutput,wl as TableTransformerPreTrainedModel,yl as Tensor,bl as Text2TextGenerationPipeline,vl as TextClassificationPipeline,xl as TextGenerationPipeline,Ml as TextStreamer,Tl as TextToAudioPipeline,kl as TokenClassificationPipeline,$l as TokenClassifierOutput,Cl as TokenizerModel,Sl as TrOCRForCausalLM,Pl as TrOCRPreTrainedModel,El as TranslationPipeline,Fl as UniSpeechForCTC,Al as UniSpeechForSequenceClassification,Il as UniSpeechModel,zl as UniSpeechPreTrainedModel,Ol as UniSpeechSatForAudioFrameClassification,Bl as 
UniSpeechSatForCTC,Ll as UniSpeechSatForSequenceClassification,Dl as UniSpeechSatModel,Rl as UniSpeechSatPreTrainedModel,Nl as ViTFeatureExtractor,Vl as ViTForImageClassification,jl as ViTImageProcessor,ql as ViTMAEModel,Gl as ViTMAEPreTrainedModel,Ul as ViTMSNForImageClassification,Wl as ViTMSNModel,Hl as ViTMSNPreTrainedModel,Xl as ViTModel,Kl as ViTPreTrainedModel,Ql as VisionEncoderDecoderModel,Yl as VitMatteForImageMatting,Zl as VitMatteImageProcessor,Jl as VitMattePreTrainedModel,eu as VitsModel,tu as VitsModelOutput,nu as VitsPreTrainedModel,ru as VitsTokenizer,au as Wav2Vec2BertForCTC,su as Wav2Vec2BertForSequenceClassification,iu as Wav2Vec2BertModel,ou as Wav2Vec2BertPreTrainedModel,lu as Wav2Vec2CTCTokenizer,uu as Wav2Vec2FeatureExtractor,du as Wav2Vec2ForAudioFrameClassification,cu as Wav2Vec2ForCTC,pu as Wav2Vec2ForSequenceClassification,hu as Wav2Vec2Model,mu as Wav2Vec2PreTrainedModel,fu as Wav2Vec2ProcessorWithLM,gu as WavLMForAudioFrameClassification,_u as WavLMForCTC,wu as WavLMForSequenceClassification,yu as WavLMForXVector,bu as WavLMModel,vu as WavLMPreTrainedModel,xu as WeSpeakerFeatureExtractor,Mu as WeSpeakerResNetModel,Tu as WeSpeakerResNetPreTrainedModel,ku as WhisperFeatureExtractor,$u as WhisperForConditionalGeneration,Cu as WhisperModel,Su as WhisperPreTrainedModel,Pu as WhisperProcessor,Eu as WhisperTextStreamer,Fu as WhisperTokenizer,Au as XLMForQuestionAnswering,Iu as XLMForSequenceClassification,zu as XLMForTokenClassification,Ou as XLMModel,Bu as XLMPreTrainedModel,Lu as XLMRobertaForMaskedLM,Du as XLMRobertaForQuestionAnswering,Ru as XLMRobertaForSequenceClassification,Nu as XLMRobertaForTokenClassification,Vu as XLMRobertaModel,ju as XLMRobertaPreTrainedModel,qu as XLMRobertaTokenizer,Gu as XLMTokenizer,Uu as XLMWithLMHeadModel,Wu as XVectorOutput,Hu as YolosFeatureExtractor,Xu as YolosForObjectDetection,Ku as YolosModel,Qu as YolosObjectDetectionOutput,Yu as YolosPreTrainedModel,Zu as ZeroShotAudioClassificationPipeline,Ju as 
ZeroShotClassificationPipeline,ed as ZeroShotImageClassificationPipeline,td as ZeroShotObjectDetectionPipeline,nd as bankers_round,rd as cat,ad as cos_sim,sd as dot,id as dynamic_time_warping,od as env,ld as full,ud as full_like,dd as getKeyValueShapes,cd as hamming,pd as hanning,hd as interpolate,md as interpolate_4d,fd as interpolate_data,gd as is_chinese_char,_d as layer_norm,wd as log_softmax,yd as magnitude,bd as matmul,vd as max,xd as mean,Md as mean_pooling,Td as medianFilter,kd as mel_filter_bank,$d as min,Cd as ones,Sd as ones_like,Pd as permute,Ed as permute_data,Fd as pipeline,Ad as quantize_embeddings,Id as read_audio,zd as rfft,Od as round,Bd as softmax,Ld as spectrogram,Dd as stack,Rd as std_mean,Nd as topk,Vd as window_function,jd as zeros,qd as zeros_like};
230
230
  //# sourceMappingURL=transformers.min.js.map