@huggingface/transformers 3.0.0-alpha.13 → 3.0.0-alpha.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,13 +91,13 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
  \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>i,getKeyValueShapes:()=>a});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),o=n(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=s(e.text_config);break;case"moondream1":n=s(e.phi_config);break;case"musicgen":n=s(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const o=s(e.decoder),a="num_decoder_layers"in o,i=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return a?(i.num_decoder_layers=o.num_decoder_layers,i.num_decoder_heads=o.num_decoder_heads,i.decoder_hidden_size=o.decoder_hidden_size,i.num_encoder_layers=o.num_encoder_layers,i.num_encoder_heads=o.num_encoder_heads,i.encoder_hidden_size=o.encoder_hidden_size):(i.num_layers=o.num_layers,i.num_heads=o.num_heads,i.hidden_size=o.hidden_size),i}const o={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)o[n]=e[t[n]];return o}function a(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const 
e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,o=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,s=[1,r.num_encoder_heads,0,e],a=[1,r.num_decoder_heads,0,o];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=s,n[`${t}.${e}.encoder.value`]=s,n[`${t}.${e}.decoder.key`]=a,n[`${t}.${e}.decoder.value`]=a}else{const e=r.num_heads,o=r.num_layers,s=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*s];for(let e=0;e<o;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,s,0],a=[1*e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=a}else if("openelm"===r.model_type)for(let r=0;r<o;++r){const o=[1,e[r],0,s];n[`${t}.${r}.key`]=o,n[`${t}.${r}.value`]=o}else{const r=[1,e,0,s];for(let e=0;e<o;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class i{max_position_embeddings;constructor(e){this.model_type=null,this.is_encoder_decoder=!1,Object.assign(this,e),this.normalized_config=s(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:s=!1,revision:a="main"}={}){!n||n instanceof i||(n=new i(n));const l=n??await async function(e,t){return await(0,o.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:s,revision:a});return new this(l)}}class l{static async from_pretrained(...e){return i.from_pretrained(...e)}}},"./src/env.js":
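For orientation: the ./src/configs.js context above normalizes model configs and computes the shapes of the empty key/value cache that is fed to the decoder on the first generation step. The snippet below is a readable paraphrase of the default (decoder-only) branch of the minified getKeyValueShapes helper; the function and parameter names are descriptive stand-ins for illustration, not the originals.

// Paraphrase of the default branch of getKeyValueShapes: every layer gets an empty
// [batch=1, num_heads, seq_len=0, head_dim] shape for both its key and its value tensor.
function decoderOnlyKeyValueShapes({ numLayers, numHeads, hiddenSize, numAttentionHeads, headDim }, prefix = 'past_key_values') {
  const dimKv = headDim ?? hiddenSize / (numAttentionHeads ?? numHeads);
  const shape = [1, numHeads, 0, dimKv];
  const shapes = {};
  for (let i = 0; i < numLayers; ++i) {
    shapes[`${prefix}.${i}.key`] = shape;
    shapes[`${prefix}.${i}.value`] = shape;
  }
  return shapes;
}

// e.g. a llama-style config with 2 layers, 8 KV heads, hidden size 4096 and 32 attention heads:
console.log(decoderOnlyKeyValueShapes({ numLayers: 2, numHeads: 8, hiddenSize: 4096, numAttentionHeads: 32 }));
// { 'past_key_values.0.key': [ 1, 8, 0, 128 ], 'past_key_values.0.value': [ 1, 8, 0, 128 ], ... }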
  /*!********************!*\
  !*** ./src/env.js ***!
- \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>_,env:()=>b});var r=n(/*! fs */"fs"),o=n(/*! path */"path"),s=n(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!T(r.default),m=!T(o.default),_=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),f=h&&m,g=f?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",M=f?o.default.join(g,"/.cache/"):null,w="/models/",b={version:"3.0.0-alpha.13",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!a,localModelPath:f?o.default.join(g,w):w,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:M,useCustomCache:!1,customCache:null};function T(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
+ \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>_,env:()=>b});var r=n(/*! fs */"fs"),o=n(/*! path */"path"),s=n(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!T(r.default),m=!T(o.default),_=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),f=h&&m,g=f?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",M=f?o.default.join(g,"/.cache/"):null,w="/models/",b={version:"3.0.0-alpha.14",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!a,localModelPath:f?o.default.join(g,w):w,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:M,useCustomCache:!1,customCache:null};function T(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
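The only visible change in this ./src/env.js hunk is the bundled version string, which moves from "3.0.0-alpha.13" to "3.0.0-alpha.14"; the environment detection and the default env settings are otherwise identical. As a quick reference, env is the object consumers use to control where models are fetched from and cached. A minimal sketch, assuming env is re-exported from the '@huggingface/transformers' entry point (the local paths below are hypothetical examples):

import { env } from '@huggingface/transformers';

console.log(env.version);          // "3.0.0-alpha.14" once this release is installed
env.allowRemoteModels = true;      // fall back to env.remoteHost (https://huggingface.co/) for missing models
env.allowLocalModels = true;       // also resolve models under env.localModelPath
env.localModelPath = './models/';  // hypothetical local directory
env.cacheDir = './.cache/';        // filesystem cache location when running under Node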
  /*!***********************************************!*\
  !*** ./src/generation/configuration_utils.js ***!
  \***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>o});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class o{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
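./src/generation/configuration_utils.js appears here only as unchanged context. Its GenerationConfig constructor copies just the keys that match its declared defaults (pick(e, Object.getOwnPropertyNames(this))), so unrecognized options are silently dropped rather than stored. A small sketch of that behaviour, assuming GenerationConfig is re-exported from the package entry point:

import { GenerationConfig } from '@huggingface/transformers';

const config = new GenerationConfig({
  max_new_tokens: 64,
  top_k: 5,
  not_a_real_option: 123, // hypothetical key: not a declared field, so pick() drops it
});

console.log(config.max_new_tokens);         // 64
console.log(config.top_k);                  // 5
console.log('not_a_real_option' in config); // false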
  /*!******************************************!*\
  !*** ./src/generation/logits_process.js ***!
- \******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>g,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>c,LogitsProcessor:()=>s,LogitsProcessorList:()=>i,LogitsWarper:()=>a,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>_,NoBadWordsLogitsProcessor:()=>f,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>d,TemperatureLogitsWarper:()=>M,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>w,WhisperTimeStampLogitsProcessor:()=>u});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),o=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class s extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class a extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class i extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends s{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class c extends s{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class d extends s{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class u extends s{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const s=e[n].slice(this.begin_index),a=s.length>=1&&s[s.length-1]>=this.timestamp_begin,i=s.length<2||s[s.length-2]>=this.timestamp_begin;if(a&&(i?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,o.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,o.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends s{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),o=r.get(n)??[];o.push(e[e.length-1]),r.set(n,o)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return 
e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,o=this.calcBannedNgramTokens(e[n]);for(const e of o)r[e]=-1/0}return t}}class h extends s{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of e[n]){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends s{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class _ extends s{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class f extends s{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of this.bad_words_ids){let n=!0;for(let r=1;r<=t.length-1&&t.length<e[r].length;++r)if(t.at(-r-1)!=e[r].at(-r)){n=!1;break}n&&(r[t.at(-1)]=-1/0)}}return t}}class g extends s{constructor(e){if(super(),e<=1)throw new Error(`Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,t){if(t.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got batch size ${t.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,r=t.slice([0,n],null),o=t.slice([n,t.dims[0]],null);for(let e=0;e<o.data.length;++e)o.data[e]+=(r.data[e]-o.data[e])*this.guidance_scale;return o}}class M extends a{constructor(e){if(super(),"number"!=typeof e||e<=0){let t=`\`temperature\` (=${e}) must be a strictly positive float, otherwise your next token scores will be invalid.`;0===e&&(t+=" If you're looking for greedy decoding strategies, set `do_sample=false`.")}this.temperature=e}_call(e,t){const n=t.data;for(let e=0;e<n.length;++e)n[e]/=this.temperature;return t}}class w extends a{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),e<0||e>1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=t,this.min_tokens_to_keep=n}}class b extends a{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=t}}},"./src/generation/logits_sampler.js":
+ \******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>g,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>c,LogitsProcessor:()=>s,LogitsProcessorList:()=>i,LogitsWarper:()=>a,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>_,NoBadWordsLogitsProcessor:()=>f,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>d,TemperatureLogitsWarper:()=>M,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>w,WhisperTimeStampLogitsProcessor:()=>u});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),o=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class s extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class a extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class i extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends s{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class c extends s{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class d extends s{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class u extends s{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const s=e[n].slice(this.begin_index),a=s.length>=1&&s[s.length-1]>=this.timestamp_begin,i=s.length<2||s[s.length-2]>=this.timestamp_begin;if(a&&(i?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,o.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,o.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends s{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),o=r.get(n)??[];o.push(e[e.length-1]),r.set(n,o)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return 
e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,o=this.calcBannedNgramTokens(e[n]);for(const e of o)r[e]=-1/0}return t}}class h extends s{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of e[n]){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends s{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class _ extends s{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class f extends s{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,o=e[n];for(const e of this.bad_words_ids){let t=!0;for(let n=1;n<=e.length-1&&e.length<o.length;++n)if(e.at(-n-1)!=o.at(-n)){t=!1;break}t&&(r[e.at(-1)]=-1/0)}}return t}}class g extends s{constructor(e){if(super(),e<=1)throw new Error(`Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,t){if(t.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got batch size ${t.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,r=t.slice([0,n],null),o=t.slice([n,t.dims[0]],null);for(let e=0;e<o.data.length;++e)o.data[e]+=(r.data[e]-o.data[e])*this.guidance_scale;return o}}class M extends a{constructor(e){if(super(),"number"!=typeof e||e<=0){let t=`\`temperature\` (=${e}) must be a strictly positive float, otherwise your next token scores will be invalid.`;0===e&&(t+=" If you're looking for greedy decoding strategies, set `do_sample=false`.")}this.temperature=e}_call(e,t){const n=t.data;for(let e=0;e<n.length;++e)n[e]/=this.temperature;return t}}class w extends a{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),e<0||e>1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=t,this.min_tokens_to_keep=n}}class b extends a{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=t}}},"./src/generation/logits_sampler.js":
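The functional change in this ./src/generation/logits_process.js hunk is confined to NoBadWordsLogitsProcessor._call (the class minified as f). In alpha.13 the inner match loop read e[r], indexing the batch with the n-gram offset, instead of the current sequence e[n]; alpha.14 captures the current sequence first (const o = e[n]) and compares the banned n-gram against it, so bad_words_ids are now checked against the correct input ids. A readable paraphrase of the corrected logic, with descriptive names in place of the minified ones:

// Paraphrase of the fixed NoBadWordsLogitsProcessor._call.
function applyNoBadWords(inputIdsBatch, logitsBatch, badWordsIds) {
  for (let i = 0; i < inputIdsBatch.length; ++i) {
    const logits = logitsBatch[i]; // scores for the next token of sequence i
    const ids = inputIdsBatch[i];  // tokens generated so far (the fix: use sequence i, not a loop offset)
    for (const banned of badWordsIds) {
      let match = true;
      // A banned sequence matches when everything except its last token lines up with the tail of `ids`.
      for (let k = 1; k <= banned.length - 1 && banned.length < ids.length; ++k) {
        if (banned.at(-k - 1) != ids.at(-k)) { match = false; break; }
      }
      if (match) logits[banned.at(-1)] = -Infinity; // ban the final token of the sequence
    }
  }
  return logitsBatch;
}

const logits = [[0, 0, 0, 0, 0]];               // batch of one sequence, vocabulary of five tokens
applyNoBadWords([[1, 2, 3]], logits, [[3, 4]]); // "... 3" was just generated, so token 4 is banned
console.log(logits[0][4]);                      // -Infinity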
  /*!******************************************!*\
  !*** ./src/generation/logits_sampler.js ***!
  \******************************************/(e,t,n)=>{n.r(t),n.d(t,{LogitsSampler:()=>a});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),o=n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),s=n(/*! ../utils/maths.js */"./src/utils/maths.js");n(/*! ../generation/configuration_utils.js */"./src/generation/configuration_utils.js");class a extends r.Callable{constructor(e){super(),this.generation_config=e}async _call(e){return this.sample(e)}async sample(e){throw Error("sample should be implemented in subclasses.")}getLogits(e,t){let n=e.dims.at(-1),r=e.data;if(-1===t)r=r.slice(-n);else{let e=t*n;r=r.slice(e,e+n)}return r}randomSelect(e){let t=0;for(let n=0;n<e.length;++n)t+=e[n];let n=Math.random()*t;for(let t=0;t<e.length;++t)if(n-=e[t],n<=0)return t;return 0}static getSampler(e){if(e.do_sample)return new l(e);if(e.num_beams>1)return new c(e);if(e.num_return_sequences>1)throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${e.num_return_sequences}.`);return new i(e)}}class i extends a{async sample(e){const t=(0,s.max)(e.data)[1];return[[BigInt(t),0]]}}class l extends a{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,o.topk)(e,t),a=(0,s.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},(()=>{const e=this.randomSelect(a);return[r.data[e],Math.log(a[e])]}))}}class c extends a{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,o.topk)(e,t),a=(0,s.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},((e,t)=>[r.data[t],Math.log(a[t])]))}}},"./src/generation/stopping_criteria.js":
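./src/generation/logits_sampler.js is also unchanged context. Its LogitsSampler.getSampler factory selects the decoding strategy from the generation config: multinomial sampling when do_sample is set, a beam-style sampler when num_beams > 1, and greedy argmax otherwise, throwing if num_return_sequences > 1 is combined with greedy search. A readable paraphrase of that selection follows; the three sampler classes are empty stand-ins for the minified ones.

class GreedySampler { constructor(config) { this.config = config; } }      // argmax over the logits
class MultinomialSampler { constructor(config) { this.config = config; } } // top-k, softmax, then a random draw
class BeamSearchSampler { constructor(config) { this.config = config; } }  // keeps the top num_beams candidates

// Paraphrase of LogitsSampler.getSampler above.
function getSampler(generationConfig) {
  if (generationConfig.do_sample) return new MultinomialSampler(generationConfig);
  if (generationConfig.num_beams > 1) return new BeamSearchSampler(generationConfig);
  if (generationConfig.num_return_sequences > 1) {
    throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${generationConfig.num_return_sequences}.`);
  }
  return new GreedySampler(generationConfig);
}

console.log(getSampler({ do_sample: false, num_beams: 1, num_return_sequences: 1 }) instanceof GreedySampler); // true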
@@ -109,7 +109,7 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
  \*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>a,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),o=n(/*! ../tokenizers.js */"./src/tokenizers.js"),s=n(/*! ../env.js */"./src/env.js");class a{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const i=s.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends a{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:o={},...s}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??i,this.token_callback_function=r,this.decode_kwargs={...o,...s},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let s;n.endsWith("\n")?(s=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,o.is_chinese_char)(n.charCodeAt(n.length-1))?(s=n.slice(this.print_len),this.print_len+=s.length):(s=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=s.length),this.on_finalized_text(s,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===i&&s.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:o=null,on_chunk_end:s=null,on_finalize:a=null,time_precision:i=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=o,this.on_chunk_end=s,this.on_finalize=a,this.time_precision=i,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
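The ./src/generation/streamers.js context above defines TextStreamer, which incrementally decodes generated tokens and flushes text at word boundaries, and WhisperTextStreamer, which adds timestamp-chunk callbacks on top of it. A minimal usage sketch, assuming TextStreamer and AutoTokenizer are re-exported from the package entry point; the model id is only an example:

import { AutoTokenizer, TextStreamer } from '@huggingface/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/gpt2'); // example model id
const streamer = new TextStreamer(tokenizer, {
  skip_prompt: true,                                        // do not echo the prompt tokens
  callback_function: (text) => process.stdout.write(text),  // receives each finalized text fragment
});

// Pass the streamer to a generative model's generate() call, e.g.
// await model.generate({ ...inputs, max_new_tokens: 64, streamer });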
  /*!***********************!*\
  !*** ./src/models.js ***!
- \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>di,AutoModelForAudioClassification:()=>vi,AutoModelForAudioFrameClassification:()=>Ai,AutoModelForCTC:()=>Pi,AutoModelForCausalLM:()=>gi,AutoModelForDepthEstimation:()=>Ii,AutoModelForDocumentQuestionAnswering:()=>Ei,AutoModelForImageClassification:()=>Ti,AutoModelForImageFeatureExtraction:()=>Ni,AutoModelForImageMatting:()=>Li,AutoModelForImageSegmentation:()=>xi,AutoModelForImageToImage:()=>zi,AutoModelForMaskGeneration:()=>Ci,AutoModelForMaskedLM:()=>Mi,AutoModelForNormalEstimation:()=>Bi,AutoModelForObjectDetection:()=>ki,AutoModelForQuestionAnswering:()=>wi,AutoModelForSemanticSegmentation:()=>yi,AutoModelForSeq2SeqLM:()=>hi,AutoModelForSequenceClassification:()=>ui,AutoModelForSpeechSeq2Seq:()=>mi,AutoModelForTextToSpectrogram:()=>_i,AutoModelForTextToWaveform:()=>fi,AutoModelForTokenClassification:()=>pi,AutoModelForVision2Seq:()=>bi,AutoModelForXVector:()=>Si,AutoModelForZeroShotObjectDetection:()=>Fi,BartForConditionalGeneration:()=>yt,BartForSequenceClassification:()=>kt,BartModel:()=>xt,BartPretrainedModel:()=>Tt,BaseModelOutput:()=>q,BeitForImageClassification:()=>Qr,BeitModel:()=>Xr,BeitPreTrainedModel:()=>Ur,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>wr,BloomModel:()=>Mr,BloomPreTrainedModel:()=>gr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Fn,CLIPSegModel:()=>kn,CLIPSegPreTrainedModel:()=>yn,CLIPTextModelWithProjection:()=>_n,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>Te,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>qi,CausalLMOutputWithPast:()=>$i,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>na,ClapModel:()=>ea,ClapPreTrainedModel:()=>Zs,ClapTextModelWithProjection:()=>ta,CodeGenForCausalLM:()=>Un,CodeGenModel:()=>Wn,CodeGenPreTrainedModel:()=>$n,CohereForCausalLM:()=>Kn,CohereModel:()=>Jn,CoherePreTrainedModel:()=>Yn,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>Do,ConvNextModel:()=>Oo,ConvNextPreTrainedModel:()=>No,ConvNextV2ForImageClassification:()=>Ro,ConvNextV2Model:()=>jo,ConvNextV2PreTrainedModel:()=>Vo,DPTForDepthEstimation:()=>ko,DPTModel:()=>yo,DPTPreTrainedModel:()=>xo,DebertaForMaskedLM:()=>ke,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ye,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DeiTForImageClassification:()=>po,DeiTModel:()=>uo,DeiTPreTrainedModel
:()=>co,DepthAnythingForDepthEstimation:()=>Co,DepthAnythingPreTrainedModel:()=>Fo,DetrForObjectDetection:()=>Jr,DetrForSegmentation:()=>Kr,DetrModel:()=>Yr,DetrObjectDetectionOutput:()=>Zr,DetrPreTrainedModel:()=>Hr,DetrSegmentationOutput:()=>eo,Dinov2ForImageClassification:()=>$o,Dinov2Model:()=>qo,Dinov2PreTrainedModel:()=>Go,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Bo,DonutSwinPreTrainedModel:()=>Io,EfficientNetForImageClassification:()=>ma,EfficientNetModel:()=>ha,EfficientNetPreTrainedModel:()=>pa,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>Ks,FalconModel:()=>Js,FalconPreTrainedModel:()=>Ys,FastViTForImageClassification:()=>Er,FastViTModel:()=>Ar,FastViTPreTrainedModel:()=>Sr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>zo,GLPNModel:()=>Lo,GLPNPreTrainedModel:()=>Eo,GPT2LMHeadModel:()=>vn,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>qn,GPTBigCodeModel:()=>Gn,GPTBigCodePreTrainedModel:()=>Rn,GPTJForCausalLM:()=>jn,GPTJModel:()=>Vn,GPTJPreTrainedModel:()=>Dn,GPTNeoForCausalLM:()=>In,GPTNeoModel:()=>zn,GPTNeoPreTrainedModel:()=>Ln,GPTNeoXForCausalLM:()=>On,GPTNeoXModel:()=>Nn,GPTNeoXPreTrainedModel:()=>Bn,Gemma2ForCausalLM:()=>or,Gemma2Model:()=>rr,Gemma2PreTrainedModel:()=>nr,GemmaForCausalLM:()=>tr,GemmaModel:()=>er,GemmaPreTrainedModel:()=>Zn,HubertForCTC:()=>Ss,HubertForSequenceClassification:()=>As,HubertModel:()=>vs,HubertPreTrainedModel:()=>Ps,ImageMattingOutput:()=>Wi,JAISLMHeadModel:()=>En,JAISModel:()=>An,JAISPreTrainedModel:()=>Sn,LlamaForCausalLM:()=>Hn,LlamaModel:()=>Qn,LlamaPreTrainedModel:()=>Xn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>rs,M2M100Model:()=>ns,M2M100PreTrainedModel:()=>ts,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>vt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ke,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>es,MarianModel:()=>Zo,MarianPreTrainedModel:()=>Ko,MaskedLMOutput:()=>Ri,MistralForCausalLM:()=>Us,MistralModel:()=>Ws,MistralPreTrainedModel:()=>$s,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>Ta,MobileNetV1Model:()=>ba,MobileNetV1PreTrainedModel:()=>wa,MobileNetV2ForImageClassification:()=>ka,MobileNetV2Model:()=>ya,MobileNetV2PreTrainedModel:()=>xa,MobileNetV3ForImageClassification:()=>Pa,MobileNetV3Model:()=>Ca,MobileNetV3PreTrainedModel:()=>Fa,MobileNetV4ForImageClassification:()=>Aa,MobileNetV4Model:()=>Sa,MobileNetV4PreTrainedModel:()=>va,MobileViTForImageClassification:()=>Nr,MobileViTModel:()=>Br,MobileViTPreTrainedModel:()=
>Ir,MobileViTV2ForImageClassification:()=>Vr,MobileViTV2Model:()=>Dr,MobileViTV2PreTrainedModel:()=>Or,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>xr,MptModel:()=>Tr,MptPreTrainedModel:()=>br,MusicgenForCausalLM:()=>ga,MusicgenForConditionalGeneration:()=>Ma,MusicgenModel:()=>fa,MusicgenPreTrainedModel:()=>_a,NomicBertModel:()=>J,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>Fr,OPTModel:()=>kr,OPTPreTrainedModel:()=>yr,OpenELMForCausalLM:()=>ir,OpenELMModel:()=>ar,OpenELMPreTrainedModel:()=>sr,OwlViTForObjectDetection:()=>Gr,OwlViTModel:()=>Rr,OwlViTPreTrainedModel:()=>jr,Owlv2ForObjectDetection:()=>Wr,Owlv2Model:()=>$r,Owlv2PreTrainedModel:()=>qr,Phi3ForCausalLM:()=>fr,Phi3Model:()=>_r,Phi3PreTrainedModel:()=>mr,PhiForCausalLM:()=>hr,PhiModel:()=>pr,PhiPreTrainedModel:()=>ur,PreTrainedModel:()=>R,PretrainedMixin:()=>Ea,PyAnnoteForAudioFrameClassification:()=>us,PyAnnoteModel:()=>ds,PyAnnotePreTrainedModel:()=>cs,QuestionAnsweringModelOutput:()=>Gi,Qwen2ForCausalLM:()=>dr,Qwen2Model:()=>cr,Qwen2PreTrainedModel:()=>lr,RTDetrForObjectDetection:()=>ro,RTDetrModel:()=>no,RTDetrObjectDetectionOutput:()=>oo,RTDetrPreTrainedModel:()=>to,ResNetForImageClassification:()=>_o,ResNetModel:()=>mo,ResNetPreTrainedModel:()=>ho,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>K,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>Jo,SamModel:()=>Yo,SamPreTrainedModel:()=>Ho,SapiensForDepthEstimation:()=>So,SapiensForNormalEstimation:()=>Ao,SapiensForSemanticSegmentation:()=>vo,SapiensPreTrainedModel:()=>Po,SegformerForImageClassification:()=>ia,SegformerForSemanticSegmentation:()=>la,SegformerModel:()=>aa,SegformerPreTrainedModel:()=>sa,Seq2SeqLMOutput:()=>Oi,SequenceClassifierOutput:()=>Di,SiglipModel:()=>Mn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>wn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Vs,SpeechT5ForTextToSpeech:()=>js,SpeechT5HifiGan:()=>Rs,SpeechT5Model:()=>Ds,SpeechT5PreTrainedModel:()=>Os,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>ua,StableLmModel:()=>da,StableLmPreTrainedModel:()=>ca,Starcoder2ForCausalLM:()=>Hs,Starcoder2Model:()=>Qs,Starcoder2PreTrainedModel:()=>Xs,Swin2SRForImageSuperResolution:()=>To,Swin2SRModel:()=>bo,Swin2SRPreTrainedModel:()=>wo,SwinForImageClassification:()=>Mo,SwinModel:()=>go,SwinPreTrainedModel:()=>fo,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>io,TableTransformerModel:()=>ao,TableTransformerObjectDetectionOutput:()=>lo,TableTransformerPreTrainedModel:()=>so,TokenClassifierOutput:()=>ji,TrOCRForCausalLM:()=>qs,TrOCRPreTrainedModel:()=>Gs,UniSpeechForCTC:()=>fs,UniSpeechForSequenceClassification:()=>gs,UniSpeechModel:()=>_s,UniSpeechPreTrainedModel:()=>ms,UniSpeechSatForAudioFrameClassification:()=>xs,UniSpeechSatForCTC:()=>bs,UniSpeechSatForSequenceClassification:()=>Ts,UniSpeechSatModel:()=>ws,UniSpeechSatPreTrainedModel:()=>Ms,ViTForImageClassification:()=>vr,ViTModel:()=>Pr,ViTPreTrainedModel:()=>Cr,VisionEncoderDecoderModel:()=>an,VitMatteForImageMatting:()=>zr,VitMattePreTrainedModel:(
)=>Lr,VitsModel:()=>oa,VitsModelOutput:()=>Ui,VitsPreTrainedModel:()=>ra,Wav2Vec2BertForCTC:()=>Fs,Wav2Vec2BertForSequenceClassification:()=>Cs,Wav2Vec2BertModel:()=>ks,Wav2Vec2BertPreTrainedModel:()=>ys,Wav2Vec2ForAudioFrameClassification:()=>ls,Wav2Vec2ForCTC:()=>as,Wav2Vec2ForSequenceClassification:()=>is,Wav2Vec2Model:()=>ss,Wav2Vec2PreTrainedModel:()=>os,WavLMForAudioFrameClassification:()=>Ns,WavLMForCTC:()=>zs,WavLMForSequenceClassification:()=>Is,WavLMForXVector:()=>Bs,WavLMModel:()=>Ls,WavLMPreTrainedModel:()=>Es,WeSpeakerResNetModel:()=>hs,WeSpeakerResNetPreTrainedModel:()=>ps,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>Kt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>Vi,YolosForObjectDetection:()=>Xo,YolosModel:()=>Uo,YolosObjectDetectionOutput:()=>Qo,YolosPreTrainedModel:()=>Wo});var r=n(/*! ./configs.js */"./src/configs.js"),o=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,b=2,T=3,x=4,y=5,k=6,F=7,C=new Map,P=new Map,v=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,n){let a=n.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". Using the default device.`),a=null));const i=a??(_.apis.IS_NODE_ENV?"cpu":"wasm"),c=(0,o.deviceToExecutionProviders)(i);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=s.DEFAULT_DEVICE_DTYPE_MAPPING[i]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${i}).`)));const u=d;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(u))throw new Error(`Invalid dtype: ${u}. 
Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(u===s.DATA_TYPES.fp16&&"webgpu"===i&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${i}) does not support fp16.`);const p=s.DEFAULT_DTYPE_SUFFIX_MAPPING[u],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=c;const f=(0,l.getModelFile)(e,h,!0,n);let g=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,o=`${n.subfolder??""}/${r}`;g.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,n);t({path:r,data:a})})))}else void 0!==m.externalData&&(g=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(g.length>0&&(m.externalData=await Promise.all(g)),"webgpu"===i){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await f,session_options:m}}(e,t[a],n);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const s of e.inputNames){const e=t[s];e instanceof u.Tensor?n[s]=(0,o.isONNXProxy)()?e.clone():e:r.push(s)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=E(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:o,...s}=t;if(!n){const r=(0,i.pick)(t,e.sessions.model.inputNames);n=(await B(e,r)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const n=e.sessions.model,r=(0,i.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new u.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await A(n,r)}async function N(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;r.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),r.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let n=BigInt(0);for(let e=0;e<a;++e){const r=t+e;0n===o.data[r]?i[r]=BigInt(1):(i[r]=n,n+=o.data[r])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,r.inputNames);return await A(r,a)}async function O(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:r});({inputs_embeds:s,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:n}))}else if(a&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(a)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:n,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:o}=n;if(o&&o.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=r.dims[1]-(t-o);n.input_ids=r.slice(null,[-s,null]),n.attention_mask=(0,u.ones)([1,t+s])}}return n}function V(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=v.get(this.constructor),r=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case b:case T:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case k:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=C.get(_);let g;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),f===x)g=await Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===b||f===T)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===y)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===k){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const o=new n(r);return"generation_config"in this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ra,$a,ja,Ba],t=v.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(o+=` Please use the following class instead: ${[...n].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,i.pick)(n,this.forward_params),o=this.main_input_name;if(o in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[o]=e;return{inputs_tensor:r[o],model_inputs:r,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=n;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(r??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);a=r}else a=Array.from({length:e},(()=>[r]));return a=L(a),n.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),_=this._get_stopping_criteria(t,r),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();o&&o.put(w);let b=null,T={};for(;;){i=this.prepare_inputs_for_generation(w,i,t);const e=await this.forward(i);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in T||(T[e]=[]),T[e].push(t[e])}const n=h(w,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],o=await g(t);for(const[t,n]of o){const o=BigInt(t);M[e]+=n,w[e].push(o),r.push([o]);break}}o&&o.put(r);if(_(w).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,i.past_key_values,!1));break}i=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...T}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const o 
in e)if(o.startsWith("present")){const s=o.replace("present","past_key_values");if(t&&o.includes("encoder"))r[s]=t[s];else{if(n&&t){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}r[s]=e[o]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],o=(0,r.getKeyValueShapes)(this.config);for(const r in o)e[r]=new u.Tensor(t,n,o[r])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new Ri(await super._call(e))}}class X extends ${async _call(e){return new Di(await super._call(e))}}class Q extends ${async _call(e){return new ji(await super._call(e))}}class H extends ${async _call(e){return new Gi(await super._call(e))}}class Y extends R{}class J extends Y{}class K extends R{}class Z extends K{}class ee extends K{async _call(e){return new Ri(await super._call(e))}}class te extends K{async _call(e){return new Di(await super._call(e))}}class ne extends K{async _call(e){return new ji(await super._call(e))}}class re extends K{async _call(e){return new Gi(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new Ri(await super._call(e))}}class ie extends oe{async _call(e){return new Di(await super._call(e))}}class le extends oe{async _call(e){return new ji(await super._call(e))}}class ce extends oe{async _call(e){return new Gi(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new Ri(await super._call(e))}}class he extends de{async _call(e){return new Di(await super._call(e))}}class me extends de{async _call(e){return new ji(await super._call(e))}}class _e extends de{async _call(e){return new Gi(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new Ri(await super._call(e))}}class we extends fe{async _call(e){return new Di(await super._call(e))}}class be extends fe{async _call(e){return new ji(await super._call(e))}}class Te extends fe{async _call(e){return new Gi(await super._call(e))}}class xe extends R{}class ye extends xe{}class ke extends xe{async _call(e){return new Ri(await super._call(e))}}class Fe extends xe{async _call(e){return new Di(await super._call(e))}}class Ce extends xe{async _call(e){return new ji(await super._call(e))}}class Pe extends xe{async _call(e){return new Gi(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new Ri(await super._call(e))}}class Ee extends ve{async _call(e){return new Di(await super._call(e))}}class Le extends ve{async _call(e){return new ji(await super._call(e))}}class ze extends ve{async _call(e){return new 
Gi(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Di(await super._call(e))}}class Oe extends Ie{async _call(e){return new ji(await super._call(e))}}class De extends Ie{async _call(e){return new Gi(await super._call(e))}}class Ve extends Ie{async _call(e){return new Ri(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new Ri(await super._call(e))}}class qe extends je{async _call(e){return new Di(await super._call(e))}}class $e extends je{async _call(e){return new ji(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new Ri(await super._call(e))}}class Qe extends We{async _call(e){return new Di(await super._call(e))}}class He extends We{async _call(e){return new Gi(await super._call(e))}}class Ye extends R{}class Je extends Ye{}class Ke extends Ye{async _call(e){return new Ri(await super._call(e))}}class Ze extends Ye{async _call(e){return new Di(await super._call(e))}}class et extends Ye{async _call(e){return new ji(await super._call(e))}}class tt extends Ye{async _call(e){return new Gi(await super._call(e))}}class nt extends R{}class rt extends nt{}class ot extends nt{async _call(e){return new Ri(await super._call(e))}}class st extends nt{async _call(e){return new Di(await super._call(e))}}class at extends nt{async _call(e){return new Gi(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new Di(await super._call(e))}}class dt extends it{async _call(e){return new Gi(await super._call(e))}}class ut extends it{async _call(e){return new Ri(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class wt extends Mt{}class bt extends Mt{}class Tt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends Tt{}class yt extends Tt{}class kt extends Tt{async _call(e){return new Di(await super._call(e))}}class Ft extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends Ft{}class Pt extends Ft{}class vt extends Ft{async _call(e){return new Di(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Bt extends zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new Ri(await super._call(e))}}class Vt extends Nt{async _call(e){return new Di(await super._call(e))}}class jt extends Nt{async _call(e){return new ji(await super._call(e))}}class Rt extends Nt{async _call(e){return new Gi(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new Ri(await super._call(e))}}class Wt extends Gt{async _call(e){return new Di(await super._call(e))}}class Ut extends Gt{async _call(e){return new ji(await super._call(e))}}class Xt extends Gt{async _call(e){return new Gi(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async 
_call(e){return new Ri(await super._call(e))}}class Jt extends Qt{async _call(e){return new Di(await super._call(e))}}class Kt extends Qt{async _call(e){return new ji(await super._call(e))}}class Zt extends Qt{async _call(e){return new Gi(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class rn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class on extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const o=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return n?a[e].slice(null,t,null,[0,n]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],s=c[e][n][0].data,a=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[n,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*r);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const o=this.config.image_token_index,s=n.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:r};const l=[],c=[];for(let n=0;n<s.length;++n){const o=s[n],a=e[n],i=t[n],d=r[n];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:r})):n=o||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:n}=await 
this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:n})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:a};return await N(this,c,!0)}}class hn extends R{}class mn extends hn{}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class Mn extends gn{}class wn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends R{}class xn extends Tn{}class yn extends R{}class kn extends yn{}class Fn extends yn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class vn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class Ln extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Ln{}class In extends Ln{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class Dn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Dn{}class jn extends Dn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class qn extends Rn{}class $n extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Wn extends $n{}class Un extends $n{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Jn extends Yn{}class Kn extends Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class or extends nr{}class sr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends sr{}class ir extends sr{}class lr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends lr{}class dr extends lr{}class ur extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends ur{}class hr extends ur{}class mr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends mr{}class fr extends mr{}class gr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends gr{}class wr extends gr{}class br extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends br{}class xr extends br{}class yr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class kr extends yr{}class Fr extends yr{}class Cr extends R{}class Pr extends Cr{}class vr extends Cr{async _call(e){return new Di(await super._call(e))}}class Sr extends R{}class Ar extends Sr{}class Er extends Sr{async _call(e){return new Di(await super._call(e))}}class Lr extends R{}class zr extends Lr{async _call(e){return new Wi(await 
super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Di(await super._call(e))}}class Or extends R{}class Dr extends Or{}class Vr extends Or{async _call(e){return new Di(await super._call(e))}}class jr extends R{}class Rr extends jr{}class Gr extends jr{}class qr extends R{}class $r extends qr{}class Wr extends qr{}class Ur extends R{}class Xr extends Ur{}class Qr extends Ur{async _call(e){return new Di(await super._call(e))}}class Hr extends R{}class Yr extends Hr{}class Jr extends Hr{async _call(e){return new Zr(await super._call(e))}}class Kr extends Hr{async _call(e){return new eo(await super._call(e))}}class Zr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class eo extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class to extends R{}class no extends to{}class ro extends to{async _call(e){return new oo(await super._call(e))}}class oo extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class so extends R{}class ao extends so{}class io extends so{async _call(e){return new lo(await super._call(e))}}class lo extends Zr{}class co extends R{}class uo extends co{}class po extends co{async _call(e){return new Di(await super._call(e))}}class ho extends R{}class mo extends ho{}class _o extends ho{async _call(e){return new Di(await super._call(e))}}class fo extends R{}class go extends fo{}class Mo extends fo{async _call(e){return new Di(await super._call(e))}}class wo extends R{}class bo extends wo{}class To extends wo{}class xo extends R{}class yo extends xo{}class ko extends xo{}class Fo extends R{}class Co extends Fo{}class Po extends R{}class vo extends Po{}class So extends Po{}class Ao extends Po{}class Eo extends R{}class Lo extends Eo{}class zo extends Eo{}class Io extends R{}class Bo extends Io{}class No extends R{}class Oo extends No{}class Do extends No{async _call(e){return new Di(await super._call(e))}}class Vo extends R{}class jo extends Vo{}class Ro extends Vo{async _call(e){return new Di(await super._call(e))}}class Go extends R{}class qo extends Go{}class $o extends Go{async _call(e){return new Di(await super._call(e))}}class Wo extends R{}class Uo extends Wo{}class Xo extends Wo{async _call(e){return new Qo(await super._call(e))}}class Qo extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ho extends R{}class Yo extends Ho{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new Jo(await super._call(e))}}class Jo extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Ko extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Zo extends Ko{}class es extends Ko{}class ts extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ns 
extends ts{}class rs extends ts{}class os extends R{}class ss extends os{}class as extends os{async _call(e){return new qi(await super._call(e))}}class is extends os{async _call(e){return new Di(await super._call(e))}}class ls extends os{async _call(e){return new ji(await super._call(e))}}class cs extends R{}class ds extends cs{}class us extends cs{async _call(e){return new ji(await super._call(e))}}class ps extends R{}class hs extends ps{}class ms extends R{}class _s extends ms{}class fs extends ms{async _call(e){return new qi(await super._call(e))}}class gs extends ms{async _call(e){return new Di(await super._call(e))}}class Ms extends R{}class ws extends Ms{}class bs extends Ms{async _call(e){return new qi(await super._call(e))}}class Ts extends Ms{async _call(e){return new Di(await super._call(e))}}class xs extends Ms{async _call(e){return new ji(await super._call(e))}}class ys extends R{}class ks extends ys{}class Fs extends ys{async _call(e){return new qi(await super._call(e))}}class Cs extends ys{async _call(e){return new Di(await super._call(e))}}class Ps extends R{}class vs extends os{}class Ss extends os{async _call(e){return new qi(await super._call(e))}}class As extends os{async _call(e){return new Di(await super._call(e))}}class Es extends R{}class Ls extends Es{}class zs extends Es{async _call(e){return new qi(await super._call(e))}}class Is extends Es{async _call(e){return new Di(await super._call(e))}}class Bs extends Es{async _call(e){return new Vi(await super._call(e))}}class Ns extends Es{async _call(e){return new ji(await super._call(e))}}class Os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ds extends Os{}class Vs extends Os{}class js extends Os{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*r),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let r;r=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=n)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class Rs extends R{main_input_name="spectrogram"}class Gs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class qs extends Gs{}class $s extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ws extends $s{}class Us extends $s{}class Xs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qs extends Xs{}class Hs extends Xs{}class Ys extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Js extends Ys{}class Ks extends Ys{}class Zs extends R{}class ea extends Zs{}class ta extends Zs{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class na extends Zs{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class ra extends R{}class oa extends ra{async _call(e){return new Ui(await super._call(e))}}class sa extends R{}class aa extends sa{}class ia extends sa{}class la extends sa{}class ca 
extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class da extends ca{}class ua extends ca{}class pa extends R{}class ha extends pa{}class ma extends pa{async _call(e){return new Di(await super._call(e))}}class _a extends R{}class fa extends _a{}class ga extends _a{}class Ma extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,o=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const a=t%n-Math.floor(t/n)%r;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/r),i=s/(a*r);return new u.Tensor(e.type,e.data.slice(0,s),[a,r,i])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class wa extends R{}class ba extends wa{}class Ta extends wa{async _call(e){return new Di(await super._call(e))}}class xa extends R{}class ya extends xa{}class ka extends xa{async _call(e){return new Di(await super._call(e))}}class Fa extends R{}class Ca extends Fa{}class Pa extends Fa{async _call(e){return new Di(await super._call(e))}}class va extends R{}class Sa extends va{}class Aa extends va{async _call(e){return new Di(await super._call(e))}}class Ea{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const La=new 
Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ye]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",ea]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",kn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",Mn]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",ss]],["wav2vec2-bert",["Wav2Vec2BertModel",ks]],["unispeech",["UniSpeechModel",_s]],["unispeech-sat",["UniSpeechSatModel",ws]],["hubert",["HubertModel",vs]],["wavlm",["WavLMModel",Ls]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",oa]],["pyannote",["PyAnnoteModel",ds]],["wespeaker-resnet",["WeSpeakerResNetModel",hs]],["detr",["DetrModel",Yr]],["rt_detr",["RTDetrModel",no]],["table-transformer",["TableTransformerModel",ao]],["vit",["ViTModel",Pr]],["fastvit",["FastViTModel",Ar]],["mobilevit",["MobileViTModel",Br]],["mobilevitv2",["MobileViTV2Model",Dr]],["owlvit",["OwlViTModel",Rr]],["owlv2",["Owlv2Model",$r]],["beit",["BeitModel",Xr]],["deit",["DeiTModel",uo]],["convnext",["ConvNextModel",Oo]],["convnextv2",["ConvNextV2Model",jo]],["dinov2",["Dinov2Model",qo]],["resnet",["ResNetModel",mo]],["swin",["SwinModel",go]],["swin2sr",["Swin2SRModel",bo]],["donut-swin",["DonutSwinModel",Bo]],["yolos",["YolosModel",Uo]],["dpt",["DPTModel",yo]],["glpn",["GLPNModel",Lo]],["hifigan",["SpeechT5HifiGan",Rs]],["efficientnet",["EfficientNetModel",ha]],["mobilenet_v1",["MobileNetV1Model",ba]],["mobilenet_v2",["MobileNetV2Model",ya]],["mobilenet_v3",["MobileNetV3Model",Ca]],["mobilenet_v4",["MobileNetV4Model",Sa]]]),za=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",Zo]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",ns]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Ia=new Map([["bloom",["BloomModel",Mr]],["jais",["JAISModel",An]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Vn]],["gpt_bigcode",["GPTBigCodeModel",Gn]],["gpt_neo",["GPTNeoModel",zn]],["gpt_neox",["GPTNeoXModel",Nn]],["codegen",["CodeGenModel",Wn]],["llama",["LlamaModel",Qn]],["cohere",["CohereModel",Jn]],["gemma",["GemmaModel",er]],["gemma2",["Gemma2Model",rr]],["openelm",["OpenELMModel",ar]],["qwen2",["Qwen2Model",cr]],["phi",["PhiModel",pr]],["phi3",["Phi3Model",_r]],["mpt",["MptModel",Tr]],["opt",["OPTModel",kr]],["mistral",["MistralModel",Ws]],["starcoder2",["Starcoder2Model",Qs]],["falcon",["FalconModel",Js]],["stablelm",["StableLmModel",da]]]),Ba=new Map([["speecht5",["SpeechT5ForSpeechToText",Vs]],["whisper",["WhisperForConditionalGeneration",sn]]]),Na=new Map([["speecht5",["SpeechT5ForTextToSpeech",js]]]),Oa=new Map([["vits",["VitsModel",oa]],["musicgen",["MusicgenForConditionalGeneration",Ma]]]),Da=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",kt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),Va=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Kt]]]),ja=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",yt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",es]],["m2m_100",["M2M100ForConditionalGeneration",rs]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),Ra=new Map([["bloom",["BloomForCausalLM",wr]],["gpt2",["GPT2LMHeadModel",vn]],["jais",["JAISLMHeadModel",En]],["gptj",["GPTJForCausalLM",jn]],["gpt_bigcode",["GPTBigCodeForCausalLM",qn]],["gpt_neo",["GPTNeoForCausalLM",In]],["gpt_neox",["GPTNeoXForCausalLM",On]],["codegen",["CodeGenForCausalLM",Un]],["llama",["LlamaForCausalLM",Hn]],["cohere",["CohereForCausalLM",Kn]],["gemma",["GemmaForCausalLM",tr]],["gemma2",["Gemma2ForCausalLM",or]],["openelm",["OpenELMForCausalLM",ir]],["qwen2",["Qwen2ForCausalLM",dr]],["phi",["PhiForCausalLM",hr]],["phi3",["Phi3ForCausalLM",fr]],["mpt",["MptForCausalLM",xr]],["opt",["OPTForCausalLM",Fr]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Us]],["starcoder2",["Starcoder2ForCausalLM",Hs]],["falcon",["FalconForCausalLM",Ks]],["trocr",["TrOCRForCausalLM",qs]],["stablelm",["StableLmForCausalLM",ua]]]),Ga=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ke]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Ke]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),qa=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Te]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),$a=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Wa=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Ua=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Xa=new Map([["vit",["ViTForImageClassification",vr]],["fastvit",["FastViTForImageClassification",Er]],["mobilevit",["MobileViTForImageClassification",Nr]],["mobilevitv2",["MobileViTV2ForImageClassification",Vr]],["beit",["BeitForImageClassification",Qr]],["deit",["DeiTForImageClassification",po]],["convnext",["ConvNextForImageClassification",Do]],["convnextv2",["ConvNextV2ForImageClassification",Ro]],["dinov2",["Dinov2ForImageClassification",$o]],["resnet",["ResNetForImageClassification",_o]],["swin",["SwinForImageClassification",Mo]],["segformer",["SegformerForImageClassification",ia]],["efficientnet",["EfficientNetForImageClassification",ma]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ta]],["mobilenet_v2",["MobileNetV2ForImageClassification",ka]],["mobilenet_v3",["MobileNetV3ForImageClassification",Pa]],["mobilenet_v4",["MobileNetV4ForImageClassification",Aa]]]),Qa=new Map([["detr",["DetrForObjectDetection",Jr]],["rt_detr",["RTDetrForObjectDetection",ro]],["table-transformer",["TableTransformerForObjectDetection",io]],["yolos",["YolosForObjectDetection",Xo]]]),Ha=new Map([["owlvit",["OwlViTForObjectDetection",Gr]],["owlv2",["Owlv2ForObjectDetection",Wr]]]),Ya=new Map([["detr",["DetrForSegmentation",Kr]],["clipseg",["CLIPSegForImageSegmentation",Fn]]]),Ja=new Map([["segformer",["SegformerForSemanticSegmentation",la]],["sapiens",["SapiensForSemanticSegmentation",vo]]]),Ka=new Map([["sam",["SamModel",Yo]]]),Za=new Map([["wav2vec2",["Wav2Vec2ForCTC",as]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Fs]],["unispeech",["UniSpeechForCTC",fs]],["unispeech-sat",["UniSpeechSatForCTC",bs]],["wavlm",["WavLMForCTC",zs]],["hubert",["HubertForCTC",Ss]]]),ei=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",is]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Cs]],["unispeech",["UniSpeechForSequenceClassification",gs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Ts]],["wavlm",["WavLMForSequenceClassification",Is]],["hubert",["HubertForSequenceClassification",As]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),ti=new Map([["wavlm",["WavLMForXVector",Bs]]]),ni=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",xs]],["wavlm",["WavLMForAudioFrameClassification",Ns]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ls]],["pyannote",["PyAnnoteForAudioFrameClassification",us]]]),ri=new Map([["vitmatte",["VitMatteForImageMatting",zr]]]),oi=new 
Map([["swin2sr",["Swin2SRForImageSuperResolution",To]]]),si=new Map([["dpt",["DPTForDepthEstimation",ko]],["depth_anything",["DepthAnythingForDepthEstimation",Co]],["glpn",["GLPNForDepthEstimation",zo]],["sapiens",["SapiensForDepthEstimation",So]]]),ai=new Map([["sapiens",["SapiensForNormalEstimation",Ao]]]),ii=new Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),li=[[La,M],[za,w],[Ia,x],[Da,M],[Va,M],[ja,b],[Ba,b],[Ra,x],[Ga,M],[qa,M],[$a,T],[Wa,k],[Xa,M],[Ya,M],[Ja,M],[ri,M],[oi,M],[si,M],[ai,M],[Qa,M],[Ha,M],[Ka,y],[Za,M],[ei,M],[Na,b],[Oa,M],[ti,M],[ni,M],[ii,M]];for(const[e,t]of li)for(const[n,r]of e.values())C.set(n,t),v.set(r,n),P.set(n,r);const ci=[["MusicgenForConditionalGeneration",Ma,F],["CLIPTextModelWithProjection",_n,M],["SiglipTextModel",wn,M],["ClapTextModelWithProjection",ta,M],["ClapAudioModelWithProjection",na,M]];for(const[e,t,n]of ci)C.set(e,n),v.set(t,e),P.set(e,t);class di extends Ea{static MODEL_CLASS_MAPPINGS=li.map((e=>e[0]));static BASE_IF_FAIL=!0}class ui extends Ea{static MODEL_CLASS_MAPPINGS=[Da]}class pi extends Ea{static MODEL_CLASS_MAPPINGS=[Va]}class hi extends Ea{static MODEL_CLASS_MAPPINGS=[ja]}class mi extends Ea{static MODEL_CLASS_MAPPINGS=[Ba]}class _i extends Ea{static MODEL_CLASS_MAPPINGS=[Na]}class fi extends Ea{static MODEL_CLASS_MAPPINGS=[Oa]}class gi extends Ea{static MODEL_CLASS_MAPPINGS=[Ra]}class Mi extends Ea{static MODEL_CLASS_MAPPINGS=[Ga]}class wi extends Ea{static MODEL_CLASS_MAPPINGS=[qa]}class bi extends Ea{static MODEL_CLASS_MAPPINGS=[$a]}class Ti extends Ea{static MODEL_CLASS_MAPPINGS=[Xa]}class xi extends Ea{static MODEL_CLASS_MAPPINGS=[Ya]}class yi extends Ea{static MODEL_CLASS_MAPPINGS=[Ja]}class ki extends Ea{static MODEL_CLASS_MAPPINGS=[Qa]}class Fi extends Ea{static MODEL_CLASS_MAPPINGS=[Ha]}class Ci extends Ea{static MODEL_CLASS_MAPPINGS=[Ka]}class Pi extends Ea{static MODEL_CLASS_MAPPINGS=[Za]}class vi extends Ea{static MODEL_CLASS_MAPPINGS=[ei]}class Si extends Ea{static MODEL_CLASS_MAPPINGS=[ti]}class Ai extends Ea{static MODEL_CLASS_MAPPINGS=[ni]}class Ei extends Ea{static MODEL_CLASS_MAPPINGS=[Ua]}class Li extends Ea{static MODEL_CLASS_MAPPINGS=[ri]}class zi extends Ea{static MODEL_CLASS_MAPPINGS=[oi]}class Ii extends Ea{static MODEL_CLASS_MAPPINGS=[si]}class Bi extends Ea{static MODEL_CLASS_MAPPINGS=[ai]}class Ni extends Ea{static MODEL_CLASS_MAPPINGS=[ii]}class Oi extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=o}}class Di extends G{constructor({logits:e}){super(),this.logits=e}}class Vi extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class ji extends G{constructor({logits:e}){super(),this.logits=e}}class Ri extends G{constructor({logits:e}){super(),this.logits=e}}class Gi extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class qi extends G{constructor({logits:e}){super(),this.logits=e}}class $i extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Wi extends G{constructor({alphas:e}){super(),this.alphas=e}}class Ui extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
112
+ \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>hi,AutoModelForAudioClassification:()=>Ei,AutoModelForAudioFrameClassification:()=>zi,AutoModelForCTC:()=>Ai,AutoModelForCausalLM:()=>bi,AutoModelForDepthEstimation:()=>Oi,AutoModelForDocumentQuestionAnswering:()=>Ii,AutoModelForImageClassification:()=>ki,AutoModelForImageFeatureExtraction:()=>Vi,AutoModelForImageMatting:()=>Bi,AutoModelForImageSegmentation:()=>Fi,AutoModelForImageToImage:()=>Ni,AutoModelForMaskGeneration:()=>Si,AutoModelForMaskedLM:()=>Ti,AutoModelForNormalEstimation:()=>Di,AutoModelForObjectDetection:()=>Pi,AutoModelForQuestionAnswering:()=>xi,AutoModelForSemanticSegmentation:()=>Ci,AutoModelForSeq2SeqLM:()=>fi,AutoModelForSequenceClassification:()=>mi,AutoModelForSpeechSeq2Seq:()=>gi,AutoModelForTextToSpectrogram:()=>Mi,AutoModelForTextToWaveform:()=>wi,AutoModelForTokenClassification:()=>_i,AutoModelForVision2Seq:()=>yi,AutoModelForXVector:()=>Li,AutoModelForZeroShotObjectDetection:()=>vi,BartForConditionalGeneration:()=>yt,BartForSequenceClassification:()=>kt,BartModel:()=>xt,BartPretrainedModel:()=>Tt,BaseModelOutput:()=>q,BeitForImageClassification:()=>Qr,BeitModel:()=>Xr,BeitPreTrainedModel:()=>Ur,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>wr,BloomModel:()=>Mr,BloomPreTrainedModel:()=>gr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Fn,CLIPSegModel:()=>kn,CLIPSegPreTrainedModel:()=>yn,CLIPTextModelWithProjection:()=>_n,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>Te,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>Ui,CausalLMOutputWithPast:()=>Xi,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>Tn,ClapAudioModelWithProjection:()=>sa,ClapModel:()=>ra,ClapPreTrainedModel:()=>na,ClapTextModelWithProjection:()=>oa,CodeGenForCausalLM:()=>Un,CodeGenModel:()=>Wn,CodeGenPreTrainedModel:()=>$n,CohereForCausalLM:()=>Kn,CohereModel:()=>Jn,CoherePreTrainedModel:()=>Yn,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>Ro,ConvNextModel:()=>jo,ConvNextPreTrainedModel:()=>Vo,ConvNextV2ForImageClassification:()=>$o,ConvNextV2Model:()=>qo,ConvNextV2PreTrainedModel:()=>Go,DPTForDepthEstimation:()=>Po,DPTModel:()=>Co,DPTPreTrainedModel:()=>Fo,DebertaForMaskedLM:()=>ke,DebertaForQuestionAnswering:()=>Pe,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ye,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DeiTForImageClassification:()=>po,DeiTModel:()=>uo,DeiTPreTrainedModel
:()=>co,DepthAnythingForDepthEstimation:()=>So,DepthAnythingPreTrainedModel:()=>vo,DetrForObjectDetection:()=>Jr,DetrForSegmentation:()=>Kr,DetrModel:()=>Yr,DetrObjectDetectionOutput:()=>Zr,DetrPreTrainedModel:()=>Hr,DetrSegmentationOutput:()=>eo,Dinov2ForImageClassification:()=>Xo,Dinov2Model:()=>Uo,Dinov2PreTrainedModel:()=>Wo,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Do,DonutSwinPreTrainedModel:()=>Oo,EfficientNetForImageClassification:()=>ga,EfficientNetModel:()=>fa,EfficientNetPreTrainedModel:()=>_a,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>ta,FalconModel:()=>ea,FalconPreTrainedModel:()=>Zs,FastViTForImageClassification:()=>Er,FastViTModel:()=>Ar,FastViTPreTrainedModel:()=>Sr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>No,GLPNModel:()=>Bo,GLPNPreTrainedModel:()=>Io,GPT2LMHeadModel:()=>vn,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>qn,GPTBigCodeModel:()=>Gn,GPTBigCodePreTrainedModel:()=>Rn,GPTJForCausalLM:()=>jn,GPTJModel:()=>Vn,GPTJPreTrainedModel:()=>Dn,GPTNeoForCausalLM:()=>In,GPTNeoModel:()=>zn,GPTNeoPreTrainedModel:()=>Ln,GPTNeoXForCausalLM:()=>On,GPTNeoXModel:()=>Nn,GPTNeoXPreTrainedModel:()=>Bn,Gemma2ForCausalLM:()=>or,Gemma2Model:()=>rr,Gemma2PreTrainedModel:()=>nr,GemmaForCausalLM:()=>tr,GemmaModel:()=>er,GemmaPreTrainedModel:()=>Zn,HieraForImageClassification:()=>_o,HieraModel:()=>mo,HieraPreTrainedModel:()=>ho,HubertForCTC:()=>Ls,HubertForSequenceClassification:()=>zs,HubertModel:()=>Es,HubertPreTrainedModel:()=>As,ImageMattingOutput:()=>Qi,JAISLMHeadModel:()=>En,JAISModel:()=>An,JAISPreTrainedModel:()=>Sn,LlamaForCausalLM:()=>Hn,LlamaModel:()=>Qn,LlamaPreTrainedModel:()=>Xn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>as,M2M100Model:()=>ss,M2M100PreTrainedModel:()=>os,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Pt,MBartForSequenceClassification:()=>vt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ke,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>rs,MarianModel:()=>ns,MarianPreTrainedModel:()=>ts,MaskedLMOutput:()=>$i,MistralForCausalLM:()=>Hs,MistralModel:()=>Qs,MistralPreTrainedModel:()=>Xs,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>ka,MobileNetV1Model:()=>ya,MobileNetV1PreTrainedModel:()=>xa,MobileNetV2ForImageClassification:()=>Pa,MobileNetV2Model:()=>Ca,MobileNetV2PreTrainedModel:()=>Fa,MobileNetV3ForImageClassification:()=>Aa,MobileNetV3Model:()=>Sa,MobileNetV3PreTrainedModel:()=>va,MobileNetV4ForImageClassification:()=>za,MobileNetV4Model:()=>La,MobileNetV4PreTrainedModel:()=>Ea,MobileVi
TForImageClassification:()=>Nr,MobileViTModel:()=>Br,MobileViTPreTrainedModel:()=>Ir,MobileViTV2ForImageClassification:()=>Vr,MobileViTV2Model:()=>Dr,MobileViTV2PreTrainedModel:()=>Or,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>xr,MptModel:()=>Tr,MptPreTrainedModel:()=>br,MusicgenForCausalLM:()=>ba,MusicgenForConditionalGeneration:()=>Ta,MusicgenModel:()=>wa,MusicgenPreTrainedModel:()=>Ma,NomicBertModel:()=>J,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>Fr,OPTModel:()=>kr,OPTPreTrainedModel:()=>yr,OpenELMForCausalLM:()=>ir,OpenELMModel:()=>ar,OpenELMPreTrainedModel:()=>sr,OwlViTForObjectDetection:()=>Gr,OwlViTModel:()=>Rr,OwlViTPreTrainedModel:()=>jr,Owlv2ForObjectDetection:()=>Wr,Owlv2Model:()=>$r,Owlv2PreTrainedModel:()=>qr,Phi3ForCausalLM:()=>fr,Phi3Model:()=>_r,Phi3PreTrainedModel:()=>mr,PhiForCausalLM:()=>hr,PhiModel:()=>pr,PhiPreTrainedModel:()=>ur,PreTrainedModel:()=>R,PretrainedMixin:()=>Ia,PyAnnoteForAudioFrameClassification:()=>ms,PyAnnoteModel:()=>hs,PyAnnotePreTrainedModel:()=>ps,QuestionAnsweringModelOutput:()=>Wi,Qwen2ForCausalLM:()=>dr,Qwen2Model:()=>cr,Qwen2PreTrainedModel:()=>lr,RTDetrForObjectDetection:()=>ro,RTDetrModel:()=>no,RTDetrObjectDetectionOutput:()=>oo,RTDetrPreTrainedModel:()=>to,ResNetForImageClassification:()=>Mo,ResNetModel:()=>go,ResNetPreTrainedModel:()=>fo,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>K,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>es,SamModel:()=>Zo,SamPreTrainedModel:()=>Ko,SapiensForDepthEstimation:()=>Lo,SapiensForNormalEstimation:()=>zo,SapiensForSemanticSegmentation:()=>Eo,SapiensPreTrainedModel:()=>Ao,SegformerForImageClassification:()=>da,SegformerForSemanticSegmentation:()=>ua,SegformerModel:()=>ca,SegformerPreTrainedModel:()=>la,Seq2SeqLMOutput:()=>ji,SequenceClassifierOutput:()=>Ri,SiglipModel:()=>Mn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>wn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Gs,SpeechT5ForTextToSpeech:()=>qs,SpeechT5HifiGan:()=>$s,SpeechT5Model:()=>Rs,SpeechT5PreTrainedModel:()=>js,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>ma,StableLmModel:()=>ha,StableLmPreTrainedModel:()=>pa,Starcoder2ForCausalLM:()=>Ks,Starcoder2Model:()=>Js,Starcoder2PreTrainedModel:()=>Ys,Swin2SRForImageSuperResolution:()=>ko,Swin2SRModel:()=>yo,Swin2SRPreTrainedModel:()=>xo,SwinForImageClassification:()=>To,SwinModel:()=>bo,SwinPreTrainedModel:()=>wo,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>io,TableTransformerModel:()=>ao,TableTransformerObjectDetectionOutput:()=>lo,TableTransformerPreTrainedModel:()=>so,TokenClassifierOutput:()=>qi,TrOCRForCausalLM:()=>Us,TrOCRPreTrainedModel:()=>Ws,UniSpeechForCTC:()=>ws,UniSpeechForSequenceClassification:()=>bs,UniSpeechModel:()=>Ms,UniSpeechPreTrainedModel:()=>gs,UniSpeechSatForAudioFrameClassification:()=>Fs,UniSpeechSatForCTC:()=>ys,UniSpeechSatForSequenceClassification:()=>ks,UniSpeechSatModel:()=>xs,UniSpeechSatPreTrainedModel:()=>Ts,ViTForImageClassification:()=>vr,ViTModel:()=>Pr,ViTPreTrainedModel:()=>Cr,VisionEn
coderDecoderModel:()=>an,VitMatteForImageMatting:()=>zr,VitMattePreTrainedModel:()=>Lr,VitsModel:()=>ia,VitsModelOutput:()=>Hi,VitsPreTrainedModel:()=>aa,Wav2Vec2BertForCTC:()=>vs,Wav2Vec2BertForSequenceClassification:()=>Ss,Wav2Vec2BertModel:()=>Ps,Wav2Vec2BertPreTrainedModel:()=>Cs,Wav2Vec2ForAudioFrameClassification:()=>us,Wav2Vec2ForCTC:()=>cs,Wav2Vec2ForSequenceClassification:()=>ds,Wav2Vec2Model:()=>ls,Wav2Vec2PreTrainedModel:()=>is,WavLMForAudioFrameClassification:()=>Vs,WavLMForCTC:()=>Ns,WavLMForSequenceClassification:()=>Os,WavLMForXVector:()=>Ds,WavLMModel:()=>Bs,WavLMPreTrainedModel:()=>Is,WeSpeakerResNetModel:()=>fs,WeSpeakerResNetPreTrainedModel:()=>_s,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>on,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>Kt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>Gi,YolosForObjectDetection:()=>Yo,YolosModel:()=>Ho,YolosObjectDetectionOutput:()=>Jo,YolosPreTrainedModel:()=>Qo});var r=n(/*! ./configs.js */"./src/configs.js"),o=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,b=2,T=3,x=4,y=5,k=6,F=7,C=new Map,P=new Map,v=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,n){let a=n.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". Using the default device.`),a=null));const i=a??(_.apis.IS_NODE_ENV?"cpu":"wasm"),c=(0,o.deviceToExecutionProviders)(i);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=s.DEFAULT_DEVICE_DTYPE_MAPPING[i]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${i}).`)));const u=d;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(u))throw new Error(`Invalid dtype: ${u}. 
Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(u===s.DATA_TYPES.fp16&&"webgpu"===i&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${i}) does not support fp16.`);const p=s.DEFAULT_DTYPE_SUFFIX_MAPPING[u],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=c;const f=(0,l.getModelFile)(e,h,!0,n);let g=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,o=`${n.subfolder??""}/${r}`;g.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,n);t({path:r,data:a})})))}else void 0!==m.externalData&&(g=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(g.length>0&&(m.externalData=await Promise.all(g)),"webgpu"===i){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await f,session_options:m}}(e,t[a],n);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const s of e.inputNames){const e=t[s];e instanceof u.Tensor?n[s]=(0,o.isONNXProxy)()?e.clone():e:r.push(s)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=E(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:o,...s}=t;if(!n){const r=(0,i.pick)(t,e.sessions.model.inputNames);n=(await B(e,r)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const n=e.sessions.model,r=(0,i.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new u.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await A(n,r)}async function N(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;r.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),r.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let n=BigInt(0);for(let e=0;e<a;++e){const r=t+e;0n===o.data[r]?i[r]=BigInt(1):(i[r]=n,n+=o.data[r])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,r.inputNames);return await A(r,a)}async function O(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:r});({inputs_embeds:s,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:n}))}else if(a&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(a)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:n,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:o}=n;if(o&&o.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=r.dims[1]-(t-o);n.input_ids=r.slice(null,[-s,null]),n.attention_mask=(0,u.ones)([1,t+s])}}return n}function V(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=v.get(this.constructor),r=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case b:case T:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case k:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=C.get(_);let g;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),f===x)g=await Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===b||f===T)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===y)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===k){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const o=new n(r);return"generation_config"in this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[$a,Xa,qa,Da],t=v.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(o+=` Please use the following class instead: ${[...n].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,i.pick)(n,this.forward_params),o=this.main_input_name;if(o in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[o]=e;return{inputs_tensor:r[o],model_inputs:r,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=n;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(r??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expected to have length ${e} but got ${r.length}`);a=r}else a=Array.from({length:e},(()=>[r]));return a=L(a),n.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),_=this._get_stopping_criteria(t,r),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();o&&o.put(w);let b=null,T={};for(;;){i=this.prepare_inputs_for_generation(w,i,t);const e=await this.forward(i);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in T||(T[e]=[]),T[e].push(t[e])}const n=h(w,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],o=await g(t);for(const[t,n]of o){const o=BigInt(t);M[e]+=n,w[e].push(o),r.push([o]);break}}o&&o.put(r);if(_(w).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,i.past_key_values,!1));break}i=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...T}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const o
in e)if(o.startsWith("present")){const s=o.replace("present","past_key_values");if(t&&o.includes("encoder"))r[s]=t[s];else{if(n&&t){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}r[s]=e[o]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],o=(0,r.getKeyValueShapes)(this.config);for(const r in o)e[r]=new u.Tensor(t,n,o[r])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new $i(await super._call(e))}}class X extends ${async _call(e){return new Ri(await super._call(e))}}class Q extends ${async _call(e){return new qi(await super._call(e))}}class H extends ${async _call(e){return new Wi(await super._call(e))}}class Y extends R{}class J extends Y{}class K extends R{}class Z extends K{}class ee extends K{async _call(e){return new $i(await super._call(e))}}class te extends K{async _call(e){return new Ri(await super._call(e))}}class ne extends K{async _call(e){return new qi(await super._call(e))}}class re extends K{async _call(e){return new Wi(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new $i(await super._call(e))}}class ie extends oe{async _call(e){return new Ri(await super._call(e))}}class le extends oe{async _call(e){return new qi(await super._call(e))}}class ce extends oe{async _call(e){return new Wi(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new $i(await super._call(e))}}class he extends de{async _call(e){return new Ri(await super._call(e))}}class me extends de{async _call(e){return new qi(await super._call(e))}}class _e extends de{async _call(e){return new Wi(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new $i(await super._call(e))}}class we extends fe{async _call(e){return new Ri(await super._call(e))}}class be extends fe{async _call(e){return new qi(await super._call(e))}}class Te extends fe{async _call(e){return new Wi(await super._call(e))}}class xe extends R{}class ye extends xe{}class ke extends xe{async _call(e){return new $i(await super._call(e))}}class Fe extends xe{async _call(e){return new Ri(await super._call(e))}}class Ce extends xe{async _call(e){return new qi(await super._call(e))}}class Pe extends xe{async _call(e){return new Wi(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new $i(await super._call(e))}}class Ee extends ve{async _call(e){return new Ri(await super._call(e))}}class Le extends ve{async _call(e){return new qi(await super._call(e))}}class ze extends ve{async _call(e){return new 
Wi(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Ri(await super._call(e))}}class Oe extends Ie{async _call(e){return new qi(await super._call(e))}}class De extends Ie{async _call(e){return new Wi(await super._call(e))}}class Ve extends Ie{async _call(e){return new $i(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new $i(await super._call(e))}}class qe extends je{async _call(e){return new Ri(await super._call(e))}}class $e extends je{async _call(e){return new qi(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new $i(await super._call(e))}}class Qe extends We{async _call(e){return new Ri(await super._call(e))}}class He extends We{async _call(e){return new Wi(await super._call(e))}}class Ye extends R{}class Je extends Ye{}class Ke extends Ye{async _call(e){return new $i(await super._call(e))}}class Ze extends Ye{async _call(e){return new Ri(await super._call(e))}}class et extends Ye{async _call(e){return new qi(await super._call(e))}}class tt extends Ye{async _call(e){return new Wi(await super._call(e))}}class nt extends R{}class rt extends nt{}class ot extends nt{async _call(e){return new $i(await super._call(e))}}class st extends nt{async _call(e){return new Ri(await super._call(e))}}class at extends nt{async _call(e){return new Wi(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new Ri(await super._call(e))}}class dt extends it{async _call(e){return new Wi(await super._call(e))}}class ut extends it{async _call(e){return new $i(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class wt extends Mt{}class bt extends Mt{}class Tt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends Tt{}class yt extends Tt{}class kt extends Tt{async _call(e){return new Ri(await super._call(e))}}class Ft extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends Ft{}class Pt extends Ft{}class vt extends Ft{async _call(e){return new Ri(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Bt extends zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new $i(await super._call(e))}}class Vt extends Nt{async _call(e){return new Ri(await super._call(e))}}class jt extends Nt{async _call(e){return new qi(await super._call(e))}}class Rt extends Nt{async _call(e){return new Wi(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new $i(await super._call(e))}}class Wt extends Gt{async _call(e){return new Ri(await super._call(e))}}class Ut extends Gt{async _call(e){return new qi(await super._call(e))}}class Xt extends Gt{async _call(e){return new Wi(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async 
_call(e){return new $i(await super._call(e))}}class Jt extends Qt{async _call(e){return new Ri(await super._call(e))}}class Kt extends Qt{async _call(e){return new qi(await super._call(e))}}class Zt extends Qt{async _call(e){return new Wi(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class rn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class on extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const o=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return n?a[e].slice(null,t,null,[0,n]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],s=c[e][n][0].data,a=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[n,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*r);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const o=this.config.image_token_index,s=n.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:r};const l=[],c=[];for(let n=0;n<s.length;++n){const o=s[n],a=e[n],i=t[n],d=r[n];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:r})):n=o||s,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:n}=await 
this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:n})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:n});s=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:n,encoder_hidden_states:s,past_key_values:a};return await N(this,c,!0)}}class hn extends R{}class mn extends hn{}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class Mn extends gn{}class wn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Tn extends R{}class xn extends Tn{}class yn extends R{}class kn extends yn{}class Fn extends yn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class vn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class Ln extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Ln{}class In extends Ln{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class Dn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Dn{}class jn extends Dn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class qn extends Rn{}class $n extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Wn extends $n{}class Un extends $n{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Jn extends Yn{}class Kn extends Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class or extends nr{}class sr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends sr{}class ir extends sr{}class lr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends lr{}class dr extends lr{}class ur extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends ur{}class hr extends ur{}class mr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends mr{}class fr extends mr{}class gr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends gr{}class wr extends gr{}class br extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends br{}class xr extends br{}class yr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class kr extends yr{}class Fr extends yr{}class Cr extends R{}class Pr extends Cr{}class vr extends Cr{async _call(e){return new Ri(await super._call(e))}}class Sr extends R{}class Ar extends Sr{}class Er extends Sr{async _call(e){return new Ri(await super._call(e))}}class Lr extends R{}class zr extends Lr{async _call(e){return new Qi(await 
super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Ri(await super._call(e))}}class Or extends R{}class Dr extends Or{}class Vr extends Or{async _call(e){return new Ri(await super._call(e))}}class jr extends R{}class Rr extends jr{}class Gr extends jr{}class qr extends R{}class $r extends qr{}class Wr extends qr{}class Ur extends R{}class Xr extends Ur{}class Qr extends Ur{async _call(e){return new Ri(await super._call(e))}}class Hr extends R{}class Yr extends Hr{}class Jr extends Hr{async _call(e){return new Zr(await super._call(e))}}class Kr extends Hr{async _call(e){return new eo(await super._call(e))}}class Zr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class eo extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class to extends R{}class no extends to{}class ro extends to{async _call(e){return new oo(await super._call(e))}}class oo extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class so extends R{}class ao extends so{}class io extends so{async _call(e){return new lo(await super._call(e))}}class lo extends Zr{}class co extends R{}class uo extends co{}class po extends co{async _call(e){return new Ri(await super._call(e))}}class ho extends R{}class mo extends ho{}class _o extends ho{async _call(e){return new Ri(await super._call(e))}}class fo extends R{}class go extends fo{}class Mo extends fo{async _call(e){return new Ri(await super._call(e))}}class wo extends R{}class bo extends wo{}class To extends wo{async _call(e){return new Ri(await super._call(e))}}class xo extends R{}class yo extends xo{}class ko extends xo{}class Fo extends R{}class Co extends Fo{}class Po extends Fo{}class vo extends R{}class So extends vo{}class Ao extends R{}class Eo extends Ao{}class Lo extends Ao{}class zo extends Ao{}class Io extends R{}class Bo extends Io{}class No extends Io{}class Oo extends R{}class Do extends Oo{}class Vo extends R{}class jo extends Vo{}class Ro extends Vo{async _call(e){return new Ri(await super._call(e))}}class Go extends R{}class qo extends Go{}class $o extends Go{async _call(e){return new Ri(await super._call(e))}}class Wo extends R{}class Uo extends Wo{}class Xo extends Wo{async _call(e){return new Ri(await super._call(e))}}class Qo extends R{}class Ho extends Qo{}class Yo extends Qo{async _call(e){return new Jo(await super._call(e))}}class Jo extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ko extends R{}class Zo extends Ko{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new es(await super._call(e))}}class es extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class ts extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ns extends 
ts{}class rs extends ts{}class os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ss extends os{}class as extends os{}class is extends R{}class ls extends is{}class cs extends is{async _call(e){return new Ui(await super._call(e))}}class ds extends is{async _call(e){return new Ri(await super._call(e))}}class us extends is{async _call(e){return new qi(await super._call(e))}}class ps extends R{}class hs extends ps{}class ms extends ps{async _call(e){return new qi(await super._call(e))}}class _s extends R{}class fs extends _s{}class gs extends R{}class Ms extends gs{}class ws extends gs{async _call(e){return new Ui(await super._call(e))}}class bs extends gs{async _call(e){return new Ri(await super._call(e))}}class Ts extends R{}class xs extends Ts{}class ys extends Ts{async _call(e){return new Ui(await super._call(e))}}class ks extends Ts{async _call(e){return new Ri(await super._call(e))}}class Fs extends Ts{async _call(e){return new qi(await super._call(e))}}class Cs extends R{}class Ps extends Cs{}class vs extends Cs{async _call(e){return new Ui(await super._call(e))}}class Ss extends Cs{async _call(e){return new Ri(await super._call(e))}}class As extends R{}class Es extends is{}class Ls extends is{async _call(e){return new Ui(await super._call(e))}}class zs extends is{async _call(e){return new Ri(await super._call(e))}}class Is extends R{}class Bs extends Is{}class Ns extends Is{async _call(e){return new Ui(await super._call(e))}}class Os extends Is{async _call(e){return new Ri(await super._call(e))}}class Ds extends Is{async _call(e){return new Gi(await super._call(e))}}class Vs extends Is{async _call(e){return new qi(await super._call(e))}}class js extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Rs extends js{}class Gs extends js{}class qs extends js{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*r),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let r;r=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=n)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class $s extends R{main_input_name="spectrogram"}class Ws extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Us extends Ws{}class Xs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qs extends Xs{}class Hs extends Xs{}class Ys extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Js extends Ys{}class Ks extends Ys{}class Zs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ea extends Zs{}class ta extends Zs{}class na extends R{}class ra extends na{}class oa extends na{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class sa extends na{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class aa extends R{}class ia extends aa{async _call(e){return new Hi(await 
super._call(e))}}class la extends R{}class ca extends la{}class da extends la{}class ua extends la{}class pa extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ha extends pa{}class ma extends pa{}class _a extends R{}class fa extends _a{}class ga extends _a{async _call(e){return new Ri(await super._call(e))}}class Ma extends R{}class wa extends Ma{}class ba extends Ma{}class Ta extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,o=n-r;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const a=t%n-Math.floor(t/n)%r;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/r),i=s/(a*r);return new u.Tensor(e.type,e.data.slice(0,s),[a,r,i])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await A(this.sessions.encodec_decode,{audio_codes:n});return r}}class xa extends R{}class ya extends xa{}class ka extends xa{async _call(e){return new Ri(await super._call(e))}}class Fa extends R{}class Ca extends Fa{}class Pa extends Fa{async _call(e){return new Ri(await super._call(e))}}class va extends R{}class Sa extends va{}class Aa extends va{async _call(e){return new Ri(await super._call(e))}}class Ea extends R{}class La extends Ea{}class za extends Ea{async _call(e){return new Ri(await super._call(e))}}class Ia{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Ba=new 
Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ye]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",ra]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",kn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",Mn]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",ls]],["wav2vec2-bert",["Wav2Vec2BertModel",Ps]],["unispeech",["UniSpeechModel",Ms]],["unispeech-sat",["UniSpeechSatModel",xs]],["hubert",["HubertModel",Es]],["wavlm",["WavLMModel",Bs]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",ia]],["pyannote",["PyAnnoteModel",hs]],["wespeaker-resnet",["WeSpeakerResNetModel",fs]],["detr",["DetrModel",Yr]],["rt_detr",["RTDetrModel",no]],["table-transformer",["TableTransformerModel",ao]],["vit",["ViTModel",Pr]],["fastvit",["FastViTModel",Ar]],["mobilevit",["MobileViTModel",Br]],["mobilevitv2",["MobileViTV2Model",Dr]],["owlvit",["OwlViTModel",Rr]],["owlv2",["Owlv2Model",$r]],["beit",["BeitModel",Xr]],["deit",["DeiTModel",uo]],["hiera",["HieraModel",mo]],["convnext",["ConvNextModel",jo]],["convnextv2",["ConvNextV2Model",qo]],["dinov2",["Dinov2Model",Uo]],["resnet",["ResNetModel",go]],["swin",["SwinModel",bo]],["swin2sr",["Swin2SRModel",yo]],["donut-swin",["DonutSwinModel",Do]],["yolos",["YolosModel",Ho]],["dpt",["DPTModel",Co]],["glpn",["GLPNModel",Bo]],["hifigan",["SpeechT5HifiGan",$s]],["efficientnet",["EfficientNetModel",fa]],["mobilenet_v1",["MobileNetV1Model",ya]],["mobilenet_v2",["MobileNetV2Model",Ca]],["mobilenet_v3",["MobileNetV3Model",Sa]],["mobilenet_v4",["MobileNetV4Model",La]]]),Na=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",ns]],["whisper",["WhisperModel",on]],["m2m_100",["M2M100Model",ss]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Oa=new Map([["bloom",["BloomModel",Mr]],["jais",["JAISModel",An]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Vn]],["gpt_bigcode",["GPTBigCodeModel",Gn]],["gpt_neo",["GPTNeoModel",zn]],["gpt_neox",["GPTNeoXModel",Nn]],["codegen",["CodeGenModel",Wn]],["llama",["LlamaModel",Qn]],["cohere",["CohereModel",Jn]],["gemma",["GemmaModel",er]],["gemma2",["Gemma2Model",rr]],["openelm",["OpenELMModel",ar]],["qwen2",["Qwen2Model",cr]],["phi",["PhiModel",pr]],["phi3",["Phi3Model",_r]],["mpt",["MptModel",Tr]],["opt",["OPTModel",kr]],["mistral",["MistralModel",Qs]],["starcoder2",["Starcoder2Model",Js]],["falcon",["FalconModel",ea]],["stablelm",["StableLmModel",ha]]]),Da=new Map([["speecht5",["SpeechT5ForSpeechToText",Gs]],["whisper",["WhisperForConditionalGeneration",sn]]]),Va=new Map([["speecht5",["SpeechT5ForTextToSpeech",qs]]]),ja=new Map([["vits",["VitsModel",ia]],["musicgen",["MusicgenForConditionalGeneration",Ta]]]),Ra=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",kt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),Ga=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Kt]]]),qa=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",yt]],["mbart",["MBartForConditionalGeneration",Pt]],["marian",["MarianMTModel",rs]],["m2m_100",["M2M100ForConditionalGeneration",as]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),$a=new Map([["bloom",["BloomForCausalLM",wr]],["gpt2",["GPT2LMHeadModel",vn]],["jais",["JAISLMHeadModel",En]],["gptj",["GPTJForCausalLM",jn]],["gpt_bigcode",["GPTBigCodeForCausalLM",qn]],["gpt_neo",["GPTNeoForCausalLM",In]],["gpt_neox",["GPTNeoXForCausalLM",On]],["codegen",["CodeGenForCausalLM",Un]],["llama",["LlamaForCausalLM",Hn]],["cohere",["CohereForCausalLM",Kn]],["gemma",["GemmaForCausalLM",tr]],["gemma2",["Gemma2ForCausalLM",or]],["openelm",["OpenELMForCausalLM",ir]],["qwen2",["Qwen2ForCausalLM",dr]],["phi",["PhiForCausalLM",hr]],["phi3",["Phi3ForCausalLM",fr]],["mpt",["MptForCausalLM",xr]],["opt",["OPTForCausalLM",Fr]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Hs]],["starcoder2",["Starcoder2ForCausalLM",Ks]],["falcon",["FalconForCausalLM",ta]],["trocr",["TrOCRForCausalLM",Us]],["stablelm",["StableLmForCausalLM",ma]]]),Wa=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ke]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Ke]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),Ua=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",Te]],["deberta",["DebertaForQuestionAnswering",Pe]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),Xa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Qa=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Ha=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Ya=new Map([["vit",["ViTForImageClassification",vr]],["fastvit",["FastViTForImageClassification",Er]],["mobilevit",["MobileViTForImageClassification",Nr]],["mobilevitv2",["MobileViTV2ForImageClassification",Vr]],["beit",["BeitForImageClassification",Qr]],["deit",["DeiTForImageClassification",po]],["hiera",["HieraForImageClassification",_o]],["convnext",["ConvNextForImageClassification",Ro]],["convnextv2",["ConvNextV2ForImageClassification",$o]],["dinov2",["Dinov2ForImageClassification",Xo]],["resnet",["ResNetForImageClassification",Mo]],["swin",["SwinForImageClassification",To]],["segformer",["SegformerForImageClassification",da]],["efficientnet",["EfficientNetForImageClassification",ga]],["mobilenet_v1",["MobileNetV1ForImageClassification",ka]],["mobilenet_v2",["MobileNetV2ForImageClassification",Pa]],["mobilenet_v3",["MobileNetV3ForImageClassification",Aa]],["mobilenet_v4",["MobileNetV4ForImageClassification",za]]]),Ja=new Map([["detr",["DetrForObjectDetection",Jr]],["rt_detr",["RTDetrForObjectDetection",ro]],["table-transformer",["TableTransformerForObjectDetection",io]],["yolos",["YolosForObjectDetection",Yo]]]),Ka=new Map([["owlvit",["OwlViTForObjectDetection",Gr]],["owlv2",["Owlv2ForObjectDetection",Wr]]]),Za=new Map([["detr",["DetrForSegmentation",Kr]],["clipseg",["CLIPSegForImageSegmentation",Fn]]]),ei=new Map([["segformer",["SegformerForSemanticSegmentation",ua]],["sapiens",["SapiensForSemanticSegmentation",Eo]]]),ti=new Map([["sam",["SamModel",Zo]]]),ni=new Map([["wav2vec2",["Wav2Vec2ForCTC",cs]],["wav2vec2-bert",["Wav2Vec2BertForCTC",vs]],["unispeech",["UniSpeechForCTC",ws]],["unispeech-sat",["UniSpeechSatForCTC",ys]],["wavlm",["WavLMForCTC",Ns]],["hubert",["HubertForCTC",Ls]]]),ri=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",ds]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Ss]],["unispeech",["UniSpeechForSequenceClassification",bs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",ks]],["wavlm",["WavLMForSequenceClassification",Os]],["hubert",["HubertForSequenceClassification",zs]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),oi=new Map([["wavlm",["WavLMForXVector",Ds]]]),si=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Fs]],["wavlm",["WavLMForAudioFrameClassification",Vs]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",us]],["pyannote",["PyAnnoteForAudioFrameClassification",ms]]]),ai=new Map([["vitmatte",["VitMatteForImageMatting",zr]]]),ii=new 
Map([["swin2sr",["Swin2SRForImageSuperResolution",ko]]]),li=new Map([["dpt",["DPTForDepthEstimation",Po]],["depth_anything",["DepthAnythingForDepthEstimation",So]],["glpn",["GLPNForDepthEstimation",No]],["sapiens",["SapiensForDepthEstimation",Lo]]]),ci=new Map([["sapiens",["SapiensForNormalEstimation",zo]]]),di=new Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),ui=[[Ba,M],[Na,w],[Oa,x],[Ra,M],[Ga,M],[qa,b],[Da,b],[$a,x],[Wa,M],[Ua,M],[Xa,T],[Qa,k],[Ya,M],[Za,M],[ei,M],[ai,M],[ii,M],[li,M],[ci,M],[Ja,M],[Ka,M],[ti,y],[ni,M],[ri,M],[Va,b],[ja,M],[oi,M],[si,M],[di,M]];for(const[e,t]of ui)for(const[n,r]of e.values())C.set(n,t),v.set(r,n),P.set(n,r);const pi=[["MusicgenForConditionalGeneration",Ta,F],["CLIPTextModelWithProjection",_n,M],["SiglipTextModel",wn,M],["ClapTextModelWithProjection",oa,M],["ClapAudioModelWithProjection",sa,M]];for(const[e,t,n]of pi)C.set(e,n),v.set(t,e),P.set(e,t);class hi extends Ia{static MODEL_CLASS_MAPPINGS=ui.map((e=>e[0]));static BASE_IF_FAIL=!0}class mi extends Ia{static MODEL_CLASS_MAPPINGS=[Ra]}class _i extends Ia{static MODEL_CLASS_MAPPINGS=[Ga]}class fi extends Ia{static MODEL_CLASS_MAPPINGS=[qa]}class gi extends Ia{static MODEL_CLASS_MAPPINGS=[Da]}class Mi extends Ia{static MODEL_CLASS_MAPPINGS=[Va]}class wi extends Ia{static MODEL_CLASS_MAPPINGS=[ja]}class bi extends Ia{static MODEL_CLASS_MAPPINGS=[$a]}class Ti extends Ia{static MODEL_CLASS_MAPPINGS=[Wa]}class xi extends Ia{static MODEL_CLASS_MAPPINGS=[Ua]}class yi extends Ia{static MODEL_CLASS_MAPPINGS=[Xa]}class ki extends Ia{static MODEL_CLASS_MAPPINGS=[Ya]}class Fi extends Ia{static MODEL_CLASS_MAPPINGS=[Za]}class Ci extends Ia{static MODEL_CLASS_MAPPINGS=[ei]}class Pi extends Ia{static MODEL_CLASS_MAPPINGS=[Ja]}class vi extends Ia{static MODEL_CLASS_MAPPINGS=[Ka]}class Si extends Ia{static MODEL_CLASS_MAPPINGS=[ti]}class Ai extends Ia{static MODEL_CLASS_MAPPINGS=[ni]}class Ei extends Ia{static MODEL_CLASS_MAPPINGS=[ri]}class Li extends Ia{static MODEL_CLASS_MAPPINGS=[oi]}class zi extends Ia{static MODEL_CLASS_MAPPINGS=[si]}class Ii extends Ia{static MODEL_CLASS_MAPPINGS=[Ha]}class Bi extends Ia{static MODEL_CLASS_MAPPINGS=[ai]}class Ni extends Ia{static MODEL_CLASS_MAPPINGS=[ii]}class Oi extends Ia{static MODEL_CLASS_MAPPINGS=[li]}class Di extends Ia{static MODEL_CLASS_MAPPINGS=[ci]}class Vi extends Ia{static MODEL_CLASS_MAPPINGS=[di]}class ji extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=o}}class Ri extends G{constructor({logits:e}){super(),this.logits=e}}class Gi extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class qi extends G{constructor({logits:e}){super(),this.logits=e}}class $i extends G{constructor({logits:e}){super(),this.logits=e}}class Wi extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Ui extends G{constructor({logits:e}){super(),this.logits=e}}class Xi extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Qi extends G{constructor({alphas:e}){super(),this.alphas=e}}class Hi extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
113
113
  /*!**********************************************!*\
114
114
  !*** ./src/models/whisper/common_whisper.js ***!
115
115
  \**********************************************/(e,t,n)=>{n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>o,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>s,whisper_language_to_code:()=>a});const r=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],o=new Map(r),s=new Map([...r.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function a(e){e=e.toLowerCase();let t=s.get(e);if(void 0===t){if(!o.has(e)){const t=2===e.length?o.keys():o.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -124,7 +124,7 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
124
124
  \**************************/(e,t,n)=>{n.r(t),n.d(t,{AudioClassificationPipeline:()=>v,AutomaticSpeechRecognitionPipeline:()=>A,DepthEstimationPipeline:()=>j,DocumentQuestionAnsweringPipeline:()=>O,FeatureExtractionPipeline:()=>C,FillMaskPipeline:()=>w,ImageClassificationPipeline:()=>L,ImageFeatureExtractionPipeline:()=>P,ImageSegmentationPipeline:()=>z,ImageToImagePipeline:()=>V,ImageToTextPipeline:()=>E,ObjectDetectionPipeline:()=>B,Pipeline:()=>_,QuestionAnsweringPipeline:()=>M,SummarizationPipeline:()=>T,Text2TextGenerationPipeline:()=>b,TextClassificationPipeline:()=>f,TextGenerationPipeline:()=>k,TextToAudioPipeline:()=>D,TokenClassificationPipeline:()=>g,TranslationPipeline:()=>x,ZeroShotAudioClassificationPipeline:()=>S,ZeroShotClassificationPipeline:()=>F,ZeroShotImageClassificationPipeline:()=>I,ZeroShotObjectDetectionPipeline:()=>N,pipeline:()=>q});var r=n(/*! ./tokenizers.js */"./src/tokenizers.js"),o=n(/*! ./models.js */"./src/models.js"),s=n(/*! ./processors.js */"./src/processors.js"),a=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/maths.js */"./src/utils/maths.js"),c=n(/*! ./utils/audio.js */"./src/utils/audio.js"),d=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),u=n(/*! ./utils/image.js */"./src/utils/image.js");async function p(e){return Array.isArray(e)||(e=[e]),await Promise.all(e.map((e=>u.RawImage.read(e))))}async function h(e,t){return Array.isArray(e)||(e=[e]),await Promise.all(e.map((e=>"string"==typeof e||e instanceof URL?(0,c.read_audio)(e,t):e instanceof Float64Array?new Float32Array(e):e)))}function m(e,t){t&&(e=e.map((e=>0|e)));const[n,r,o,s]=e;return{xmin:n,ymin:r,xmax:o,ymax:s}}class _ extends a.Callable{constructor({task:e,model:t,tokenizer:n=null,processor:r=null}){super(),this.task=e,this.model=t,this.tokenizer=n,this.processor=r}async dispose(){await this.model.dispose()}}class f extends _{constructor(e){super(e)}async _call(e,{top_k:t=1}={}){const n=this.tokenizer(e,{padding:!0,truncation:!0}),r=await this.model(n),o="multi_label_classification"===this.model.config.problem_type?e=>e.sigmoid():e=>new d.Tensor("float32",(0,l.softmax)(e.data),e.dims),s=this.model.config.id2label,a=[];for(const e of r.logits){const n=o(e),r=await(0,d.topk)(n,t),i=r[0].tolist(),l=r[1].tolist().map(((e,t)=>({label:s?s[e]:`LABEL_${e}`,score:i[t]})));1===t?a.push(...l):a.push(l)}return Array.isArray(e)||1===t?a:a[0]}}class g extends _{constructor(e){super(e)}async _call(e,{ignore_labels:t=["O"]}={}){const n=Array.isArray(e),r=this.tokenizer(n?e:[e],{padding:!0,truncation:!0}),o=(await this.model(r)).logits,s=this.model.config.id2label,a=[];for(let e=0;e<o.dims[0];++e){const n=r.input_ids[e],i=o[e],c=[];for(let e=0;e<i.dims[0];++e){const r=i[e],o=(0,l.max)(r.data)[1],a=s?s[o]:`LABEL_${o}`;if(t.includes(a))continue;const d=this.tokenizer.decode([n[e].item()],{skip_special_tokens:!0});if(""===d)continue;const u=(0,l.softmax)(r.data);c.push({entity:a,score:u[o],index:e,word:d})}a.push(c)}return n?a:a[0]}}class M extends _{constructor(e){super(e)}async _call(e,t,{top_k:n=1}={}){const r=this.tokenizer(e,{text_pair:t,padding:!0,truncation:!0}),{start_logits:o,end_logits:s}=await this.model(r),a=r.input_ids.tolist(),c=r.attention_mask.tolist(),d=this.tokenizer.all_special_ids,u=[];for(let e=0;e<o.dims[0];++e){const t=a[e],r=t.findIndex((e=>e==this.tokenizer.sep_token_id)),p=(c[e].map(((e,n)=>1==e&&(0===n||n>r&&-1===d.findIndex((e=>e==t[n]))))),o[e].tolist()),h=s[e].tolist();for(let 
n=1;n<p.length;++n)(0==c[e]||n<=r||-1!==d.findIndex((e=>e==t[n])))&&(p[n]=-1/0,h[n]=-1/0);const m=(0,l.softmax)(p).map(((e,t)=>[e,t])),_=(0,l.softmax)(h).map(((e,t)=>[e,t]));m[0][0]=0,_[0][0]=0;const f=(0,i.product)(m,_).filter((e=>e[0][1]<=e[1][1])).map((e=>[e[0][1],e[1][1],e[0][0]*e[1][0]])).sort(((e,t)=>t[2]-e[2]));for(let e=0;e<Math.min(f.length,n);++e){const[n,r,o]=f[e],s=t.slice(n,r+1),a=this.tokenizer.decode(s,{skip_special_tokens:!0});u.push({answer:a,score:o})}}return 1===n?u[0]:u}}class w extends _{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=this.tokenizer(e,{padding:!0,truncation:!0}),{logits:r}=await this.model(n),o=[],s=n.input_ids.tolist();for(let e=0;e<s.length;++e){const n=s[e],a=n.findIndex((e=>e==this.tokenizer.mask_token_id));if(-1===a)throw Error(`Mask token (${this.tokenizer.mask_token}) not found in text.`);const i=r[e][a],c=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(i.data),i.dims),t),u=c[0].tolist(),p=c[1].tolist();o.push(p.map(((e,t)=>{const r=n.slice();return r[a]=e,{score:u[t],token:Number(e),token_str:this.tokenizer.model.vocab[e],sequence:this.tokenizer.decode(r,{skip_special_tokens:!0})}})))}return Array.isArray(e)?o:o[0]}}class b extends _{_key="generated_text";constructor(e){super(e)}async _call(e,t={}){Array.isArray(e)||(e=[e]),this.model.config.prefix&&(e=e.map((e=>this.model.config.prefix+e)));const n=this.model.config.task_specific_params;n&&n[this.task]&&n[this.task].prefix&&(e=e.map((e=>n[this.task].prefix+e)));const r=this.tokenizer,o={padding:!0,truncation:!0};let s;s=this instanceof x&&"_build_translation_inputs"in r?r._build_translation_inputs(e,o,t):r(e,o);const a=await this.model.generate({...s,...t});return r.batch_decode(a,{skip_special_tokens:!0}).map((e=>({[this._key]:e})))}}class T extends b{_key="summary_text";constructor(e){super(e)}}class x extends b{_key="translation_text";constructor(e){super(e)}}function y(e){return Array.isArray(e)&&e.every((e=>"role"in e&&"content"in e))}class k extends _{constructor(e){super(e)}async _call(e,t={}){let n,r=!1,o=!1;if("string"==typeof e)n=e=[e];else if(Array.isArray(e)&&e.every((e=>"string"==typeof e)))r=!0,n=e;else{if(y(e))e=[e];else{if(!Array.isArray(e)||!e.every(y))throw new Error("Input must be a string, an array of strings, a Chat, or an array of Chats");r=!0}o=!0,n=e.map((e=>this.tokenizer.apply_chat_template(e,{tokenize:!1,add_generation_prompt:!0})))}const s=t.add_special_tokens??!1,a=!o&&(t.return_full_text??!0);this.tokenizer.padding_side="left";const i=this.tokenizer(n,{add_special_tokens:s,padding:!0,truncation:!0}),l=await this.model.generate({...i,...t}),c=this.tokenizer.batch_decode(l,{skip_special_tokens:!0});let d;!a&&i.input_ids.dims.at(-1)>0&&(d=this.tokenizer.batch_decode(i.input_ids,{skip_special_tokens:!0}).map((e=>e.length)));const u=Array.from({length:e.length},(e=>[]));for(let t=0;t<c.length;++t){const n=Math.floor(t/l.dims[0]*e.length);d&&(c[t]=c[t].slice(d[n])),u[n].push({generated_text:o?[...e[n],{role:"assistant",content:c[t]}]:c[t]})}return r||1!==u.length?u:u[0]}}class F extends _{constructor(e){super(e),this.label2id=Object.fromEntries(Object.entries(this.model.config.label2id).map((([e,t])=>[e.toLowerCase(),t]))),this.entailment_id=this.label2id.entailment,void 0===this.entailment_id&&(console.warn("Could not find 'entailment' in label2id mapping. 
Using 2 as entailment_id."),this.entailment_id=2),this.contradiction_id=this.label2id.contradiction??this.label2id.not_entailment,void 0===this.contradiction_id&&(console.warn("Could not find 'contradiction' in label2id mapping. Using 0 as contradiction_id."),this.contradiction_id=0)}async _call(e,t,{hypothesis_template:n="This example is {}.",multi_label:r=!1}={}){const o=Array.isArray(e);o||(e=[e]),Array.isArray(t)||(t=[t]);const s=t.map((e=>n.replace("{}",e))),a=r||1===t.length,i=[];for(const n of e){const e=[];for(const t of s){const r=this.tokenizer(n,{text_pair:t,padding:!0,truncation:!0}),o=await this.model(r);a?e.push([o.logits.data[this.contradiction_id],o.logits.data[this.entailment_id]]):e.push(o.logits.data[this.entailment_id])}const r=(a?e.map((e=>(0,l.softmax)(e)[1])):(0,l.softmax)(e)).map(((e,t)=>[e,t])).sort(((e,t)=>t[0]-e[0]));i.push({sequence:n,labels:r.map((e=>t[e[1]])),scores:r.map((e=>e[0]))})}return o?i:i[0]}}class C extends _{constructor(e){super(e)}async _call(e,{pooling:t="none",normalize:n=!1,quantize:r=!1,precision:o="binary"}={}){const s=this.tokenizer(e,{padding:!0,truncation:!0}),a=await this.model(s);let i=a.last_hidden_state??a.logits??a.token_embeddings;if("none"===t);else if("mean"===t)i=(0,d.mean_pooling)(i,s.attention_mask);else{if("cls"!==t)throw Error(`Pooling method '${t}' not supported.`);i=i.slice(null,0)}return n&&(i=i.normalize(2,-1)),r&&(i=(0,d.quantize_embeddings)(i,o)),i}}class P extends _{constructor(e){super(e)}async _call(e,{pool:t=null}={}){const n=await p(e),{pixel_values:r}=await this.processor(n),o=await this.model({pixel_values:r});let s;if(t){if(!("pooler_output"in o))throw Error("No pooled output was returned. Make sure the model has a 'pooler' layer when using the 'pool' option.");s=o.pooler_output}else s=o.last_hidden_state??o.logits??o.image_embeds;return s}}class v extends _{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=this.processor.feature_extractor.config.sampling_rate,r=await h(e,n),o=this.model.config.id2label,s=[];for(const e of r){const n=await this.processor(e),r=(await this.model(n)).logits[0],a=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(r.data),r.dims),t),i=a[0].tolist(),c=a[1].tolist().map(((e,t)=>({label:o?o[e]:`LABEL_${e}`,score:i[t]})));s.push(c)}return Array.isArray(e)?s:s[0]}}class S extends _{constructor(e){super(e)}async _call(e,t,{hypothesis_template:n="This is a sound of {}."}={}){const r=!Array.isArray(e);r&&(e=[e]);const o=t.map((e=>n.replace("{}",e))),s=this.tokenizer(o,{padding:!0,truncation:!0}),a=this.processor.feature_extractor.config.sampling_rate,i=await h(e,a),c=[];for(const e of i){const n=await this.processor(e),r=await this.model({...s,...n}),o=(0,l.softmax)(r.logits_per_audio.data);c.push([...o].map(((e,n)=>({score:e,label:t[n]}))))}return r?c[0]:c}}class A extends _{constructor(e){super(e)}async _call(e,t={}){switch(this.model.config.model_type){case"whisper":return this._call_whisper(e,t);case"wav2vec2":case"wav2vec2-bert":case"unispeech":case"unispeech-sat":case"hubert":return this._call_wav2vec2(e,t);default:throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)}}async _call_wav2vec2(e,t){t.language&&console.warn('`language` parameter is not yet supported for `wav2vec2` models, defaulting to "English".'),t.task&&console.warn('`task` parameter is not yet supported for `wav2vec2` models, defaulting to "transcribe".');const n=!Array.isArray(e);n&&(e=[e]);const 
r=this.processor.feature_extractor.config.sampling_rate,o=await h(e,r),s=[];for(const e of o){const t=await this.processor(e),n=(await this.model(t)).logits[0],r=[];for(const e of n)r.push((0,l.max)(e.data)[1]);const o=this.tokenizer.decode(r);s.push({text:o})}return n?s[0]:s}async _call_whisper(e,t){const n=t.return_timestamps??!1,r=t.chunk_length_s??0,o=t.force_full_sequences??!1;let s=t.stride_length_s??null;const a={...t};"word"===n&&(a.return_token_timestamps=!0,a.return_timestamps=!1);const i=!Array.isArray(e);i&&(e=[e]);const c=this.processor.feature_extractor.config.chunk_length/this.model.config.max_source_positions,d=this.processor.feature_extractor.config.hop_length,u=this.processor.feature_extractor.config.sampling_rate,p=await h(e,u),m=[];for(const e of p){let t=[];if(r>0){if(null===s)s=r/6;else if(r<=s)throw Error("`chunk_length_s` must be larger than `stride_length_s`.");const n=u*r,o=u*s,a=n-2*o;let i=0;for(;;){const r=i+n,s=e.subarray(i,r),l=await this.processor(s),c=0===i,d=r>=e.length;if(t.push({stride:[s.length,c?0:o,d?0:o],input_features:l.input_features,is_last:d}),d)break;i+=a}}else t=[{stride:[e.length,0,0],input_features:(await this.processor(e)).input_features,is_last:!0}];for(const e of t){a.num_frames=Math.floor(e.stride[0]/d);const t=await this.model.generate({inputs:e.input_features,...a});"word"===n?(e.tokens=t.sequences.tolist()[0],e.token_timestamps=t.token_timestamps.tolist()[0].map((e=>(0,l.round)(e,2)))):e.tokens=t[0].tolist(),e.stride=e.stride.map((e=>e/u))}const[i,p]=this.tokenizer._decode_asr(t,{time_precision:c,return_timestamps:n,force_full_sequences:o});m.push({text:i,...p})}return i?m[0]:m}}class E extends _{constructor(e){super(e)}async _call(e,t={}){const n=Array.isArray(e),r=await p(e),{pixel_values:o}=await this.processor(r),s=[];for(const e of o){e.dims=[1,...e.dims];const n=await this.model.generate({inputs:e,...t}),r=this.tokenizer.batch_decode(n,{skip_special_tokens:!0}).map((e=>({generated_text:e.trim()})));s.push(r)}return n?s:s[0]}}class L extends _{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=await p(e),{pixel_values:r}=await this.processor(n),o=await this.model({pixel_values:r}),s=this.model.config.id2label,a=[];for(const e of o.logits){const n=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(e.data),e.dims),t),r=n[0].tolist(),o=n[1].tolist().map(((e,t)=>({label:s?s[e]:`LABEL_${e}`,score:r[t]})));a.push(o)}return Array.isArray(e)?a:a[0]}}class z extends _{constructor(e){super(e),this.subtasks_mapping={panoptic:"post_process_panoptic_segmentation",instance:"post_process_instance_segmentation",semantic:"post_process_semantic_segmentation"}}async _call(e,{threshold:t=.5,mask_threshold:n=.5,overlap_mask_area_threshold:r=.8,label_ids_to_fuse:o=null,target_sizes:s=null,subtask:a=null}={}){if(Array.isArray(e)&&1!==e.length)throw Error("Image segmentation pipeline currently only supports a batch size of 1.");const i=await p(e),l=i.map((e=>[e.height,e.width])),{pixel_values:c,pixel_mask:d}=await this.processor(i),h=await this.model({pixel_values:c,pixel_mask:d});let m=null;if(null!==a)m=this.subtasks_mapping[a];else for(let[e,t]of Object.entries(this.subtasks_mapping))if(t in this.processor.feature_extractor){m=this.processor.feature_extractor[t].bind(this.processor.feature_extractor),a=e;break}const _=this.model.config.id2label,f=[];if("panoptic"===a||"instance"===a){const e=m(h,t,n,r,o,s??l)[0],a=e.segmentation;for(const t of e.segments_info){const e=new Uint8ClampedArray(a.data.length);for(let 
n=0;n<a.data.length;++n)a.data[n]===t.id&&(e[n]=255);const n=new u.RawImage(e,a.dims[1],a.dims[0],1);f.push({score:t.score,label:_[t.label_id],mask:n})}}else{if("semantic"!==a)throw Error(`Subtask ${a} not supported.`);{const{segmentation:e,labels:t}=m(h,s??l)[0];for(const n of t){const t=new Uint8ClampedArray(e.data.length);for(let r=0;r<e.data.length;++r)e.data[r]===n&&(t[r]=255);const r=new u.RawImage(t,e.dims[1],e.dims[0],1);f.push({score:null,label:_[n],mask:r})}}}return f}}class I extends _{constructor(e){super(e)}async _call(e,t,{hypothesis_template:n="This is a photo of {}"}={}){const r=Array.isArray(e),o=await p(e),s=t.map((e=>n.replace("{}",e))),a=this.tokenizer(s,{padding:"siglip"!==this.model.config.model_type||"max_length",truncation:!0}),{pixel_values:i}=await this.processor(o),c=await this.model({...a,pixel_values:i}),d="siglip"===this.model.config.model_type?e=>e.sigmoid().data:e=>(0,l.softmax)(e.data),u=[];for(const e of c.logits_per_image){const n=[...d(e)].map(((e,n)=>({score:e,label:t[n]})));n.sort(((e,t)=>t.score-e.score)),u.push(n)}return r?u:u[0]}}class B extends _{constructor(e){super(e)}async _call(e,{threshold:t=.9,percentage:n=!1}={}){const r=Array.isArray(e);if(r&&1!==e.length)throw Error("Object detection pipeline currently only supports a batch size of 1.");const o=await p(e),s=n?null:o.map((e=>[e.height,e.width])),{pixel_values:a,pixel_mask:i}=await this.processor(o),l=await this.model({pixel_values:a,pixel_mask:i}),c=this.processor.feature_extractor.post_process_object_detection(l,t,s),d=this.model.config.id2label,u=c.map((e=>e.boxes.map(((t,r)=>({score:e.scores[r],label:d[e.classes[r]],box:m(t,!n)})))));return r?u:u[0]}}class N extends _{constructor(e){super(e)}async _call(e,t,{threshold:n=.1,top_k:r=null,percentage:o=!1}={}){const s=Array.isArray(e),a=await p(e),i=this.tokenizer(t,{padding:!0,truncation:!0}),l=await this.processor(a),c=[];for(let e=0;e<a.length;++e){const s=a[e],d=o?null:[[s.height,s.width]],u=l.pixel_values[e].unsqueeze_(0),p=await this.model({...i,pixel_values:u}),h=this.processor.feature_extractor.post_process_object_detection(p,n,d,!0)[0];let _=h.boxes.map(((e,n)=>({score:h.scores[n],label:t[h.classes[n]],box:m(e,!o)}))).sort(((e,t)=>t.score-e.score));null!==r&&(_=_.slice(0,r)),c.push(_)}return s?c:c[0]}}class O extends _{constructor(e){super(e)}async _call(e,t,n={}){throw new Error("This pipeline is not yet supported in Transformers.js v3.")}}class D extends _{DEFAULT_VOCODER_ID="Xenova/speecht5_hifigan";constructor(e){super(e),this.vocoder=e.vocoder??null}async _call(e,{speaker_embeddings:t=null}={}){return this.processor?this._call_text_to_spectrogram(e,{speaker_embeddings:t}):this._call_text_to_waveform(e)}async _call_text_to_waveform(e){const t=this.tokenizer(e,{padding:!0,truncation:!0}),{waveform:n}=await this.model(t),r=this.model.config.sampling_rate;return{audio:n.data,sampling_rate:r}}async _call_text_to_spectrogram(e,{speaker_embeddings:t}){if(this.vocoder||(console.log("No vocoder specified, using default HifiGan vocoder."),this.vocoder=await o.AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID,{dtype:"fp32"})),("string"==typeof t||t instanceof URL)&&(t=new Float32Array(await(await fetch(t)).arrayBuffer())),t instanceof Float32Array)t=new d.Tensor("float32",t,[1,t.length]);else if(!(t instanceof d.Tensor))throw new Error("Speaker embeddings must be a `Tensor`, `Float32Array`, `string`, or `URL`.");const{input_ids:n}=this.tokenizer(e,{padding:!0,truncation:!0}),{waveform:r}=await 
this.model.generate_speech(n,t,{vocoder:this.vocoder}),s=this.processor.feature_extractor.config.sampling_rate;return{audio:r.data,sampling_rate:s}}}class V extends _{constructor(e){super(e)}async _call(e){const t=await p(e),n=await this.processor(t),r=await this.model(n),o=[];for(const e of r.reconstruction){const t=e.squeeze().clamp_(0,1).mul_(255).round_().to("uint8");o.push(u.RawImage.fromTensor(t))}return o.length>1?o:o[0]}}class j extends _{constructor(e){super(e)}async _call(e){const t=await p(e),n=await this.processor(t),{predicted_depth:r}=await this.model(n),o=[];for(let e=0;e<t.length;++e){const n=(0,d.interpolate)(r[e],t[e].size.reverse(),"bilinear",!1),s=n.mul_(255/(0,l.max)(n.data)[0]).to("uint8");o.push({predicted_depth:r[e],depth:u.RawImage.fromTensor(s)})}return o.length>1?o:o[0]}}const R=Object.freeze({"text-classification":{tokenizer:r.AutoTokenizer,pipeline:f,model:o.AutoModelForSequenceClassification,default:{model:"Xenova/distilbert-base-uncased-finetuned-sst-2-english"},type:"text"},"token-classification":{tokenizer:r.AutoTokenizer,pipeline:g,model:o.AutoModelForTokenClassification,default:{model:"Xenova/bert-base-multilingual-cased-ner-hrl"},type:"text"},"question-answering":{tokenizer:r.AutoTokenizer,pipeline:M,model:o.AutoModelForQuestionAnswering,default:{model:"Xenova/distilbert-base-cased-distilled-squad"},type:"text"},"fill-mask":{tokenizer:r.AutoTokenizer,pipeline:w,model:o.AutoModelForMaskedLM,default:{model:"Xenova/bert-base-uncased"},type:"text"},summarization:{tokenizer:r.AutoTokenizer,pipeline:T,model:o.AutoModelForSeq2SeqLM,default:{model:"Xenova/distilbart-cnn-6-6"},type:"text"},translation:{tokenizer:r.AutoTokenizer,pipeline:x,model:o.AutoModelForSeq2SeqLM,default:{model:"Xenova/t5-small"},type:"text"},"text2text-generation":{tokenizer:r.AutoTokenizer,pipeline:b,model:o.AutoModelForSeq2SeqLM,default:{model:"Xenova/flan-t5-small"},type:"text"},"text-generation":{tokenizer:r.AutoTokenizer,pipeline:k,model:o.AutoModelForCausalLM,default:{model:"Xenova/gpt2"},type:"text"},"zero-shot-classification":{tokenizer:r.AutoTokenizer,pipeline:F,model:o.AutoModelForSequenceClassification,default:{model:"Xenova/distilbert-base-uncased-mnli"},type:"text"},"audio-classification":{pipeline:v,model:o.AutoModelForAudioClassification,processor:s.AutoProcessor,default:{model:"Xenova/wav2vec2-base-superb-ks"},type:"audio"},"zero-shot-audio-classification":{tokenizer:r.AutoTokenizer,pipeline:S,model:o.AutoModel,processor:s.AutoProcessor,default:{model:"Xenova/clap-htsat-unfused"},type:"multimodal"},"automatic-speech-recognition":{tokenizer:r.AutoTokenizer,pipeline:A,model:[o.AutoModelForSpeechSeq2Seq,o.AutoModelForCTC],processor:s.AutoProcessor,default:{model:"Xenova/whisper-tiny.en"},type:"multimodal"},"text-to-audio":{tokenizer:r.AutoTokenizer,pipeline:D,model:[o.AutoModelForTextToWaveform,o.AutoModelForTextToSpectrogram],processor:[s.AutoProcessor,null],default:{model:"Xenova/speecht5_tts"},type:"text"},"image-to-text":{tokenizer:r.AutoTokenizer,pipeline:E,model:o.AutoModelForVision2Seq,processor:s.AutoProcessor,default:{model:"Xenova/vit-gpt2-image-captioning"},type:"multimodal"},"image-classification":{pipeline:L,model:o.AutoModelForImageClassification,processor:s.AutoProcessor,default:{model:"Xenova/vit-base-patch16-224"},type:"multimodal"},"image-segmentation":{pipeline:z,model:[o.AutoModelForImageSegmentation,o.AutoModelForSemanticSegmentation],processor:s.AutoProcessor,default:{model:"Xenova/detr-resnet-50-panoptic"},type:"multimodal"},"zero-shot-image-classification
":{tokenizer:r.AutoTokenizer,pipeline:I,model:o.AutoModel,processor:s.AutoProcessor,default:{model:"Xenova/clip-vit-base-patch32"},type:"multimodal"},"object-detection":{pipeline:B,model:o.AutoModelForObjectDetection,processor:s.AutoProcessor,default:{model:"Xenova/detr-resnet-50"},type:"multimodal"},"zero-shot-object-detection":{tokenizer:r.AutoTokenizer,pipeline:N,model:o.AutoModelForZeroShotObjectDetection,processor:s.AutoProcessor,default:{model:"Xenova/owlvit-base-patch32"},type:"multimodal"},"document-question-answering":{tokenizer:r.AutoTokenizer,pipeline:O,model:o.AutoModelForDocumentQuestionAnswering,processor:s.AutoProcessor,default:{model:"Xenova/donut-base-finetuned-docvqa"},type:"multimodal"},"image-to-image":{pipeline:V,model:o.AutoModelForImageToImage,processor:s.AutoProcessor,default:{model:"Xenova/swin2SR-classical-sr-x2-64"},type:"image"},"depth-estimation":{pipeline:j,model:o.AutoModelForDepthEstimation,processor:s.AutoProcessor,default:{model:"Xenova/dpt-large"},type:"image"},"feature-extraction":{tokenizer:r.AutoTokenizer,pipeline:C,model:o.AutoModel,default:{model:"Xenova/all-MiniLM-L6-v2"},type:"text"},"image-feature-extraction":{processor:s.AutoProcessor,pipeline:P,model:[o.AutoModelForImageFeatureExtraction,o.AutoModel],default:{model:"Xenova/vit-base-patch16-224-in21k"},type:"image"}}),G=Object.freeze({"sentiment-analysis":"text-classification",ner:"token-classification",asr:"automatic-speech-recognition","text-to-speech":"text-to-audio",embeddings:"feature-extraction"});async function q(e,t=null,{progress_callback:n=null,config:r=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",device:l=null,dtype:c=null,model_file_name:d=null,session_options:u={}}={}){e=G[e]??e;const p=R[e.split("_",1)[0]];if(!p)throw Error(`Unsupported pipeline: ${e}. Must be one of [${Object.keys(R)}]`);t||(t=p.default.model,console.log(`No model specified. Using default model: "${t}".`));const h={progress_callback:n,config:r,cache_dir:o,local_files_only:s,revision:a,device:l,dtype:c,model_file_name:d,session_options:u},m=new Map([["tokenizer",p.tokenizer],["model",p.model],["processor",p.processor]]),_=await async function(e,t,n){const r=Object.create(null),o=[];for(let[s,a]of e.entries()){if(!a)continue;let e;e=Array.isArray(a)?new Promise((async(e,r)=>{let o;for(let s of a){if(null===s)return void e(null);try{return void e(await s.from_pretrained(t,n))}catch(e){if(e.message?.includes("Unsupported model type"))o=e;else{if(!e.message?.includes("Could not locate file"))return void r(e);o=e}}}r(o)})):a.from_pretrained(t,n),r[s]=e,o.push(e)}await Promise.all(o);for(let[e,t]of Object.entries(r))r[e]=await t;return r}(m,t,h);_.task=e,(0,i.dispatchCallback)(n,{status:"ready",task:e,model:t});return new(0,p.pipeline)(_)}},"./src/processors.js":
125
125
  /*!***************************!*\
126
126
  !*** ./src/processors.js ***!
127
- \***************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>Z,AutoProcessor:()=>pe,BeitFeatureExtractor:()=>G,BitImageProcessor:()=>T,CLIPFeatureExtractor:()=>y,CLIPImageProcessor:()=>k,ChineseCLIPFeatureExtractor:()=>F,ClapFeatureExtractor:()=>ee,ConvNextFeatureExtractor:()=>P,ConvNextImageProcessor:()=>v,DPTFeatureExtractor:()=>w,DPTImageProcessor:()=>b,DeiTFeatureExtractor:()=>R,DetrFeatureExtractor:()=>W,DonutFeatureExtractor:()=>q,EfficientNetImageProcessor:()=>E,FeatureExtractor:()=>_,Florence2Processor:()=>ue,GLPNFeatureExtractor:()=>x,ImageFeatureExtractor:()=>f,MobileNetV1FeatureExtractor:()=>L,MobileNetV2FeatureExtractor:()=>z,MobileNetV3FeatureExtractor:()=>I,MobileNetV4FeatureExtractor:()=>B,MobileViTFeatureExtractor:()=>N,MobileViTImageProcessor:()=>O,NougatImageProcessor:()=>$,OwlViTFeatureExtractor:()=>D,OwlViTProcessor:()=>de,Owlv2ImageProcessor:()=>V,Processor:()=>oe,PyAnnoteFeatureExtractor:()=>te,PyAnnoteProcessor:()=>le,RTDetrImageProcessor:()=>j,SamImageProcessor:()=>X,SamProcessor:()=>se,SapiensFeatureExtractor:()=>g,SeamlessM4TFeatureExtractor:()=>K,SegformerFeatureExtractor:()=>M,SiglipImageProcessor:()=>C,SpeechT5FeatureExtractor:()=>re,SpeechT5Processor:()=>ce,Swin2SRImageProcessor:()=>Q,ViTFeatureExtractor:()=>S,ViTImageProcessor:()=>A,VitMatteImageProcessor:()=>H,Wav2Vec2FeatureExtractor:()=>J,Wav2Vec2ProcessorWithLM:()=>ie,WeSpeakerFeatureExtractor:()=>ne,WhisperFeatureExtractor:()=>Y,WhisperProcessor:()=>ae,YolosFeatureExtractor:()=>U});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js"),a=n(/*! ./utils/maths.js */"./src/utils/maths.js"),i=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(n(/*! ./utils/image.js */"./src/utils/image.js"),n(/*! ./utils/audio.js */"./src/utils/audio.js"));function c([e,t,n,r]){return[e-n/2,t-r/2,e+n/2,t+r/2]}function d(e,t=.5,n=null,r=!1){const o=e.logits,s=e.pred_boxes,[i,l,d]=o.dims;if(null!==n&&n.length!==i)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let u=[];for(let e=0;e<i;++e){let i=null!==n?n[e]:null,p={boxes:[],classes:[],scores:[]},h=o[e],m=s[e];for(let e=0;e<l;++e){let n,o=h[e],s=[];if(r){n=o.sigmoid().data;for(let e=0;e<n.length;++e)n[e]>t&&s.push(e)}else{let e=(0,a.max)(o.data)[1];if(e===d-1)continue;if(n=(0,a.softmax)(o.data),n[e]<t)continue;s.push(e)}for(const t of s){let r=m[e].data;r=c(r),null!==i&&(r=r.map(((e,t)=>e*i[(t+1)%2]))),p.boxes.push(r),p.classes.push(t),p.scores.push(n[t])}}u.push(p)}return u}function u(e,t=null){const n=e.logits,r=n.dims[0];if(null!==t&&t.length!==r)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const o=[];for(let e=0;e<r;++e){const r=null!==t?t[e]:null;let s=n[e];null!==r&&(s=(0,i.interpolate)(s,r,"bilinear",!1));const[a,l]=r??s.dims.slice(-2),c=new i.Tensor("int32",new Int32Array(a*l),[a,l]),d=s[0].data,u=c.data;for(let e=1;e<s.dims[0];++e){const t=s[e].data;for(let n=0;n<t.length;++n)t[n]>d[n]&&(d[n]=t[n],u[n]=e)}const p=new Array(s.dims[0]),h=c.data;for(let e=0;e<h.length;++e){const t=h[e];p[t]=t}const m=p.filter((e=>void 0!==e));o.push({segmentation:c,labels:m})}return o}function p(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function h(e,t,n=0,r=null){const o=e/t;let s=(0,a.bankers_round)(o)*t;return null!==r&&s>r&&(s=Math.floor(o)*t),s<n&&(s=Math.ceil(o)*t),s}function m([e,t],n){return[Math.max(Math.floor(e/n),1)*n,Math.max(Math.floor(t/n),1)*n]}class _ extends r.Callable{constructor(e){super(),this.config=e}}class f extends _{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,n=2){const r=e.height,o=e.width,s=t.height,a=t.width;let i=Math.min(r,s),l=Math.min(o,a);return i===r&&l===o?e:(r>o?l=Math.floor(o*i/r):o>r&&(i=Math.floor(r*l/o)),await e.resize(l,i,{resample:n}))}async crop_margin(e,t=200){const n=e.clone().grayscale(),r=(0,a.min)(n.data)[0],o=(0,a.max)(n.data)[0]-r;if(0===o)return e;const s=t/255;let i=n.width,l=n.height,c=0,d=0;const u=n.data;for(let e=0;e<n.height;++e){const t=e*n.width;for(let a=0;a<n.width;++a)(u[t+a]-r)/o<s&&(i=Math.min(i,a),l=Math.min(l,e),c=Math.max(c,a),d=Math.max(d,e))}return e=await e.crop([i,l,c,d])}pad_image(e,t,n,{mode:r="constant",center:s=!1,constant_values:a=0}={}){const[i,l,c]=t;let d,u;if("number"==typeof n?(d=n,u=n):(d=n.width,u=n.height),d!==l||u!==i){const n=new Float32Array(d*u*c);if(Array.isArray(a))for(let e=0;e<n.length;++e)n[e]=a[e%c];else 0!==a&&n.fill(a);const[p,h]=s?[Math.floor((d-l)/2),Math.floor((u-i)/2)]:[0,0];for(let t=0;t<i;++t){const r=(t+h)*d,o=t*l;for(let t=0;t<l;++t){const s=(r+t+p)*c,a=(o+t)*c;for(let t=0;t<c;++t)n[s+t]=e[a+t]}}if("symmetric"===r){if(s)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=i-1,r=l-1;for(let s=0;s<u;++s){const a=s*d,u=(0,o.calculateReflectOffset)(s,t)*l;for(let t=0;t<d;++t){if(s<i&&t<l)continue;const d=(a+t)*c,p=(u+(0,o.calculateReflectOffset)(t,r))*c;for(let t=0;t<c;++t)n[d+t]=e[p+t]}}}e=n,t=[u,d,c]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[n,r]=e.size;let o,s;if(this.do_thumbnail){const{height:e,width:n}=t;o=Math.min(e,n)}else Number.isInteger(t)?(o=t,s=this.config.max_size??o):void 0!==t&&(o=t.shortest_edge,s=t.longest_edge);if(void 0!==o||void 0!==s){const e=void 0===o?1:Math.max(o/n,o/r),t=n*e,a=r*e,i=void 0===s?1:Math.min(s/t,s/a);let l=Math.floor(Number((t*i).toFixed(2))),c=Math.floor(Number((a*i).toFixed(2)));return void 0!==this.size_divisibility&&([l,c]=m([l,c],this.size_divisibility)),[l,c]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,o=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let 
t=o/r,s=e/n;Math.abs(1-s)<Math.abs(1-t)?t=s:s=t,o=h(t*r,this.config.ensure_multiple_of),e=h(s*n,this.config.ensure_multiple_of)}return[e,o]}if(void 0!==this.size_divisibility)return m([n,r],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,n]=this.get_resize_output_image_size(e,this.size);return await e.resize(t,n,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:n=null,do_convert_rgb:r=null,do_convert_grayscale:o=null,do_flip_channel_order:s=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[a,l]=e.size;if(r??this.do_convert_rgb?e=e.rgb():o&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,n;Number.isInteger(this.crop_size)?(t=this.crop_size,n=this.crop_size):(t=this.crop_size.width,n=this.crop_size.height),e=await e.center_crop(t,n)}const c=[e.height,e.width];let d=Float32Array.from(e.data),u=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let n=this.image_std;if(Array.isArray(this.image_std)||(n=new Array(e.channels).fill(t)),t.length!==e.channels||n.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${n.length}) must match the number of channels in the image (${e.channels}).`);for(let r=0;r<d.length;r+=e.channels)for(let o=0;o<e.channels;++o)d[r+o]=(d[r+o]-t[o])/n[o]}if(n??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,u]=t}else if(this.size_divisibility){const[e,t]=m([u[1],u[0]],this.size_divisibility);[d,u]=this.pad_image(d,u,{width:e,height:t})}if(s??this.do_flip_channel_order){if(3!==u[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,a],reshaped_input_size:c,pixel_values:new i.Tensor("float32",d,u).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const n=await Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,i.stack)(n.map((e=>e.pixel_values)),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class g extends f{post_process_semantic_segmentation(...e){return u(...e)}}class M extends f{post_process_semantic_segmentation(...e){return u(...e)}}class w extends f{}class b extends w{}class T extends f{}class x extends f{}class y extends f{}class k extends y{}class F extends f{}class C extends f{}class P extends f{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const n=Math.floor(t/this.crop_pct),[r,o]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(r,o,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class v extends P{}class S extends f{}class A extends f{}class E extends f{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class L extends f{}class z extends f{}class I extends f{}class B extends f{}class N extends f{}class O extends N{}class D extends 
f{post_process_object_detection(...e){return d(...e)}}class V extends D{}class j extends f{post_process_object_detection(...e){return d(...e)}}class R extends f{}class G extends f{}class q extends f{pad_image(e,t,n,r={}){const[o,s,a]=t;let i=this.image_mean;Array.isArray(this.image_mean)||(i=new Array(a).fill(i));let l=this.image_std;Array.isArray(l)||(l=new Array(a).fill(i));const c=i.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,n,{center:!0,constant_values:c,...r})}}class $ extends q{}class W extends f{async _call(e){const t=await super._call(e),n=[t.pixel_values.dims[0],64,64],r=new i.Tensor("int64",new BigInt64Array(n.reduce(((e,t)=>e*t))).fill(1n),n);return{...t,pixel_mask:r}}post_process_object_detection(...e){return d(...e)}remove_low_and_no_objects(e,t,n,r){let o=[],s=[],i=[];for(let l=0;l<e.dims[0];++l){let c=e[l],d=t[l],u=(0,a.max)(c.data)[1];if(u===r)continue;let p=(0,a.softmax)(c.data)[u];p>n&&(o.push(d),s.push(p),i.push(u))}return[o,s,i]}check_segment_validity(e,t,n,r=.5,o=.8){let s=[],a=0,i=0;const l=t[n].data;for(let t=0;t<e.length;++t)e[t]===n&&(s.push(t),++a),l[t]>=r&&++i;let c=a>0&&i>0;if(c){c=a/i>o}return[c,s]}compute_segments(e,t,n,r,o,s=null,a=null){let[l,c]=a??e[0].dims,d=new i.Tensor("int32",new Int32Array(l*c),[l,c]),u=[];if(null!==a)for(let t=0;t<e.length;++t)e[t]=(0,i.interpolate)(e[t],a,"bilinear",!1);let p=new Int32Array(e[0].data.length),h=new Float32Array(e[0].data.length);for(let n=0;n<e.length;++n){let r=t[n];const o=e[n].data;for(let e=0;e<o.length;++e)o[e]*=r,o[e]>h[e]&&(p[e]=n,h[e]=o[e])}let m=0;const _=d.data;for(let s=0;s<n.length;++s){let a=n[s],[i,l]=this.check_segment_validity(p,e,s,r,o);if(i){++m;for(let e of l)_[e]=m;u.push({id:m,label_id:a,score:t[s]})}}return[d,u]}post_process_panoptic_segmentation(e,t=.5,n=.5,r=.8,o=null,s=null){null===o&&(console.warn("`label_ids_to_fuse` unset. 
No instance will be fused."),o=new Set);const a=e.logits,l=e.pred_masks.sigmoid();let[c,d,u]=a.dims;if(u-=1,null!==s&&s.length!==c)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let p=[];for(let e=0;e<c;++e){let c=null!==s?s[e]:null,d=a[e],h=l[e],[m,_,f]=this.remove_low_and_no_objects(d,h,t,u);if(0===f.length){let[e,t]=c??h.dims.slice(-2),n=new i.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);p.push({segmentation:n,segments_info:[]});continue}let[g,M]=this.compute_segments(m,_,f,n,r,o,c);p.push({segmentation:g,segments_info:M})}return p}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class U extends f{post_process_object_detection(...e){return d(...e)}}class X extends f{reshape_input_points(e,t,n,r=!1){e=structuredClone(e);let s=(0,o.calculateDimensions)(e);if(3===s.length)r||(s=[1,...s]),e=[e];else if(4!==s.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let r=0;r<e.length;++r){let o=t[r],s=n[r],a=[s[0]/o[0],s[1]/o[1]];for(let t=0;t<e[r].length;++t)for(let n=0;n<e[r][t].length;++n)for(let o=0;o<e[r][t][n].length;++o)e[r][t][n][o]*=a[o%2]}return new i.Tensor("float32",Float32Array.from(e.flat(1/0)),s)}add_input_labels(e,t){let n=(0,o.calculateDimensions)(e);if(2===n.length)n=[1,...n],e=[e];else if(3!==n.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(n.some(((e,n)=>e!==t.dims[n])))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new i.Tensor("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:t=null,input_labels:n=null,input_boxes:r=null}={}){const o=await super._call(e);if(t&&(o.input_points=this.reshape_input_points(t,o.original_sizes,o.reshaped_input_sizes)),n){if(!o.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");o.input_labels=this.add_input_labels(n,o.input_points)}return r&&(o.input_boxes=this.reshape_input_points(r,o.original_sizes,o.reshaped_input_sizes,!0)),o}async post_process_masks(e,t,n,{mask_threshold:r=0,binarize:o=!0,pad_size:s=null}={}){const a=[],l=[(s=s??this.pad_size).height,s.width];for(let s=0;s<t.length;++s){const c=t[s],d=n[s];let u=await(0,i.interpolate_4d)(e[s],{mode:"bilinear",size:l});if(u=u.slice(null,null,[0,d[0]],[0,d[1]]),u=await(0,i.interpolate_4d)(u,{mode:"bilinear",size:c}),o){const e=u.data,t=new Uint8Array(e.length);for(let n=0;n<e.length;++n)e[n]>r&&(t[n]=1);u=new i.Tensor("bool",t,u.dims)}a.push(u)}return a}generate_crop_boxes(e,t,{crop_n_layers:n=0,overlap_ratio:r=512/1500,points_per_crop:o=32,crop_n_points_downscale_factor:s=1}={}){}}class Q extends f{pad_image(e,t,n,r={}){const[o,s,a]=t;return super.pad_image(e,t,{width:s+(n-s%n)%n,height:o+(n-o%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...r})}}class H extends f{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const n=await Promise.all(e.map((e=>this.preprocess(e)))),r=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,i.stack)(n.map(((e,t)=>(0,i.cat)([e.pixel_values,r[t].pixel_values],0))),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class Y extends 
_{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=t.data,r=(0,a.max)(n)[0];for(let e=0;e<n.length;++e)n[e]=(Math.max(n[e],r-8)+4)/4;return t}async _call(e){let t;p(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class J extends _{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,n=e.reduce(((e,n)=>e+(n-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(n+1e-7)))}async _call(e){p(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const n=[1,t.length];return{input_values:new i.Tensor("float32",t,n),attention_mask:new i.Tensor("int64",new BigInt64Array(t.length).fill(1n),n)}}}class K extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:r=!0,return_attention_mask:o=!0}={}){p(e,"SeamlessM4TFeatureExtractor");let s,a=await this._extract_fbank_features(e,this.config.max_length);if(r){const[e,t]=a.dims,n=a.data;for(let r=0;r<t;++r){let o=0;for(let s=0;s<e;++s)o+=n[s*t+r];const s=o/e;let a=0;for(let o=0;o<e;++o)a+=(n[o*t+r]-s)**2;a/=e-1;const i=Math.sqrt(a+1e-7);for(let o=0;o<e;++o){const e=o*t+r;n[e]=(n[e]-s)/i}}}if(t){const[e,t]=a.dims,r=a.data,l=e%n;if(l>0){const n=new Float32Array(t*(e+l));n.set(r),n.fill(this.config.padding_value,r.length);const c=e+l;a=new i.Tensor(a.type,n,[c,t]),o&&(s=new i.Tensor("int64",new BigInt64Array(c),[1,c]),s.data.fill(1n,0,e))}}const[l,c]=a.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const u=a.view(1,Math.floor(l/d),c*d),h={input_features:u};if(o){const e=u.dims[1],t=new BigInt64Array(e);if(s){const e=s.data;for(let n=1,r=0;n<l;n+=d,++r)t[r]=e[n]}else t.fill(1n);h.attention_mask=new i.Tensor("int64",t,[1,e])}return h}}class Z extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async 
_extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){p(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class ee extends _{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,n,r){let o,s=!1;const a=e.length-t;if(a>0){if("rand_trunc"!==n)throw new Error(`Truncation strategy "${n}" not implemented`);{s=!0;const n=Math.floor(Math.random()*(a+1));e=e.subarray(n,n+t),o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(a<0){let n=new Float64Array(t);if(n.set(e),"repeat"===r)for(let r=e.length;r<t;r+=e.length)n.set(e.subarray(0,Math.min(e.length,t-r)),r);else if("repeatpad"===r)for(let t=e.length;t<-a;t+=e.length)n.set(e,t);e=n}if("fusion"===n)throw new Error(`Truncation strategy "${n}" not implemented`);o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return o.unsqueeze_(0)}async _extract_fbank_features(e,t,n=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:n,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){p(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class te extends _{async _call(e){p(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new i.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const n=t/this.samples_to_frames(t)/this.config.sampling_rate,r=[];for(const t of e.tolist()){const e=[];let o=-1;for(let n=0;n<t.length;++n){const r=(0,a.softmax)(t[n]),[s,i]=(0,a.max)(r),[l,c]=[n,n+1];i!==o?(o=i,e.push({id:i,start:l,end:c,score:s})):(e.at(-1).end=c,e.at(-1).score+=s)}r.push(e.map((({id:e,start:t,end:r,score:o})=>({id:e,start:t*n,end:r*n,confidence:o/(r-t)}))))}return r}}class ne extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){p(e,"WeSpeakerFeatureExtractor");const t=(await 
this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,n=t.data,[r,o,s]=t.dims;for(let t=0;t<r;++t){const r=t*o*s,a=t*s;for(let t=0;t<o;++t){const o=r+t*s;for(let t=0;t<s;++t)n[o+t]-=e[a+t]}}}return{input_features:t}}}class re extends _{}class oe extends r.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class se extends oe{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class ae extends oe{async _call(e){return await this.feature_extractor(e)}}class ie extends oe{async _call(e){return await this.feature_extractor(e)}}class le extends oe{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class ce extends oe{async _call(e){return await this.feature_extractor(e)}}class de extends oe{}class ue extends oe{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:n,task_prompts_with_input:r}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(n??{})),this.task_prompts_with_input=new Map(Object.entries(r??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let o;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":o=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const s="ocr"===r?"quad_boxes":"bboxes",a=e.matchAll(this.regexes[s]),i=[],l=[];for(const[e,t,...r]of a)i.push(t?t.trim():i.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));o={labels:i,[s]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:o}}}class pe{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:f,WhisperFeatureExtractor:Y,ViTFeatureExtractor:S,MobileViTFeatureExtractor:N,MobileViTImageProcessor:O,MobileNetV1FeatureExtractor:L,MobileNetV2FeatureExtractor:z,MobileNetV3FeatureExtractor:I,MobileNetV4FeatureExtractor:B,OwlViTFeatureExtractor:D,Owlv2ImageProcessor:V,CLIPFeatureExtractor:y,CLIPImageProcessor:k,Florence2Processor:ue,ChineseCLIPFeatureExtractor:F,SiglipImageProcessor:C,ConvNextFeatureExtractor:P,ConvNextImageProcessor:v,SegformerFeatureExtractor:M,SapiensFeatureExtractor:g,BitImageProcessor:T,DPTImageProcessor:b,DPTFeatureExtractor:w,GLPNFeatureExtractor:x,BeitFeatureExtractor:G,DeiTFeatureExtractor:R,DetrFeatureExtractor:W,RTDetrImageProcessor:j,YolosFeatureExtractor:U,DonutFeatureExtractor:q,NougatImageProcessor:$,EfficientNetImageProcessor:E,ViTImageProcessor:A,VitMatteImageProcessor:H,SamImageProcessor:X,Swin2SRImageProcessor:Q,Wav2Vec2FeatureExtractor:J,SeamlessM4TFeatureExtractor:K,SpeechT5FeatureExtractor:re,ASTFeatureExtractor:Z,ClapFeatureExtractor:ee,PyAnnoteFeatureExtractor:te,WeSpeakerFeatureExtractor:ne};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:ae,Wav2Vec2ProcessorWithLM:ie,PyAnnoteProcessor:le,SamProcessor:se,SpeechT5Processor:ce,OwlViTProcessor:de,Florence2Processor:ue};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:a="main"}={}){let i=n??await(0,s.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:a}),l=i.feature_extractor_type??i.image_processor_type,c=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!c){if(void 0===i.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),c=f}return new(this.PROCESSOR_CLASS_MAPPING[i.processor_class]??oe)(new c(i))}}},"./src/tokenizers.js":
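A hedged usage sketch of the AutoProcessor.from_pretrained flow implemented in the ./src/processors.js module shown above (the updated revision of the same module follows below): it fetches preprocessor_config.json, resolves the feature extractor class from FEATURE_EXTRACTOR_CLASS_MAPPING (falling back to ImageFeatureExtractor when only a size entry is present), and wraps it in the class named by processor_class. The checkpoint id below is the ASR default listed elsewhere in this bundle; read_audio is assumed to be re-exported from the package root, and the output shape is typical for Whisper-style extractors rather than guaranteed.

    import { AutoProcessor, read_audio } from '@huggingface/transformers';

    // Illustrative checkpoint; its preprocessor_config.json is expected to resolve to
    // WhisperFeatureExtractor wrapped in WhisperProcessor via the class mappings above.
    const processor = await AutoProcessor.from_pretrained('Xenova/whisper-tiny.en');

    // The extractor expects a Float32Array at its sampling rate; the module's own error
    // message suggests read_audio(url, sampling_rate) for exactly this.
    const audio = await read_audio('https://example.com/sample.wav', 16000);

    const { input_features } = await processor(audio);
    // input_features: a log-mel Tensor, typically [1, 80, 3000] for Whisper-style extractors.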
127
+ \***************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>Z,AutoProcessor:()=>pe,BeitFeatureExtractor:()=>G,BitImageProcessor:()=>T,CLIPFeatureExtractor:()=>y,CLIPImageProcessor:()=>k,ChineseCLIPFeatureExtractor:()=>F,ClapFeatureExtractor:()=>ee,ConvNextFeatureExtractor:()=>P,ConvNextImageProcessor:()=>v,DPTFeatureExtractor:()=>w,DPTImageProcessor:()=>b,DeiTFeatureExtractor:()=>R,DetrFeatureExtractor:()=>W,DonutFeatureExtractor:()=>q,EfficientNetImageProcessor:()=>E,FeatureExtractor:()=>_,Florence2Processor:()=>ue,GLPNFeatureExtractor:()=>x,ImageFeatureExtractor:()=>f,MobileNetV1FeatureExtractor:()=>L,MobileNetV2FeatureExtractor:()=>z,MobileNetV3FeatureExtractor:()=>I,MobileNetV4FeatureExtractor:()=>B,MobileViTFeatureExtractor:()=>N,MobileViTImageProcessor:()=>O,NougatImageProcessor:()=>$,OwlViTFeatureExtractor:()=>D,OwlViTProcessor:()=>de,Owlv2ImageProcessor:()=>V,Processor:()=>oe,PyAnnoteFeatureExtractor:()=>te,PyAnnoteProcessor:()=>le,RTDetrImageProcessor:()=>j,SamImageProcessor:()=>X,SamProcessor:()=>se,SapiensFeatureExtractor:()=>g,SeamlessM4TFeatureExtractor:()=>K,SegformerFeatureExtractor:()=>M,SiglipImageProcessor:()=>C,SpeechT5FeatureExtractor:()=>re,SpeechT5Processor:()=>ce,Swin2SRImageProcessor:()=>Q,ViTFeatureExtractor:()=>S,ViTImageProcessor:()=>A,VitMatteImageProcessor:()=>H,Wav2Vec2FeatureExtractor:()=>J,Wav2Vec2ProcessorWithLM:()=>ie,WeSpeakerFeatureExtractor:()=>ne,WhisperFeatureExtractor:()=>Y,WhisperProcessor:()=>ae,YolosFeatureExtractor:()=>U});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js"),a=n(/*! ./utils/maths.js */"./src/utils/maths.js"),i=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(n(/*! ./utils/image.js */"./src/utils/image.js"),n(/*! ./utils/audio.js */"./src/utils/audio.js"));function c([e,t,n,r]){return[e-n/2,t-r/2,e+n/2,t+r/2]}function d(e,t=.5,n=null,r=!1){const o=e.logits,s=e.pred_boxes,[i,l,d]=o.dims;if(null!==n&&n.length!==i)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let u=[];for(let e=0;e<i;++e){let i=null!==n?n[e]:null,p={boxes:[],classes:[],scores:[]},h=o[e],m=s[e];for(let e=0;e<l;++e){let n,o=h[e],s=[];if(r){n=o.sigmoid().data;for(let e=0;e<n.length;++e)n[e]>t&&s.push(e)}else{let e=(0,a.max)(o.data)[1];if(e===d-1)continue;if(n=(0,a.softmax)(o.data),n[e]<t)continue;s.push(e)}for(const t of s){let r=m[e].data;r=c(r),null!==i&&(r=r.map(((e,t)=>e*i[(t+1)%2]))),p.boxes.push(r),p.classes.push(t),p.scores.push(n[t])}}u.push(p)}return u}function u(e,t=null){const n=e.logits,r=n.dims[0];if(null!==t&&t.length!==r)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const o=[];for(let e=0;e<r;++e){const r=null!==t?t[e]:null;let s=n[e];null!==r&&(s=(0,i.interpolate)(s,r,"bilinear",!1));const[a,l]=r??s.dims.slice(-2),c=new i.Tensor("int32",new Int32Array(a*l),[a,l]),d=s[0].data,u=c.data;for(let e=1;e<s.dims[0];++e){const t=s[e].data;for(let n=0;n<t.length;++n)t[n]>d[n]&&(d[n]=t[n],u[n]=e)}const p=new Array(s.dims[0]);for(let e=0;e<u.length;++e){const t=u[e];p[t]=t}const h=p.filter((e=>void 0!==e));o.push({segmentation:c,labels:h})}return o}function p(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function h(e,t,n=0,r=null){const o=e/t;let s=(0,a.bankers_round)(o)*t;return null!==r&&s>r&&(s=Math.floor(o)*t),s<n&&(s=Math.ceil(o)*t),s}function m([e,t],n){return[Math.max(Math.floor(e/n),1)*n,Math.max(Math.floor(t/n),1)*n]}class _ extends r.Callable{constructor(e){super(),this.config=e}}class f extends _{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,n=2){const r=e.height,o=e.width,s=t.height,a=t.width;let i=Math.min(r,s),l=Math.min(o,a);return i===r&&l===o?e:(r>o?l=Math.floor(o*i/r):o>r&&(i=Math.floor(r*l/o)),await e.resize(l,i,{resample:n}))}async crop_margin(e,t=200){const n=e.clone().grayscale(),r=(0,a.min)(n.data)[0],o=(0,a.max)(n.data)[0]-r;if(0===o)return e;const s=t/255;let i=n.width,l=n.height,c=0,d=0;const u=n.data;for(let e=0;e<n.height;++e){const t=e*n.width;for(let a=0;a<n.width;++a)(u[t+a]-r)/o<s&&(i=Math.min(i,a),l=Math.min(l,e),c=Math.max(c,a),d=Math.max(d,e))}return e=await e.crop([i,l,c,d])}pad_image(e,t,n,{mode:r="constant",center:s=!1,constant_values:a=0}={}){const[i,l,c]=t;let d,u;if("number"==typeof n?(d=n,u=n):(d=n.width,u=n.height),d!==l||u!==i){const n=new Float32Array(d*u*c);if(Array.isArray(a))for(let e=0;e<n.length;++e)n[e]=a[e%c];else 0!==a&&n.fill(a);const[p,h]=s?[Math.floor((d-l)/2),Math.floor((u-i)/2)]:[0,0];for(let t=0;t<i;++t){const r=(t+h)*d,o=t*l;for(let t=0;t<l;++t){const s=(r+t+p)*c,a=(o+t)*c;for(let t=0;t<c;++t)n[s+t]=e[a+t]}}if("symmetric"===r){if(s)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=i-1,r=l-1;for(let s=0;s<u;++s){const a=s*d,u=(0,o.calculateReflectOffset)(s,t)*l;for(let t=0;t<d;++t){if(s<i&&t<l)continue;const d=(a+t)*c,p=(u+(0,o.calculateReflectOffset)(t,r))*c;for(let t=0;t<c;++t)n[d+t]=e[p+t]}}}e=n,t=[u,d,c]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[n,r]=e.size;let o,s;if(this.do_thumbnail){const{height:e,width:n}=t;o=Math.min(e,n)}else Number.isInteger(t)?(o=t,s=this.config.max_size??o):void 0!==t&&(o=t.shortest_edge,s=t.longest_edge);if(void 0!==o||void 0!==s){const e=void 0===o?1:Math.max(o/n,o/r),t=n*e,a=r*e,i=void 0===s?1:Math.min(s/t,s/a);let l=Math.floor(Number((t*i).toFixed(2))),c=Math.floor(Number((a*i).toFixed(2)));return void 0!==this.size_divisibility&&([l,c]=m([l,c],this.size_divisibility)),[l,c]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,o=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let 
t=o/r,s=e/n;Math.abs(1-s)<Math.abs(1-t)?t=s:s=t,o=h(t*r,this.config.ensure_multiple_of),e=h(s*n,this.config.ensure_multiple_of)}return[e,o]}if(void 0!==this.size_divisibility)return m([n,r],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,n]=this.get_resize_output_image_size(e,this.size);return await e.resize(t,n,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:n=null,do_convert_rgb:r=null,do_convert_grayscale:o=null,do_flip_channel_order:s=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[a,l]=e.size;if(r??this.do_convert_rgb?e=e.rgb():o&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,n;Number.isInteger(this.crop_size)?(t=this.crop_size,n=this.crop_size):(t=this.crop_size.width,n=this.crop_size.height),e=await e.center_crop(t,n)}const c=[e.height,e.width];let d=Float32Array.from(e.data),u=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let n=this.image_std;if(Array.isArray(this.image_std)||(n=new Array(e.channels).fill(t)),t.length!==e.channels||n.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${n.length}) must match the number of channels in the image (${e.channels}).`);for(let r=0;r<d.length;r+=e.channels)for(let o=0;o<e.channels;++o)d[r+o]=(d[r+o]-t[o])/n[o]}if(n??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,u]=t}else if(this.size_divisibility){const[e,t]=m([u[1],u[0]],this.size_divisibility);[d,u]=this.pad_image(d,u,{width:e,height:t})}if(s??this.do_flip_channel_order){if(3!==u[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,a],reshaped_input_size:c,pixel_values:new i.Tensor("float32",d,u).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const n=await Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,i.stack)(n.map((e=>e.pixel_values)),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class g extends f{post_process_semantic_segmentation(...e){return u(...e)}}class M extends f{post_process_semantic_segmentation(...e){return u(...e)}}class w extends f{}class b extends w{}class T extends f{}class x extends f{}class y extends f{}class k extends y{}class F extends f{}class C extends f{}class P extends f{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const n=Math.floor(t/this.crop_pct),[r,o]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(r,o,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class v extends P{}class S extends f{}class A extends f{}class E extends f{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class L extends f{}class z extends f{}class I extends f{}class B extends f{}class N extends f{}class O extends N{}class D extends 
f{post_process_object_detection(...e){return d(...e)}}class V extends D{}class j extends f{post_process_object_detection(...e){return d(...e)}}class R extends f{}class G extends f{}class q extends f{pad_image(e,t,n,r={}){const[o,s,a]=t;let i=this.image_mean;Array.isArray(this.image_mean)||(i=new Array(a).fill(i));let l=this.image_std;Array.isArray(l)||(l=new Array(a).fill(i));const c=i.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,n,{center:!0,constant_values:c,...r})}}class $ extends q{}class W extends f{async _call(e){const t=await super._call(e),n=[t.pixel_values.dims[0],64,64],r=new i.Tensor("int64",new BigInt64Array(n.reduce(((e,t)=>e*t))).fill(1n),n);return{...t,pixel_mask:r}}post_process_object_detection(...e){return d(...e)}remove_low_and_no_objects(e,t,n,r){let o=[],s=[],i=[];for(let l=0;l<e.dims[0];++l){let c=e[l],d=t[l],u=(0,a.max)(c.data)[1];if(u===r)continue;let p=(0,a.softmax)(c.data)[u];p>n&&(o.push(d),s.push(p),i.push(u))}return[o,s,i]}check_segment_validity(e,t,n,r=.5,o=.8){let s=[],a=0,i=0;const l=t[n].data;for(let t=0;t<e.length;++t)e[t]===n&&(s.push(t),++a),l[t]>=r&&++i;let c=a>0&&i>0;if(c){c=a/i>o}return[c,s]}compute_segments(e,t,n,r,o,s=null,a=null){let[l,c]=a??e[0].dims,d=new i.Tensor("int32",new Int32Array(l*c),[l,c]),u=[];if(null!==a)for(let t=0;t<e.length;++t)e[t]=(0,i.interpolate)(e[t],a,"bilinear",!1);let p=new Int32Array(e[0].data.length),h=new Float32Array(e[0].data.length);for(let n=0;n<e.length;++n){let r=t[n];const o=e[n].data;for(let e=0;e<o.length;++e)o[e]*=r,o[e]>h[e]&&(p[e]=n,h[e]=o[e])}let m=0;const _=d.data;for(let s=0;s<n.length;++s){let a=n[s],[i,l]=this.check_segment_validity(p,e,s,r,o);if(i){++m;for(let e of l)_[e]=m;u.push({id:m,label_id:a,score:t[s]})}}return[d,u]}post_process_panoptic_segmentation(e,t=.5,n=.5,r=.8,o=null,s=null){null===o&&(console.warn("`label_ids_to_fuse` unset. 
No instance will be fused."),o=new Set);const a=e.logits,l=e.pred_masks.sigmoid();let[c,d,u]=a.dims;if(u-=1,null!==s&&s.length!==c)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let p=[];for(let e=0;e<c;++e){let c=null!==s?s[e]:null,d=a[e],h=l[e],[m,_,f]=this.remove_low_and_no_objects(d,h,t,u);if(0===f.length){let[e,t]=c??h.dims.slice(-2),n=new i.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);p.push({segmentation:n,segments_info:[]});continue}let[g,M]=this.compute_segments(m,_,f,n,r,o,c);p.push({segmentation:g,segments_info:M})}return p}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class U extends f{post_process_object_detection(...e){return d(...e)}}class X extends f{reshape_input_points(e,t,n,r=!1){e=structuredClone(e);let s=(0,o.calculateDimensions)(e);if(3===s.length)r||(s=[1,...s]),e=[e];else if(4!==s.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let r=0;r<e.length;++r){let o=t[r],s=n[r],a=[s[0]/o[0],s[1]/o[1]];for(let t=0;t<e[r].length;++t)for(let n=0;n<e[r][t].length;++n)for(let o=0;o<e[r][t][n].length;++o)e[r][t][n][o]*=a[o%2]}return new i.Tensor("float32",Float32Array.from(e.flat(1/0)),s)}add_input_labels(e,t){let n=(0,o.calculateDimensions)(e);if(2===n.length)n=[1,...n],e=[e];else if(3!==n.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(n.some(((e,n)=>e!==t.dims[n])))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new i.Tensor("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:t=null,input_labels:n=null,input_boxes:r=null}={}){const o=await super._call(e);if(t&&(o.input_points=this.reshape_input_points(t,o.original_sizes,o.reshaped_input_sizes)),n){if(!o.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");o.input_labels=this.add_input_labels(n,o.input_points)}return r&&(o.input_boxes=this.reshape_input_points(r,o.original_sizes,o.reshaped_input_sizes,!0)),o}async post_process_masks(e,t,n,{mask_threshold:r=0,binarize:o=!0,pad_size:s=null}={}){const a=[],l=[(s=s??this.pad_size).height,s.width];for(let s=0;s<t.length;++s){const c=t[s],d=n[s];let u=await(0,i.interpolate_4d)(e[s],{mode:"bilinear",size:l});if(u=u.slice(null,null,[0,d[0]],[0,d[1]]),u=await(0,i.interpolate_4d)(u,{mode:"bilinear",size:c}),o){const e=u.data,t=new Uint8Array(e.length);for(let n=0;n<e.length;++n)e[n]>r&&(t[n]=1);u=new i.Tensor("bool",t,u.dims)}a.push(u)}return a}generate_crop_boxes(e,t,{crop_n_layers:n=0,overlap_ratio:r=512/1500,points_per_crop:o=32,crop_n_points_downscale_factor:s=1}={}){}}class Q extends f{pad_image(e,t,n,r={}){const[o,s,a]=t;return super.pad_image(e,t,{width:s+(n-s%n)%n,height:o+(n-o%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...r})}}class H extends f{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const n=await Promise.all(e.map((e=>this.preprocess(e)))),r=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,i.stack)(n.map(((e,t)=>(0,i.cat)([e.pixel_values,r[t].pixel_values],0))),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class Y extends 
_{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=t.data,r=(0,a.max)(n)[0];for(let e=0;e<n.length;++e)n[e]=(Math.max(n[e],r-8)+4)/4;return t}async _call(e){let t;p(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class J extends _{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,n=e.reduce(((e,n)=>e+(n-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(n+1e-7)))}async _call(e){p(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const n=[1,t.length];return{input_values:new i.Tensor("float32",t,n),attention_mask:new i.Tensor("int64",new BigInt64Array(t.length).fill(1n),n)}}}class K extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:r=!0,return_attention_mask:o=!0}={}){p(e,"SeamlessM4TFeatureExtractor");let s,a=await this._extract_fbank_features(e,this.config.max_length);if(r){const[e,t]=a.dims,n=a.data;for(let r=0;r<t;++r){let o=0;for(let s=0;s<e;++s)o+=n[s*t+r];const s=o/e;let a=0;for(let o=0;o<e;++o)a+=(n[o*t+r]-s)**2;a/=e-1;const i=Math.sqrt(a+1e-7);for(let o=0;o<e;++o){const e=o*t+r;n[e]=(n[e]-s)/i}}}if(t){const[e,t]=a.dims,r=a.data,l=e%n;if(l>0){const n=new Float32Array(t*(e+l));n.set(r),n.fill(this.config.padding_value,r.length);const c=e+l;a=new i.Tensor(a.type,n,[c,t]),o&&(s=new i.Tensor("int64",new BigInt64Array(c),[1,c]),s.data.fill(1n,0,e))}}const[l,c]=a.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const u=a.view(1,Math.floor(l/d),c*d),h={input_features:u};if(o){const e=u.dims[1],t=new BigInt64Array(e);if(s){const e=s.data;for(let n=1,r=0;n<l;n+=d,++r)t[r]=e[n]}else t.fill(1n);h.attention_mask=new i.Tensor("int64",t,[1,e])}return h}}class Z extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async 
_extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){p(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class ee extends _{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,n,r){let o,s=!1;const a=e.length-t;if(a>0){if("rand_trunc"!==n)throw new Error(`Truncation strategy "${n}" not implemented`);{s=!0;const n=Math.floor(Math.random()*(a+1));e=e.subarray(n,n+t),o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(a<0){let n=new Float64Array(t);if(n.set(e),"repeat"===r)for(let r=e.length;r<t;r+=e.length)n.set(e.subarray(0,Math.min(e.length,t-r)),r);else if("repeatpad"===r)for(let t=e.length;t<-a;t+=e.length)n.set(e,t);e=n}if("fusion"===n)throw new Error(`Truncation strategy "${n}" not implemented`);o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return o.unsqueeze_(0)}async _extract_fbank_features(e,t,n=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:n,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){p(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class te extends _{async _call(e){p(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new i.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const n=t/this.samples_to_frames(t)/this.config.sampling_rate,r=[];for(const t of e.tolist()){const e=[];let o=-1;for(let n=0;n<t.length;++n){const r=(0,a.softmax)(t[n]),[s,i]=(0,a.max)(r),[l,c]=[n,n+1];i!==o?(o=i,e.push({id:i,start:l,end:c,score:s})):(e.at(-1).end=c,e.at(-1).score+=s)}r.push(e.map((({id:e,start:t,end:r,score:o})=>({id:e,start:t*n,end:r*n,confidence:o/(r-t)}))))}return r}}class ne extends _{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){p(e,"WeSpeakerFeatureExtractor");const t=(await 
this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,n=t.data,[r,o,s]=t.dims;for(let t=0;t<r;++t){const r=t*o*s,a=t*s;for(let t=0;t<o;++t){const o=r+t*s;for(let t=0;t<s;++t)n[o+t]-=e[a+t]}}}return{input_features:t}}}class re extends _{}class oe extends r.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class se extends oe{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class ae extends oe{async _call(e){return await this.feature_extractor(e)}}class ie extends oe{async _call(e){return await this.feature_extractor(e)}}class le extends oe{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class ce extends oe{async _call(e){return await this.feature_extractor(e)}}class de extends oe{}class ue extends oe{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:n,task_prompts_with_input:r}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(n??{})),this.task_prompts_with_input=new Map(Object.entries(r??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let o;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":o=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const s="ocr"===r?"quad_boxes":"bboxes",a=e.matchAll(this.regexes[s]),i=[],l=[];for(const[e,t,...r]of a)i.push(t?t.trim():i.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));o={labels:i,[s]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:o}}}class pe{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:f,WhisperFeatureExtractor:Y,ViTFeatureExtractor:S,MobileViTFeatureExtractor:N,MobileViTImageProcessor:O,MobileNetV1FeatureExtractor:L,MobileNetV2FeatureExtractor:z,MobileNetV3FeatureExtractor:I,MobileNetV4FeatureExtractor:B,OwlViTFeatureExtractor:D,Owlv2ImageProcessor:V,CLIPFeatureExtractor:y,CLIPImageProcessor:k,Florence2Processor:ue,ChineseCLIPFeatureExtractor:F,SiglipImageProcessor:C,ConvNextFeatureExtractor:P,ConvNextImageProcessor:v,SegformerFeatureExtractor:M,SapiensFeatureExtractor:g,BitImageProcessor:T,DPTImageProcessor:b,DPTFeatureExtractor:w,GLPNFeatureExtractor:x,BeitFeatureExtractor:G,DeiTFeatureExtractor:R,DetrFeatureExtractor:W,RTDetrImageProcessor:j,YolosFeatureExtractor:U,DonutFeatureExtractor:q,NougatImageProcessor:$,EfficientNetImageProcessor:E,ViTImageProcessor:A,VitMatteImageProcessor:H,SamImageProcessor:X,Swin2SRImageProcessor:Q,Wav2Vec2FeatureExtractor:J,SeamlessM4TFeatureExtractor:K,SpeechT5FeatureExtractor:re,ASTFeatureExtractor:Z,ClapFeatureExtractor:ee,PyAnnoteFeatureExtractor:te,WeSpeakerFeatureExtractor:ne};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:ae,Wav2Vec2ProcessorWithLM:ie,PyAnnoteProcessor:le,SamProcessor:se,SpeechT5Processor:ce,OwlViTProcessor:de,Florence2Processor:ue};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:a="main"}={}){let i=n??await(0,s.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:a}),l=i.feature_extractor_type??i.image_processor_type,c=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!c){if(void 0===i.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),c=f}return new(this.PROCESSOR_CLASS_MAPPING[i.processor_class]??oe)(new c(i))}}},"./src/tokenizers.js":
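The static `from_pretrained` factory that closes the processors module above (class `pe` in the bundle) corresponds to the package's `AutoProcessor` export: it loads `preprocessor_config.json`, resolves `feature_extractor_type`/`image_processor_type` against `FEATURE_EXTRACTOR_CLASS_MAPPING` (falling back to the generic image feature extractor when only a `size` field is present), and wraps the result in the matching processor class. A minimal consumption sketch; the checkpoint id and the silent audio buffer are placeholders for illustration:

    import { AutoProcessor } from '@huggingface/transformers';

    // Resolves a WhisperFeatureExtractor via the mappings above and wraps it in a processor.
    const processor = await AutoProcessor.from_pretrained('Xenova/whisper-tiny.en');

    // The Whisper feature extractor expects mono audio as a Float32Array at 16 kHz;
    // one second of silence stands in for real input and is zero-padded to 30 s internally.
    const audio = new Float32Array(16000);
    const { input_features } = await processor(audio);
    console.log(input_features.dims); // e.g. [1, 80, 3000]
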
128
128
  /*!***************************!*\
129
129
  !*** ./src/tokenizers.js ***!
130
130
  \***************************/(e,t,n)=>{n.r(t),n.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Ne,BertTokenizer:()=>Te,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>it,BloomTokenizer:()=>je,CLIPTokenizer:()=>rt,CamembertTokenizer:()=>Ee,CodeGenTokenizer:()=>nt,CodeLlamaTokenizer:()=>qe,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>ve,DebertaTokenizer:()=>Fe,DebertaV2Tokenizer:()=>Ce,DistilBertTokenizer:()=>Ae,ElectraTokenizer:()=>ze,EsmTokenizer:()=>Qe,FalconTokenizer:()=>Ue,GPT2Tokenizer:()=>Be,GPTNeoXTokenizer:()=>Xe,GemmaTokenizer:()=>Ye,Grok1Tokenizer:()=>Je,HerbertTokenizer:()=>Pe,LlamaTokenizer:()=>Ge,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Oe,MPNetTokenizer:()=>We,MarianTokenizer:()=>st,MobileBertTokenizer:()=>ye,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>be,Qwen2Tokenizer:()=>He,RoFormerTokenizer:()=>Se,RobertaTokenizer:()=>Ve,SiglipTokenizer:()=>ot,SpeechT5Tokenizer:()=>ct,SqueezeBertTokenizer:()=>ke,T5Tokenizer:()=>Ie,TokenizerModel:()=>y,VitsTokenizer:()=>ut,Wav2Vec2CTCTokenizer:()=>at,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>$e,XLMTokenizer:()=>Le,is_chinese_char:()=>M});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js"),a=n(/*! ./utils/maths.js */"./src/utils/maths.js"),i=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=n(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),c=n(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),u=n(/*! ./utils/constants.js */"./src/utils/constants.js");async function p(e,t){const n=await Promise.all([(0,s.getModelJSON)(e,"tokenizer.json",!0,t),(0,s.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(n[1].legacy=t.legacy),n}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,n]of T)t=t.replaceAll(e,n);return new RegExp(t,"gu")}if(void 0!==e.String){const n=(0,o.escapeRegExp)(e.String);return new RegExp(t?n:`(${n})`,"gu")}return console.warn("Unknown pattern type:",e),null}function m(e){return new Map(Object.entries(e))}function _(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. 
Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function f(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function g(e){return e.replace(/[\u0300-\u036f]/g,"")}function M(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const w="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",b=new RegExp(`^[${w}]+$`,"gu"),T=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class y extends r.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new k(e);case"Unigram":return new F(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new S(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=function(e,t,n){const r=[];let o=0;for(;o<e.length;)if(r.push(e[o]),(n.get(e[o])??t)===t)for(;o<e.length&&(n.get(e[o])??t)===t;)++o;else++o;return r}(t,this.unk_token_id,this.tokens_to_ids)),t}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class k extends y{constructor(e){super(e),this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const n of e){const e=[...n];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,o=0;const s=[];for(;o<e.length;){let t=e.length,n=null;for(;o<t;){let r=e.slice(o,t).join("");if(o>0&&(r=this.config.continuing_subword_prefix+r),this.tokens_to_ids.has(r)){n=r;break}--t}if(null===n){r=!0;break}s.push(n),o=t}r?t.push(this.unk_token):t.push(...s)}return t}}class F extends y{constructor(e,t){super(e);const n=e.vocab.length;this.vocab=new Array(n),this.scores=new Array(n);for(let t=0;t<n;++t){const n=e.vocab[t];this.vocab[t]=n[0],this.scores[t]=n[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bosToken=" ",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,a.min)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.sentence,n=t.length;let r=0;for(;r<n;){const n=1;let o=!1;const s=[];for(let a of 
this.trie.commonPrefixSearch(t.slice(r))){s.push(a);const t=this.tokens_to_ids.get(a),i=this.scores[t],l=a.length;e.insert(r,l,i,t),o||l!==n||(o=!0)}o||e.insert(r,n,this.unkScore,this.unk_token_id),r+=n}}tokenize(e){const t=new l.TokenLattice(e,this.bosTokenId,this.eosTokenId);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const n of e){const e=this.tokenize(n);t.push(...e)}return t}}const C=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let n=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+n),n+=1);const r=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,r[t]])))})(),P=(0,o.reverseDictionary)(C);class v extends y{constructor(e){super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;this.bpe_ranks=new Map(e.merges.map(((e,t)=>[e,t]))),this.merges=e.merges.map((e=>e.split(this.BPE_SPLIT_TOKEN))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const n=Array.from(e);this.end_of_word_suffix&&(n[n.length-1]+=this.end_of_word_suffix);let r=[];if(n.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:n[0],bias:0,prev:null,next:null},o=t;for(let t=1;t<n.length;++t){const r={bias:t/n.length,token:n[t],prev:o,next:null};o.next=r,this._add_node(e,o),o=r}for(;!e.isEmpty();){const n=e.pop();if(n.deleted||!n.next||n.next.deleted)continue;if(n.deleted=!0,n.next.deleted=!0,n.prev){const e={...n.prev};n.prev.deleted=!0,n.prev=e,e.prev?e.prev.next=e:t=e}const r={token:n.token+n.next.token,bias:n.bias,prev:n.prev,next:n.next.next};r.prev?(r.prev.next=r,this._add_node(e,r.prev)):t=r,r.next&&(r.next.prev=r,this._add_node(e,r))}for(let e=t;null!==e;e=e.next)r.push(e.token)}else r=n;if(this.continuing_subword_suffix)for(let e=0;e<r.length-1;++e)r[e]+=this.continuing_subword_suffix;return this.cache.set(e,r),r}_add_node(e,t){const n=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==n&&(t.score=n+t.bias,e.push(t))}encode(e){const t=[];for(const n of e){if(this.ignore_merges&&this.tokens_to_ids.has(n)){t.push(n);continue}const e=this.bpe(n);for(const n of e)this.tokens_to_ids.has(n)?t.push(n):this.byte_fallback?t.push(...Array.from(this.text_encoder.encode(n)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`))):t.push(this.unk_token)}return t}}class S extends y{constructor(e,t){super(e),this.tokens_to_ids=m(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of 
this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class A extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new j(e);case"Precompiled":return new pe(e);case"Sequence":return new V(e);case"Replace":return new E(e);case"NFC":return new L(e);case"NFKC":return new z(e);case"NFKD":return new I(e);case"Strip":return new B(e);case"StripAccents":return new N(e);case"Lowercase":return new O(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class E extends A{normalize(e){const t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class L extends A{normalize(e){return e=e.normalize("NFC")}}class z extends A{normalize(e){return e=e.normalize("NFKC")}}class I extends A{normalize(e){return e=e.normalize("NFKD")}}class B extends A{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class N extends A{normalize(e){return e=g(e)}}class O extends A{normalize(e){return e=e.toLowerCase()}}class D extends A{normalize(e){return e=this.config.prepend+e}}class V extends A{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>A.fromConfig(e)))}normalize(e){return this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class j extends A{_tokenize_chinese_chars(e){const t=[];for(let n=0;n<e.length;++n){const r=e[n];M(r.charCodeAt(0))?(t.push(" "),t.push(r),t.push(" ")):t.push(r)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const n of e){const e=n.charCodeAt(0);0===e||65533===e||this._is_control(n)||(/^\s$/.test(n)?t.push(" "):t.push(n))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class R extends r.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new G(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new _e(e);case"Metaspace":return new de(e);case"ByteLevel":return new q(e);case"Split":return new $(e);case"Punctuation":return new W(e);case"Digits":return new U(e);case"Replace":return new fe(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class G extends R{constructor(e){super(),this.pattern=new RegExp(`[^\\s${w}]+|[${w}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class q extends R{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=C,this.text_encoder=new 
TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class $ extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const n=[];let r=0;for(const o of e.matchAll(t)){const t=o[0];r<o.index&&n.push(e.slice(r,o.index)),t.length>0&&n.push(t),r=o.index+t.length}return r<e.length&&n.push(e.slice(r)),n}(e,this.pattern)}}class W extends R{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${w}]+|[${w}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class U extends R{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class X extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Y(e);case"ByteLevel":return new J(e);case"RobertaProcessing":return new H(e);case"BertProcessing":return new Q(e);case"Sequence":return new K(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class Q extends X{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:n=!0}={}){n&&(e=(0,o.mergeArrays)([this.cls],e,[this.sep]));let r=new Array(e.length).fill(0);if(null!==t){const s=n&&this instanceof H?[this.sep]:[],a=n?[this.sep]:[];e=(0,o.mergeArrays)(e,s,t,a),r=(0,o.mergeArrays)(r,new Array(t.length+s.length+a.length).fill(1))}return{tokens:e,token_type_ids:r}}}class H extends Q{}class Y extends X{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:n=!0}={}){const r=null===t?this.single:this.pair;let s=[],a=[];for(const i of r)"SpecialToken"in i?n&&(s.push(i.SpecialToken.id),a.push(i.SpecialToken.type_id)):"Sequence"in i&&("A"===i.Sequence.id?(s=(0,o.mergeArrays)(s,e),a=(0,o.mergeArrays)(a,new Array(e.length).fill(i.Sequence.type_id))):"B"===i.Sequence.id&&(s=(0,o.mergeArrays)(s,t),a=(0,o.mergeArrays)(a,new Array(t.length).fill(i.Sequence.type_id))));return{tokens:s,token_type_ids:a}}}class J extends X{post_process(e,t=null){return t&&(e=(0,o.mergeArrays)(e,t)),{tokens:e}}}class K extends X{constructor(e){super(e),this.processors=e.processors.map((e=>X.fromConfig(e)))}post_process(e,t=null,n={}){let r;for(const o of this.processors)if(o instanceof J){if(e=o.post_process(e).tokens,t){t=o.post_process(t).tokens}}else{const s=o.post_process(e,t,n);e=s.tokens,r=s.token_type_ids}return{tokens:e,token_type_ids:r}}}class Z extends r.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new oe(e);case"Metaspace":return new ue(e);case"ByteLevel":return new se(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new ne(e);case"Strip":return new re(e);case"Sequence":return new ie(e);case"CTC":return new ae(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return 
this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends Z{decode_chain(e){const t=h(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let n=[];for(const r of e){let e=null;if(6===r.length&&r.startsWith("<0x")&&r.endsWith(">")){const t=parseInt(r.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)n.push(e);else{if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}t.push(r)}}if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}return t}}class ne extends Z{decode_chain(e){return[e.join("")]}}class re extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let n=0;n<this.start&&e[n]===this.content;++n)t=n+1;let n=e.length;for(let t=0;t<this.stop;++t){const r=e.length-t-1;if(e[r]!==this.content)break;n=r}return e.slice(t,n)}))}}class oe extends Z{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=f(e)),e)))}}class se extends Z{constructor(e){super(e),this.byte_decoder=P,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),n=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(n)}decode_chain(e){const t=[];let n=[];for(const r of e)void 0!==this.added_tokens.find((e=>e.content===r))?(n.length>0&&(t.push(this.convert_tokens_to_string(n)),n=[]),t.push(r)):n.push(r);return n.length>0&&t.push(this.convert_tokens_to_string(n)),t}}class ae extends Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let n=1;n<e.length;++n)e[n]!==t.at(-1)&&t.push(e[n]);let n=t.filter((e=>e!==this.pad_token)).join("");return this.cleanup&&(n=f(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class ie extends Z{constructor(e){super(e),this.decoders=e.decoders.map((e=>Z.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,n)=>t.replaceAll(this.suffix,n===e.length-1?"":" ")))}}class ce extends Z{decode_chain(e){let t="";for(let n=1;n<e.length;n+=2)t+=e[n];return[t]}}class de extends R{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let n=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!n.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(n=this.strRep+n),[n]}}class ue extends Z{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let n=0;n<e.length;++n){let r=e[n].replaceAll(this.replacement," ");this.addPrefixSpace&&0==n&&r.startsWith(" ")&&(r=r.substring(1)),t.push(r)}return t}}class pe 
extends A{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends R{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>R.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,n)=>n.pre_tokenize(e,t)),[e])}}class me extends R{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class _e extends R{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class fe extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const ge=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function Me(e,t,n,r){for(const s of Object.keys(e)){const a=t-e[s].length,i=n(s),l=new Array(a).fill(i);e[s]="right"===r?(0,o.mergeArrays)(e[s],l):(0,o.mergeArrays)(l,e[s])}}function we(e,t){for(const n of Object.keys(e))e[n].length=t}class be extends r.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=A.fromConfig(e.normalizer),this.pre_tokenizer=R.fromConfig(e.pre_tokenizer),this.model=y.fromConfig(e.model,t),this.post_processor=X.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.toSorted(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,o.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:n}of this.chat_template){if("string"!=typeof t||"string"!=typeof n)throw new Error('Chat template must be a list of objects with "name" and "template" 
properties');e[t]=n}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){return new this(...await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:s,legacy:a}))}_call(e,{text_pair:t=null,add_special_tokens:n=!0,padding:r=!1,truncation:o=null,max_length:s=null,return_tensor:l=!0,return_token_type_ids:c=null}={}){const d=Array.isArray(e);let u;if(d){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");u=e.map(((e,r)=>this._encode_plus(e,{text_pair:t[r],add_special_tokens:n,return_token_type_ids:c})))}else u=e.map((e=>this._encode_plus(e,{add_special_tokens:n,return_token_type_ids:c})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");u=[this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:c})]}if(null===s?s="max_length"===r?this.model_max_length:(0,a.max)(u.map((e=>e.input_ids.length)))[0]:o||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),s=Math.min(s,this.model_max_length??1/0),r||o)for(let e=0;e<u.length;++e)u[e].input_ids.length!==s&&(u[e].input_ids.length>s?o&&we(u[e],s):r&&Me(u[e],s,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!r||!o)&&u.some((e=>{for(const t of Object.keys(e))if(e[t].length!==u[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[u.length,u[0].input_ids.length];for(const t of Object.keys(u[0]))p[t]=new i.Tensor("int64",BigInt64Array.from(u.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(u[0]))p[e]=u.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=function(e){return g(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const n=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(n)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){const{tokens:o,token_type_ids:s}=this._tokenize_helper(e,{pair:t,add_special_tokens:n}),a=this.model.convert_tokens_to_ids(o),i={input_ids:a,attention_mask:new Array(a.length).fill(1)};return(r??this.return_token_type_ids)&&s&&(i.token_type_ids=s),i}_tokenize_helper(e,{pair:t=null,add_special_tokens:n=!1}={}){const r=this._encode_text(e),s=this._encode_text(t);return 
this.post_processor?this.post_processor(r,s,{add_special_tokens:n}):{tokens:(0,o.mergeArrays)(r??[],s??[])}}tokenize(e,{pair:t=null,add_special_tokens:n=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:n}).tokens}encode(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:r}).input_ids}batch_decode(e,t={}){return e instanceof i.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof i.Tensor&&(e=_(e)),!Array.isArray(e)||0===e.length||!(0,o.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:n=null}){let r=this.model.convert_ids_to_tokens(e);t&&(r=r.filter((e=>!this.special_tokens.includes(e))));let o=this.decoder?this.decoder(r):r.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(o=o.replaceAll(this.decoder.end_of_word_suffix," "),t&&(o=o.trim())),(n??this.clean_up_tokenization_spaces)&&(o=f(o)),o}apply_chat_template(e,{tools:t=null,documents:n=null,chat_template:r=null,add_generation_prompt:o=!1,tokenize:s=!0,padding:a=!1,truncation:i=!1,max_length:l=null,return_tensor:d=!0,return_dict:u=!1,tokenizer_kwargs:p={},...h}={}){if(this.chat_template&&"object"==typeof this.chat_template||null===this.chat_template){const e=this.chat_template;if(null!==r&&Object.hasOwn(e,r))r=e[r];else if(null===r&&"default"in e)r=e.default;else if(null===r)throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(e).sort()}.`)}else{if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");r=this.chat_template}if("string"!=typeof r)throw Error("chat_template must be a string, but got "+typeof r);let m=this._compiled_template_cache.get(r);void 0===m&&(m=new c.Template(r),this._compiled_template_cache.set(r,m));const _=Object.create(null);for(const e of ge){const t=this.getToken(e);t&&(_[e]=t)}const f=m.render({messages:e,add_generation_prompt:o,tools:t,documents:n,..._,...h});if(s){const e=this._call(f,{add_special_tokens:!1,padding:a,truncation:i,max_length:l,return_tensor:d,...p});return u?e:e.input_ids}return f}}class Te extends be{return_token_type_ids=!0}class xe extends be{return_token_type_ids=!0}class ye extends be{return_token_type_ids=!0}class ke extends be{return_token_type_ids=!0}class Fe extends be{return_token_type_ids=!0}class Ce extends be{return_token_type_ids=!0}class Pe extends be{return_token_type_ids=!0}class ve extends be{return_token_type_ids=!0}class Se extends be{return_token_type_ids=!0}class Ae extends be{}class Ee extends be{}class Le extends be{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. 
Therefore, you may experience slightly inaccurate results.')}}class ze extends be{return_token_type_ids=!0}class Ie extends be{}class Be extends be{}class Ne extends be{}class Oe extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Ke(this,e,t,n)}}class De extends Oe{}class Ve extends be{}class je extends be{constructor(e,t){const n=".,!?…。,、।۔،",r=e.pre_tokenizer?.pretokenizers[0]?.pattern;r&&r.Regex===` ?[^(\\s|[${n}])]+`&&(r.Regex=` ?[^\\s${n}]+`),super(e,t)}}const Re="▁";class Ge extends be{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Re,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Re+e.replaceAll(Re," "));return t.length>1&&t[0]===Re&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class qe extends be{}class $e extends be{}class We extends be{}class Ue extends be{}class Xe extends be{}class Qe extends be{}class He extends be{}class Ye extends be{}class Je extends be{}function Ke(e,t,n,r){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");const o=r.src_lang,s=r.tgt_lang;if(!e.language_codes.includes(s))throw new Error(`Target language code "${s}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==o){if(!e.language_codes.includes(o))throw new Error(`Source language code "${o}" is not valid. 
Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(o);break}}return r.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(s)])[0],e._call(t,n)}class Ze extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Ke(this,e,t,n)}}class et extends be{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,n){return Ke(this,e,t,n)}}class tt extends be{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:n=!1,time_precision:r=null,force_full_sequences:o=!0}={}){if(null===r)throw Error("Must specify time_precision");let s=null;const i="word"===t;function l(){return{language:s,timestamp:[null,null],text:""}}const c=[];let u=l(),p=0;const h=this.timestamp_begin;let m=[],_=[],f=!1,g=null;const M=new Set(this.all_special_ids);for(const n of e){const e=n.tokens,o=i?n.token_timestamps:null;let w=null,T=h;if("stride"in n){const[t,o,s]=n.stride;if(p-=o,g=t-s,o&&(T=o/r+h),s)for(let t=e.length-1;t>=0;--t){const n=Number(e[t]);if(n>=h){if(null!==w&&(n-h)*r<g)break;w=n}}}let x=[],y=[];for(let n=0;n<e.length;++n){const g=Number(e[n]);if(M.has(g)){const e=this.decode([g]),n=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==n){if(null!==s&&n!==s&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);u.text=t,c.push(u),m=[],x=[],u=l()}s=u.language=n}}else if(g>=h){const e=(g-h)*r+p,t=(0,a.round)(e,2);if(null!==w&&g>=w)f=!0;else if(f||m.length>0&&g<T)f=!1;else if(null===u.timestamp[0])u.timestamp[0]=t;else if(t===u.timestamp[0]);else{u.timestamp[1]=t,m.push(x),i&&_.push(y);const[e,n]=this.findLongestCommonSequence(m,_),r=this.decode(e);u.text=r,i&&(u.words=this.collateWordTimestamps(e,n,s)),c.push(u),m=[],x=[],_=[],y=[],u=l()}}else if(x.push(g),i){let e,t=(0,a.round)(o[n]+p,2);if(n+1<o.length){e=(0,a.round)(o[n+1]+p,2);const s=this.decode([g]);b.test(s)&&(e=(0,a.round)(Math.min(t+r,e),2))}else e=null;y.push([t,e])}}if("stride"in n){const[e,t,r]=n.stride;p+=e-r}x.length>0?(m.push(x),i&&_.push(y)):m.every((e=>0===e.length))&&(u=l(),m=[],x=[],_=[],y=[])}if(m.length>0){if(o&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,n]=this.findLongestCommonSequence(m,_),r=this.decode(e);u.text=r,i&&(u.words=this.collateWordTimestamps(e,n,s)),c.push(u)}let w=Object.create(null);const T=c.map((e=>e.text)).join("");if(t||n){for(let e=0;e<c.length;++e){const r=c[e];t||delete r.timestamp,n||delete r.language}if(i){const e=[];for(const t of c)for(const n of t.words)e.push(n);w={chunks:e}}else w={chunks:c}}return[T,w]}findLongestCommonSequence(e,t=null){let n=e[0],r=n.length,o=[];const s=Array.isArray(t)&&t.length>0;let a=s?[]:null,i=s?t[0]:null;for(let l=1;l<e.length;++l){const c=e[l];let d=0,u=[r,r,0,0];const p=c.length;for(let e=1;e<r+p;++e){const o=Math.max(0,r-e),a=Math.min(r,r+p-e),h=n.slice(o,a),m=Math.max(0,e-r),_=Math.min(p,e),f=c.slice(m,_);if(h.length!==f.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let g;g=s?h.filter(((e,n)=>e===f[n]&&i[o+n]<=t[l][m+n])).length:h.filter(((e,t)=>e===f[t])).length;const M=g/e+e/1e4;g>1&&M>d&&(d=M,u=[o,a,m,_])}const[h,m,_,f]=u,g=Math.floor((m+h)/2),M=Math.floor((f+_)/2);o.push(...n.slice(0,g)),n=c.slice(M),r=n.length,s&&(a.push(...i.slice(0,g)),i=t[l].slice(M))}return o.push(...n),s?(a.push(...i),[o,a]):[o,[]]}collateWordTimestamps(e,t,n){const[r,o,s]=this.combineTokensIntoWords(e,n),a=[];for(let e=0;e<r.length;++e){const n=s[e];a.push({text:r[e],timestamp:[t[n.at(0)][0],t[n.at(-1)][1]]})}return a}combineTokensIntoWords(e,t,n="\"'“¡¿([{-",r="\"'.。,,!!??::”)]}、"){let o,s,a;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[o,s,a]=this.splitTokensOnUnicode(e):[o,s,a]=this.splitTokensOnSpaces(e),this.mergePunctuations(o,s,a,n,r)}decode(e,t){let n;return t?.decode_with_timestamps?(e instanceof i.Tensor&&(e=_(e)),n=this.decodeWithTimestamps(e,t)):n=super.decode(e,t),n}decodeWithTimestamps(e,t){const n=t?.time_precision??.02,r=Array.from(this.all_special_ids).at(-1)+1;let o=[[]];for(let t of e)if(t=Number(t),t>=r){const e=((t-r)*n).toFixed(2);o.push(`<|${e}|>`),o.push([])}else o[o.length-1].push(t);return o=o.map((e=>"string"==typeof e?e:super.decode(e,t))),o.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),n=[],r=[],o=[];let s=[],a=[],i=0;for(let l=0;l<e.length;++l){const c=e[l];s.push(c),a.push(l);const d=this.decode(s,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[i+d.indexOf("�")]||(n.push(d),r.push(s),o.push(a),s=[],a=[],i+=d.length)}return[n,r,o]}splitTokensOnSpaces(e){const[t,n,r]=this.splitTokensOnUnicode(e),o=[],s=[],a=[],i=new RegExp(`^[${w}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],c=n[e],d=r[e],u=c[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=i.test(h);if(u||p||m||0===o.length)o.push(l),s.push(c),a.push(d);else{const e=o.length-1;o[e]+=l,s[e].push(...c),a[e].push(...d)}}return[o,s,a]}mergePunctuations(e,t,n,r,s){const a=structuredClone(e),i=structuredClone(t),l=structuredClone(n);let c=a.length-2,d=a.length-1;for(;c>=0;)a[c].startsWith(" ")&&r.includes(a[c].trim())?(a[d]=a[c]+a[d],i[d]=(0,o.mergeArrays)(i[c],i[d]),l[d]=(0,o.mergeArrays)(l[c],l[d]),a[c]="",i[c]=[],l[c]=[]):d=c,--c;for(c=0,d=1;d<a.length;)!a[c].endsWith(" 
")&&s.includes(a[d])?(a[c]+=a[d],i[c]=(0,o.mergeArrays)(i[c],i[d]),l[c]=(0,o.mergeArrays)(l[c],l[d]),a[d]="",i[d]=[],l[d]=[]):c=d,++d;return[a.filter((e=>e)),i.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:n=!0}={}){const r=[];if(e){const t=(0,d.whisper_language_to_code)(e),n=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===n)throw new Error(`Unable to find language "${t}" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);r.push(n)}else r.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);r.push(e)}else r.push(null);if(n){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);r.push(e)}return r.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class nt extends be{}class rt extends be{}class ot extends be{}class st extends be{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...n]=e.trim().split(this.languageRegex);if(0===n.length)return super._encode_text(t);if(2===n.length){const[e,t]=n;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,o.mergeArrays)([e],super._encode_text(t))}}}class at extends be{}class it extends be{}class lt extends be{}class ct extends be{}class dt extends be{}class ut extends be{constructor(e,t){super(e,t),this.decoder=new ce({})}}class pt extends be{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:Ie,DistilBertTokenizer:Ae,CamembertTokenizer:Ee,DebertaTokenizer:Fe,DebertaV2Tokenizer:Ce,BertTokenizer:Te,HerbertTokenizer:Pe,ConvBertTokenizer:ve,RoFormerTokenizer:Se,XLMTokenizer:Le,ElectraTokenizer:ze,MobileBertTokenizer:ye,SqueezeBertTokenizer:ke,AlbertTokenizer:xe,GPT2Tokenizer:Be,BartTokenizer:Ne,MBartTokenizer:Oe,MBart50Tokenizer:De,RobertaTokenizer:Ve,WhisperTokenizer:tt,CodeGenTokenizer:nt,CLIPTokenizer:rt,SiglipTokenizer:ot,MarianTokenizer:st,BloomTokenizer:je,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:Ge,CodeLlamaTokenizer:qe,XLMRobertaTokenizer:$e,MPNetTokenizer:We,FalconTokenizer:Ue,GPTNeoXTokenizer:Xe,EsmTokenizer:Qe,Wav2Vec2CTCTokenizer:at,BlenderbotTokenizer:it,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ct,NougatTokenizer:dt,VitsTokenizer:ut,Qwen2Tokenizer:He,GemmaTokenizer:Ye,Grok1Tokenizer:Je,CohereTokenizer:pt,PreTrainedTokenizer:be};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){const[i,l]=await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:s,legacy:a}),c=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[c];return d||(console.warn(`Unknown tokenizer class "${c}", attempting to construct from base class.`),d=be),new d(i,l)}}},"./src/utils/audio.js":
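The tokenizers module that ends here backs the `AutoTokenizer` factory (class `ht` above): `from_pretrained` fetches `tokenizer.json` and `tokenizer_config.json`, maps `tokenizer_class` (with any `Fast` suffix stripped) onto `TOKENIZER_CLASS_MAPPING`, and falls back to `PreTrainedTokenizer` with a warning. A short usage sketch; the checkpoint id is illustrative, and the chat-template call assumes a config that actually defines a `chat_template`:

    import { AutoTokenizer } from '@huggingface/transformers';

    const tokenizer = await AutoTokenizer.from_pretrained('Xenova/gpt2');

    // Calling the tokenizer returns int64 Tensors by default (return_tensor: true).
    const { input_ids, attention_mask } = tokenizer('Hello world');
    console.log(input_ids.dims); // [1, number_of_tokens]

    // decode() accepts a 1-2D Tensor with batch size 1, or a plain array of token ids.
    console.log(tokenizer.decode(input_ids, { skip_special_tokens: true }));

    // For chat checkpoints only (tokenizer_config.json must define chat_template):
    // const prompt = tokenizer.apply_chat_template(
    //   [{ role: 'user', content: 'Hello!' }],
    //   { tokenize: false, add_generation_prompt: true },
    // );
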
@@ -163,5 +163,5 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as n from"path";i
163
163
  \*****************************/(e,t,n)=>{n.r(t),n.d(t,{Tensor:()=>i,cat:()=>w,full:()=>k,full_like:()=>F,interpolate:()=>c,interpolate_4d:()=>d,layer_norm:()=>_,matmul:()=>u,mean:()=>x,mean_pooling:()=>m,ones:()=>C,ones_like:()=>P,permute:()=>l,quantize_embeddings:()=>A,rfft:()=>p,stack:()=>b,std_mean:()=>T,topk:()=>h,zeros:()=>v,zeros_like:()=>S});var r=n(/*! ./maths.js */"./src/utils/maths.js"),o=n(/*! ../backends/onnx.js */"./src/backends/onnx.js"),s=n(/*! ../ops/registry.js */"./src/ops/registry.js");const a=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});class i{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return(0,o.isONNXTensor)(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new o.Tensor(e[0],e[1],e[2]),new Proxy(this,{get:(e,t)=>{if("string"==typeof t){let n=Number(t);if(Number.isInteger(n))return e._getitem(n)}return e[t]},set:(e,t,n)=>e[t]=n})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...t]=this.dims;if(t.length>0){const n=t.reduce(((e,t)=>e*t));for(let r=0;r<e;++r)yield this._subarray(r,n,t)}else yield*this.data}_getitem(e){const[t,...n]=this.dims;if(e=M(e,t),n.length>0){const t=n.reduce(((e,t)=>e*t));return this._subarray(e,t,n)}return new i(this.type,[this.data[e]],n)}indexOf(e){const t=this.data;for(let n=0;n<t.length;++n)if(t[n]==e)return n;return-1}_subarray(e,t,n){const r=e*t,o=(e+1)*t,s="subarray"in this.data?this.data.subarray(r,o):this.data.slice(r,o);return new i(this.type,s,n)}item(){const e=this.data;if(1!==e.length)throw new Error(`a Tensor with ${e.length} elements cannot be converted to Scalar`);return e[0]}tolist(){return function(e,t){const n=e.length,r=t.reduce(((e,t)=>e*t));if(n!==r)throw Error(`cannot reshape array of size ${n} into shape (${t})`);let o=e;for(let e=t.length-1;e>=0;e--)o=o.reduce(((n,r)=>{let o=n[n.length-1];return o.length<t[e]?o.push(r):n.push([r]),n}),[[]]);return o[0]}(this.data,this.dims)}sigmoid(){return this.clone().sigmoid_()}sigmoid_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=1/(1+Math.exp(-e[t]));return this}mul(e){return this.clone().mul_(e)}mul_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]*=e;return this}div(e){return this.clone().div_(e)}div_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]/=e;return this}add(e){return this.clone().add_(e)}add_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]+=e;return this}sub(e){return this.clone().sub_(e)}sub_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]-=e;return this}clone(){return new i(this.type,this.data.slice(),this.dims.slice())}slice(...e){const t=[],n=[];for(let r=0;r<this.dims.length;++r){let o=e[r];if(null==o)n.push([0,this.dims[r]]),t.push(this.dims[r]);else if("number"==typeof o)o=M(o,this.dims[r],r),n.push([o,o+1]);else{if(!Array.isArray(o)||2!==o.length)throw new Error(`Invalid slice: ${o}`);{let[e,s]=o;if(e=null===e?0:M(e,this.dims[r],r,!1),s=null===s?this.dims[r]:M(s,this.dims[r],r,!1),e>s)throw new Error(`Invalid slice: ${o}`);const a=[Math.max(e,0),Math.min(s,this.dims[r])];n.push(a),t.push(a[1]-a[0])}}}const r=n.map((([e,t])=>t-e)),o=r.reduce(((e,t)=>e*t)),s=this.data,a=new 
s.constructor(o),l=this.stride();for(let e=0;e<o;++e){let t=0;for(let o=r.length-1,s=e;o>=0;--o){const e=r[o];t+=(s%e+n[o][0])*l[o],s=Math.floor(s/e)}a[e]=s[t]}return new i(this.type,a,t)}permute(...e){return l(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,t=!1){return this.norm(1,e,t)}norm(e="fro",t=null,n=!1){if("fro"===e)e=2;else if("string"==typeof e)throw Error(`Unsupported norm: ${e}`);const r=this.data;if(null===t){let t=r.reduce(((t,n)=>t+n**e),0)**(1/e);return new i(this.type,[t],[])}t=M(t,this.dims.length);const o=this.dims.slice();o[t]=1;const s=new r.constructor(r.length/this.dims[t]);for(let n=0;n<r.length;++n){let a=0;for(let e=this.dims.length-1,r=n,s=1;e>=0;--e){const n=this.dims[e];if(e!==t){a+=r%n*s,s*=o[e]}r=Math.floor(r/n)}s[a]+=r[n]**e}if(1!==e)for(let t=0;t<s.length;++t)s[t]=s[t]**(1/e);return n||o.splice(t,1),new i(this.type,s,o)}normalize_(e=2,t=1){t=M(t,this.dims.length);const n=this.norm(e,t,!0),r=this.data,o=n.data;for(let e=0;e<r.length;++e){let n=0;for(let r=this.dims.length-1,o=e,s=1;r>=0;--r){const e=this.dims[r];if(r!==t){n+=o%e*s,s*=this.dims[r]}o=Math.floor(o/e)}r[e]/=o[n]}return this}normalize(e=2,t=1){return this.clone().normalize_(e,t)}stride(){return function(e){const t=new Array(e.length);for(let n=e.length-1,r=1;n>=0;--n)t[n]=r,r*=e[n];return t}(this.dims)}squeeze(e=null){return new i(this.type,this.data,f(this.dims,e))}squeeze_(e=null){return this.dims=f(this.dims,e),this}unsqueeze(e=null){return new i(this.type,this.data,g(this.dims,e))}unsqueeze_(e=null){return this.dims=g(this.dims,e),this}flatten_(e=0,t=-1){t=(t+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),r=this.dims.slice(e,t+1),o=this.dims.slice(t+1);return this.dims=[...n,r.reduce(((e,t)=>e*t),1),...o],this}flatten(e=0,t=-1){return this.clone().flatten_(e,t)}view(...e){let t=-1;for(let n=0;n<e.length;++n)if(-1===e[n]){if(-1!==t)throw new Error("Only one dimension can be inferred");t=n}const n=this.data;if(-1!==t){const r=e.reduce(((e,n,r)=>r!==t?e*n:e),1);e[t]=n.length/r}return new i(this.type,n,e)}neg_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=-e[t];return this}neg(){return this.clone().neg_()}clamp_(e,t){const n=this.data;for(let r=0;r<n.length;++r)n[r]=Math.min(Math.max(n[r],e),t);return this}clamp(e,t){return this.clone().clamp_(e,t)}round_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=Math.round(e[t]);return this}round(){return this.clone().round_()}mean(e=null,t=!1){return x(this,e,t)}to(e){if(this.type===e)return this;if(!a.hasOwnProperty(e))throw new Error(`Unsupported type: ${e}`);return new i(e,a[e].from(this.data),this.dims)}}function l(e,t){const[n,o]=(0,r.permute_data)(e.data,e.dims,t);return new i(e.type,n,o)}function c(e,[t,n],o="bilinear",s=!1){const a=e.dims.at(-3)??1,l=e.dims.at(-2),c=e.dims.at(-1);let d=(0,r.interpolate_data)(e.data,[a,l,c],[t,n],o,s);return new i(e.type,d,[a,t,n])}async function d(e,{size:t=null,mode:n="bilinear"}={}){if(4!==e.dims.length)throw new Error("`interpolate_4d` currently only supports 4D input.");if(!t)throw new Error("`interpolate_4d` requires a `size` argument.");let r,o;if(2===t.length)r=[...e.dims.slice(0,2),...t];else if(3===t.length)r=[e.dims[0],...t];else{if(4!==t.length)throw new Error("`size` must be of length 2, 3, or 4.");r=t}if("bilinear"===n)o=await s.TensorOpRegistry.bilinear_interpolate_4d;else{if("bicubic"!==n)throw new Error(`Unsupported mode: ${n}`);o=await s.TensorOpRegistry.bicubic_interpolate_4d}const a=new i("int64",new BigInt64Array(r.map(BigInt)),[r.length]);return await 
o({x:e,s:a})}async function u(e,t){const n=await s.TensorOpRegistry.matmul;return await n({a:e,b:t})}async function p(e,t){const n=await s.TensorOpRegistry.rfft;return await n({x:e,a:t})}async function h(e,t){const n=await s.TensorOpRegistry.top_k;return t=null===t?e.dims.at(-1):Math.min(t,e.dims.at(-1)),await n({x:e,k:new i("int64",[BigInt(t)],[1])})}function m(e,t){const n=e.data,r=t.data,o=[e.dims[0],e.dims[2]],s=new n.constructor(o[0]*o[1]),[a,l,c]=e.dims;let d=0;for(let e=0;e<a;++e){const t=e*c*l;for(let o=0;o<c;++o){let a=0,i=0;const u=e*l,p=t+o;for(let e=0;e<l;++e){const t=Number(r[u+e]);i+=t,a+=n[p+e*c]*t}const h=a/i;s[d++]=h}}return new i(e.type,s,o)}function _(e,t,{eps:n=1e-5}={}){if(2!==e.dims.length)throw new Error("`layer_norm` currently only supports 2D input.");const[r,o]=e.dims;if(1!==t.length&&t[0]!==o)throw new Error("`normalized_shape` must be a 1D array with shape `[input.dims[1]]`.");const[s,a]=T(e,1,0,!0),l=s.data,c=a.data,d=e.data,u=new d.constructor(d.length);for(let e=0;e<r;++e){const t=e*o;for(let r=0;r<o;++r){const o=t+r;u[o]=(d[o]-c[e])/(l[e]+n)}}return new i(e.type,u,e.dims)}function f(e,t){return e=e.slice(),null===t?e=e.filter((e=>1!==e)):"number"==typeof t?1===e[t]&&e.splice(t,1):Array.isArray(t)&&(e=e.filter(((e,n)=>1!==e||!t.includes(n)))),e}function g(e,t){return t=M(t,e.length+1),(e=e.slice()).splice(t,0,1),e}function M(e,t,n=null,r=!0){if(r&&(e<-t||e>=t))throw new Error(`IndexError: index ${e} is out of bounds for dimension${null===n?"":" "+n} with size ${t}`);return e<0&&(e=(e%t+t)%t),e}function w(e,t=0){t=M(t,e[0].dims.length);const n=e[0].dims.slice();n[t]=e.reduce(((e,n)=>e+n.dims[t]),0);const r=n.reduce(((e,t)=>e*t),1),o=new e[0].data.constructor(r),s=e[0].type;if(0===t){let t=0;for(const n of e){const e=n.data;o.set(e,t),t+=e.length}}else{let r=0;for(let s=0;s<e.length;++s){const{data:a,dims:i}=e[s];for(let e=0;e<a.length;++e){let s=0;for(let o=i.length-1,a=e,l=1;o>=0;--o){const e=i[o];let c=a%e;o===t&&(c+=r),s+=c*l,l*=n[o],a=Math.floor(a/e)}o[s]=a[e]}r+=i[t]}}return new i(s,o,n)}function b(e,t=0){return w(e.map((e=>e.unsqueeze(t))),t)}function T(e,t=null,n=1,r=!1){const o=e.data,s=e.dims;if(null===t){const t=o.reduce(((e,t)=>e+t),0)/o.length,r=Math.sqrt(o.reduce(((e,n)=>e+(n-t)**2),0)/(o.length-n)),s=new i(e.type,[t],[]);return[new i(e.type,[r],[]),s]}const a=x(e,t=M(t,s.length),r),l=a.data,c=s.slice();c[t]=1;const d=new o.constructor(o.length/s[t]);for(let e=0;e<o.length;++e){let n=0;for(let r=s.length-1,o=e,a=1;r>=0;--r){const e=s[r];if(r!==t){n+=o%e*a,a*=c[r]}o=Math.floor(o/e)}d[n]+=(o[e]-l[n])**2}for(let e=0;e<d.length;++e)d[e]=Math.sqrt(d[e]/(s[t]-n));r||c.splice(t,1);return[new i(e.type,d,c),a]}function x(e,t=null,n=!1){const r=e.data;if(null===t){const t=r.reduce(((e,t)=>e+t),0);return new i(e.type,[t/r.length],[])}const o=e.dims;t=M(t,o.length);const s=o.slice();s[t]=1;const a=new r.constructor(r.length/o[t]);for(let e=0;e<r.length;++e){let n=0;for(let r=o.length-1,a=e,i=1;r>=0;--r){const e=o[r];if(r!==t){n+=a%e*i,i*=s[r]}a=Math.floor(a/e)}a[n]+=r[e]}if(1!==o[t])for(let e=0;e<a.length;++e)a[e]=a[e]/o[t];return n||s.splice(t,1),new i(e.type,a,s)}function y(e,t,n,r){const o=e.reduce(((e,t)=>e*t),1);return new i(n,new r(o).fill(t),e)}function k(e,t){let n,r;if("number"==typeof t)n="float32",r=Float32Array;else{if("bigint"!=typeof t)throw new Error("Unsupported data type: "+typeof t);n="int64",r=BigInt64Array}return y(e,t,n,r)}function F(e,t){return k(e.dims,t)}function C(e){return y(e,1n,"int64",BigInt64Array)}function P(e){return 
C(e.dims)}function v(e){return y(e,0n,"int64",BigInt64Array)}function S(e){return v(e.dims)}function A(e,t){if(2!==e.dims.length)throw new Error("The tensor must have 2 dimensions");if(e.dims.at(-1)%8!=0)throw new Error("The last dimension of the tensor must be a multiple of 8");if(!["binary","ubinary"].includes(t))throw new Error("The precision must be either 'binary' or 'ubinary'");const n="binary"===t,r=n?"int8":"uint8",o=n?Int8Array:Uint8Array,s=e.data,a=new o(s.length/8);for(let e=0;e<s.length;++e){const t=s[e]>0?1:0,r=Math.floor(e/8),o=e%8;a[r]|=t<<7-o,n&&0===o&&(a[r]-=128)}return new i(r,a,[e.dims[0],e.dims[1]/8])}}},l={};function c(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={exports:{}};return i[e](n,n.exports,c),n.exports}a=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,c.t=function(e,t){if(1&t&&(e=this(e)),8&t)return e;if("object"==typeof e&&e){if(4&t&&e.__esModule)return e;if(16&t&&"function"==typeof e.then)return e}var n=Object.create(null);c.r(n);var r={};s=s||[null,a({}),a([]),a(a)];for(var o=2&t&&e;"object"==typeof o&&!~s.indexOf(o);o=a(o))Object.getOwnPropertyNames(o).forEach((t=>r[t]=()=>e[t]));return r.default=()=>e,c.d(n,r),n},c.d=(e,t)=>{for(var n in t)c.o(t,n)&&!c.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},c.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),c.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};var d={};
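
The bundled chunk above is the ./src/utils/tensor.js module: it defines the Tensor wrapper around the ONNX runtime tensor plus helpers such as cat, stack, mean_pooling and quantize_embeddings, which the ./src/transformers.js entry point below re-exports. A minimal usage sketch, assuming the package's public entry point as shown in this diff; the shapes and values are illustrative only, not taken from the release:

import { Tensor, cat, mean_pooling } from '@huggingface/transformers';

// new Tensor(type, data, dims): a 1x3x2 float32 tensor (batch, tokens, hidden)
const hidden = new Tensor('float32', new Float32Array([1, 2, 3, 4, 5, 6]), [1, 3, 2]);

// int64 attention mask over the 3 tokens (1n = keep, 0n = ignore)
const mask = new Tensor('int64', new BigInt64Array([1n, 1n, 0n]), [1, 3]);

// Masked mean over the token dimension -> dims [1, 2]
const pooled = mean_pooling(hidden, mask);

// Concatenate along dim 0 -> dims [2, 3, 2]
const both = cat([hidden, hidden.clone()], 0);

console.log(pooled.dims, both.dims);
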
164
164
  /*!*****************************!*\
165
165
  !*** ./src/transformers.js ***!
166
- \*****************************/c.r(d),c.d(d,{ASTFeatureExtractor:()=>_.ASTFeatureExtractor,ASTForAudioClassification:()=>h.ASTForAudioClassification,ASTModel:()=>h.ASTModel,ASTPreTrainedModel:()=>h.ASTPreTrainedModel,AlbertForMaskedLM:()=>h.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>h.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>h.AlbertForSequenceClassification,AlbertModel:()=>h.AlbertModel,AlbertPreTrainedModel:()=>h.AlbertPreTrainedModel,AlbertTokenizer:()=>m.AlbertTokenizer,AudioClassificationPipeline:()=>p.AudioClassificationPipeline,AutoConfig:()=>f.AutoConfig,AutoModel:()=>h.AutoModel,AutoModelForAudioClassification:()=>h.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>h.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>h.AutoModelForCTC,AutoModelForCausalLM:()=>h.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>h.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>h.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>h.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>h.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>h.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>h.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>h.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>h.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>h.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>h.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>h.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>h.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>h.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>h.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>h.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>h.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>h.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>h.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>h.AutoModelForTokenClassification,AutoModelForVision2Seq:()=>h.AutoModelForVision2Seq,AutoModelForXVector:()=>h.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>h.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>_.AutoProcessor,AutoTokenizer:()=>m.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>p.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>h.BartForConditionalGeneration,BartForSequenceClassification:()=>h.BartForSequenceClassification,BartModel:()=>h.BartModel,BartPretrainedModel:()=>h.BartPretrainedModel,BartTokenizer:()=>m.BartTokenizer,BaseModelOutput:()=>h.BaseModelOutput,BaseStreamer:()=>T.BaseStreamer,BeitFeatureExtractor:()=>_.BeitFeatureExtractor,BeitForImageClassification:()=>h.BeitForImageClassification,BeitModel:()=>h.BeitModel,BeitPreTrainedModel:()=>h.BeitPreTrainedModel,BertForMaskedLM:()=>h.BertForMaskedLM,BertForQuestionAnswering:()=>h.BertForQuestionAnswering,BertForSequenceClassification:()=>h.BertForSequenceClassification,BertForTokenClassification:()=>h.BertForTokenClassification,BertModel:()=>h.BertModel,BertPreTrainedModel:()=>h.BertPreTrainedModel,BertTokenizer:()=>m.BertTokenizer,BitImageProcessor:()=>_.BitImageProcessor,BlenderbotForConditionalGeneration:()=>h.BlenderbotForConditionalGeneration,BlenderbotModel:()=>h.BlenderbotModel,BlenderbotPreTrainedModel:()=>h.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>h.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>h.BlenderbotSmall
Model,BlenderbotSmallPreTrainedModel:()=>h.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>m.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>m.BlenderbotTokenizer,BloomForCausalLM:()=>h.BloomForCausalLM,BloomModel:()=>h.BloomModel,BloomPreTrainedModel:()=>h.BloomPreTrainedModel,BloomTokenizer:()=>m.BloomTokenizer,CLIPFeatureExtractor:()=>_.CLIPFeatureExtractor,CLIPImageProcessor:()=>_.CLIPImageProcessor,CLIPModel:()=>h.CLIPModel,CLIPPreTrainedModel:()=>h.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>h.CLIPSegForImageSegmentation,CLIPSegModel:()=>h.CLIPSegModel,CLIPSegPreTrainedModel:()=>h.CLIPSegPreTrainedModel,CLIPTextModelWithProjection:()=>h.CLIPTextModelWithProjection,CLIPTokenizer:()=>m.CLIPTokenizer,CLIPVisionModelWithProjection:()=>h.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>h.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>h.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>h.CamembertForSequenceClassification,CamembertForTokenClassification:()=>h.CamembertForTokenClassification,CamembertModel:()=>h.CamembertModel,CamembertPreTrainedModel:()=>h.CamembertPreTrainedModel,CamembertTokenizer:()=>m.CamembertTokenizer,CausalLMOutput:()=>h.CausalLMOutput,CausalLMOutputWithPast:()=>h.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>_.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>h.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>h.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>h.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>_.ClapFeatureExtractor,ClapModel:()=>h.ClapModel,ClapPreTrainedModel:()=>h.ClapPreTrainedModel,ClapTextModelWithProjection:()=>h.ClapTextModelWithProjection,CodeGenForCausalLM:()=>h.CodeGenForCausalLM,CodeGenModel:()=>h.CodeGenModel,CodeGenPreTrainedModel:()=>h.CodeGenPreTrainedModel,CodeGenTokenizer:()=>m.CodeGenTokenizer,CodeLlamaTokenizer:()=>m.CodeLlamaTokenizer,CohereForCausalLM:()=>h.CohereForCausalLM,CohereModel:()=>h.CohereModel,CoherePreTrainedModel:()=>h.CoherePreTrainedModel,CohereTokenizer:()=>m.CohereTokenizer,ConvBertForMaskedLM:()=>h.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>h.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>h.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>h.ConvBertForTokenClassification,ConvBertModel:()=>h.ConvBertModel,ConvBertPreTrainedModel:()=>h.ConvBertPreTrainedModel,ConvBertTokenizer:()=>m.ConvBertTokenizer,ConvNextFeatureExtractor:()=>_.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>h.ConvNextForImageClassification,ConvNextImageProcessor:()=>_.ConvNextImageProcessor,ConvNextModel:()=>h.ConvNextModel,ConvNextPreTrainedModel:()=>h.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>h.ConvNextV2ForImageClassification,ConvNextV2Model:()=>h.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>h.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>_.DPTFeatureExtractor,DPTForDepthEstimation:()=>h.DPTForDepthEstimation,DPTImageProcessor:()=>_.DPTImageProcessor,DPTModel:()=>h.DPTModel,DPTPreTrainedModel:()=>h.DPTPreTrainedModel,DebertaForMaskedLM:()=>h.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>h.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>h.DebertaForSequenceClassification,DebertaForTokenClassification:()=>h.DebertaForTokenClassification,DebertaModel:()=>h.DebertaModel,DebertaPreTrainedModel:()=>h.DebertaPreTrainedModel,DebertaTokenizer:()=>m.DebertaTokenizer,DebertaV2ForMaskedLM:()=>h.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>h.DebertaV2ForQuestio
nAnswering,DebertaV2ForSequenceClassification:()=>h.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>h.DebertaV2ForTokenClassification,DebertaV2Model:()=>h.DebertaV2Model,DebertaV2PreTrainedModel:()=>h.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>m.DebertaV2Tokenizer,DeiTFeatureExtractor:()=>_.DeiTFeatureExtractor,DeiTForImageClassification:()=>h.DeiTForImageClassification,DeiTModel:()=>h.DeiTModel,DeiTPreTrainedModel:()=>h.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>h.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>h.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>p.DepthEstimationPipeline,DetrFeatureExtractor:()=>_.DetrFeatureExtractor,DetrForObjectDetection:()=>h.DetrForObjectDetection,DetrForSegmentation:()=>h.DetrForSegmentation,DetrModel:()=>h.DetrModel,DetrObjectDetectionOutput:()=>h.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>h.DetrPreTrainedModel,DetrSegmentationOutput:()=>h.DetrSegmentationOutput,Dinov2ForImageClassification:()=>h.Dinov2ForImageClassification,Dinov2Model:()=>h.Dinov2Model,Dinov2PreTrainedModel:()=>h.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>h.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>h.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>h.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>h.DistilBertForTokenClassification,DistilBertModel:()=>h.DistilBertModel,DistilBertPreTrainedModel:()=>h.DistilBertPreTrainedModel,DistilBertTokenizer:()=>m.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>p.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>_.DonutFeatureExtractor,DonutSwinModel:()=>h.DonutSwinModel,DonutSwinPreTrainedModel:()=>h.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>h.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>_.EfficientNetImageProcessor,EfficientNetModel:()=>h.EfficientNetModel,EfficientNetPreTrainedModel:()=>h.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>h.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>h.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>h.ElectraForSequenceClassification,ElectraForTokenClassification:()=>h.ElectraForTokenClassification,ElectraModel:()=>h.ElectraModel,ElectraPreTrainedModel:()=>h.ElectraPreTrainedModel,ElectraTokenizer:()=>m.ElectraTokenizer,EosTokenCriteria:()=>x.EosTokenCriteria,EsmForMaskedLM:()=>h.EsmForMaskedLM,EsmForSequenceClassification:()=>h.EsmForSequenceClassification,EsmForTokenClassification:()=>h.EsmForTokenClassification,EsmModel:()=>h.EsmModel,EsmPreTrainedModel:()=>h.EsmPreTrainedModel,EsmTokenizer:()=>m.EsmTokenizer,FFT:()=>b.FFT,FalconForCausalLM:()=>h.FalconForCausalLM,FalconModel:()=>h.FalconModel,FalconPreTrainedModel:()=>h.FalconPreTrainedModel,FalconTokenizer:()=>m.FalconTokenizer,FastViTForImageClassification:()=>h.FastViTForImageClassification,FastViTModel:()=>h.FastViTModel,FastViTPreTrainedModel:()=>h.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>p.FeatureExtractionPipeline,FeatureExtractor:()=>_.FeatureExtractor,FillMaskPipeline:()=>p.FillMaskPipeline,Florence2ForConditionalGeneration:()=>h.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>h.Florence2PreTrainedModel,Florence2Processor:()=>_.Florence2Processor,GLPNFeatureExtractor:()=>_.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>h.GLPNForDepthEstimation,GLPNModel:()=>h.GLPNModel,GLPNPreTrainedModel:()=>h.GLPNPreTrainedModel,GPT2LMHeadModel:()=>h.GPT2LMHeadModel,GPT2Model:()=>h.GPT2Model,GPT2PreTrainedMode
l:()=>h.GPT2PreTrainedModel,GPT2Tokenizer:()=>m.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>h.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>h.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>h.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>h.GPTJForCausalLM,GPTJModel:()=>h.GPTJModel,GPTJPreTrainedModel:()=>h.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>h.GPTNeoForCausalLM,GPTNeoModel:()=>h.GPTNeoModel,GPTNeoPreTrainedModel:()=>h.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>h.GPTNeoXForCausalLM,GPTNeoXModel:()=>h.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>h.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>m.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>h.Gemma2ForCausalLM,Gemma2Model:()=>h.Gemma2Model,Gemma2PreTrainedModel:()=>h.Gemma2PreTrainedModel,GemmaForCausalLM:()=>h.GemmaForCausalLM,GemmaModel:()=>h.GemmaModel,GemmaPreTrainedModel:()=>h.GemmaPreTrainedModel,GemmaTokenizer:()=>m.GemmaTokenizer,Grok1Tokenizer:()=>m.Grok1Tokenizer,HerbertTokenizer:()=>m.HerbertTokenizer,HubertForCTC:()=>h.HubertForCTC,HubertForSequenceClassification:()=>h.HubertForSequenceClassification,HubertModel:()=>h.HubertModel,HubertPreTrainedModel:()=>h.HubertPreTrainedModel,ImageClassificationPipeline:()=>p.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>p.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>_.ImageFeatureExtractor,ImageMattingOutput:()=>h.ImageMattingOutput,ImageSegmentationPipeline:()=>p.ImageSegmentationPipeline,ImageToImagePipeline:()=>p.ImageToImagePipeline,ImageToTextPipeline:()=>p.ImageToTextPipeline,InterruptableStoppingCriteria:()=>x.InterruptableStoppingCriteria,JAISLMHeadModel:()=>h.JAISLMHeadModel,JAISModel:()=>h.JAISModel,JAISPreTrainedModel:()=>h.JAISPreTrainedModel,LlamaForCausalLM:()=>h.LlamaForCausalLM,LlamaModel:()=>h.LlamaModel,LlamaPreTrainedModel:()=>h.LlamaPreTrainedModel,LlamaTokenizer:()=>m.LlamaTokenizer,LlavaForConditionalGeneration:()=>h.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>h.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>h.LongT5ForConditionalGeneration,LongT5Model:()=>h.LongT5Model,LongT5PreTrainedModel:()=>h.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>h.M2M100ForConditionalGeneration,M2M100Model:()=>h.M2M100Model,M2M100PreTrainedModel:()=>h.M2M100PreTrainedModel,M2M100Tokenizer:()=>m.M2M100Tokenizer,MBart50Tokenizer:()=>m.MBart50Tokenizer,MBartForCausalLM:()=>h.MBartForCausalLM,MBartForConditionalGeneration:()=>h.MBartForConditionalGeneration,MBartForSequenceClassification:()=>h.MBartForSequenceClassification,MBartModel:()=>h.MBartModel,MBartPreTrainedModel:()=>h.MBartPreTrainedModel,MBartTokenizer:()=>m.MBartTokenizer,MPNetForMaskedLM:()=>h.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>h.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>h.MPNetForSequenceClassification,MPNetForTokenClassification:()=>h.MPNetForTokenClassification,MPNetModel:()=>h.MPNetModel,MPNetPreTrainedModel:()=>h.MPNetPreTrainedModel,MPNetTokenizer:()=>m.MPNetTokenizer,MT5ForConditionalGeneration:()=>h.MT5ForConditionalGeneration,MT5Model:()=>h.MT5Model,MT5PreTrainedModel:()=>h.MT5PreTrainedModel,MarianMTModel:()=>h.MarianMTModel,MarianModel:()=>h.MarianModel,MarianPreTrainedModel:()=>h.MarianPreTrainedModel,MarianTokenizer:()=>m.MarianTokenizer,MaskedLMOutput:()=>h.MaskedLMOutput,MaxLengthCriteria:()=>x.MaxLengthCriteria,MistralForCausalLM:()=>h.MistralForCausalLM,MistralModel:()=>h.MistralModel,MistralPreTrainedModel:()=>h.MistralPreTrainedModel,MobileBertForMaskedLM:()=>h.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>h.MobileBertForQuestionA
nswering,MobileBertForSequenceClassification:()=>h.MobileBertForSequenceClassification,MobileBertModel:()=>h.MobileBertModel,MobileBertPreTrainedModel:()=>h.MobileBertPreTrainedModel,MobileBertTokenizer:()=>m.MobileBertTokenizer,MobileNetV1FeatureExtractor:()=>_.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>h.MobileNetV1ForImageClassification,MobileNetV1Model:()=>h.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>h.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>_.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>h.MobileNetV2ForImageClassification,MobileNetV2Model:()=>h.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>h.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>_.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>h.MobileNetV3ForImageClassification,MobileNetV3Model:()=>h.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>h.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>_.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>h.MobileNetV4ForImageClassification,MobileNetV4Model:()=>h.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>h.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>_.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>h.MobileViTForImageClassification,MobileViTImageProcessor:()=>_.MobileViTImageProcessor,MobileViTModel:()=>h.MobileViTModel,MobileViTPreTrainedModel:()=>h.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>h.MobileViTV2ForImageClassification,MobileViTV2Model:()=>h.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>h.MobileViTV2PreTrainedModel,ModelOutput:()=>h.ModelOutput,Moondream1ForConditionalGeneration:()=>h.Moondream1ForConditionalGeneration,MptForCausalLM:()=>h.MptForCausalLM,MptModel:()=>h.MptModel,MptPreTrainedModel:()=>h.MptPreTrainedModel,MusicgenForCausalLM:()=>h.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>h.MusicgenForConditionalGeneration,MusicgenModel:()=>h.MusicgenModel,MusicgenPreTrainedModel:()=>h.MusicgenPreTrainedModel,NllbTokenizer:()=>m.NllbTokenizer,NomicBertModel:()=>h.NomicBertModel,NomicBertPreTrainedModel:()=>h.NomicBertPreTrainedModel,NougatImageProcessor:()=>_.NougatImageProcessor,NougatTokenizer:()=>m.NougatTokenizer,OPTForCausalLM:()=>h.OPTForCausalLM,OPTModel:()=>h.OPTModel,OPTPreTrainedModel:()=>h.OPTPreTrainedModel,ObjectDetectionPipeline:()=>p.ObjectDetectionPipeline,OpenELMForCausalLM:()=>h.OpenELMForCausalLM,OpenELMModel:()=>h.OpenELMModel,OpenELMPreTrainedModel:()=>h.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>_.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>h.OwlViTForObjectDetection,OwlViTModel:()=>h.OwlViTModel,OwlViTPreTrainedModel:()=>h.OwlViTPreTrainedModel,OwlViTProcessor:()=>_.OwlViTProcessor,Owlv2ForObjectDetection:()=>h.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>_.Owlv2ImageProcessor,Owlv2Model:()=>h.Owlv2Model,Owlv2PreTrainedModel:()=>h.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>h.Phi3ForCausalLM,Phi3Model:()=>h.Phi3Model,Phi3PreTrainedModel:()=>h.Phi3PreTrainedModel,PhiForCausalLM:()=>h.PhiForCausalLM,PhiModel:()=>h.PhiModel,PhiPreTrainedModel:()=>h.PhiPreTrainedModel,Pipeline:()=>p.Pipeline,PreTrainedModel:()=>h.PreTrainedModel,PreTrainedTokenizer:()=>m.PreTrainedTokenizer,PretrainedConfig:()=>f.PretrainedConfig,PretrainedMixin:()=>h.PretrainedMixin,Processor:()=>_.Processor,PyAnnoteFeatureExtractor:()=>_.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>h.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>h.PyAnnoteModel,PyAnnotePreTrainedModel:
()=>h.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>_.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>h.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>p.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>h.Qwen2ForCausalLM,Qwen2Model:()=>h.Qwen2Model,Qwen2PreTrainedModel:()=>h.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>m.Qwen2Tokenizer,RTDetrForObjectDetection:()=>h.RTDetrForObjectDetection,RTDetrImageProcessor:()=>_.RTDetrImageProcessor,RTDetrModel:()=>h.RTDetrModel,RTDetrObjectDetectionOutput:()=>h.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>h.RTDetrPreTrainedModel,RawImage:()=>M.RawImage,ResNetForImageClassification:()=>h.ResNetForImageClassification,ResNetModel:()=>h.ResNetModel,ResNetPreTrainedModel:()=>h.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>h.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>h.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>h.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>h.RoFormerForTokenClassification,RoFormerModel:()=>h.RoFormerModel,RoFormerPreTrainedModel:()=>h.RoFormerPreTrainedModel,RoFormerTokenizer:()=>m.RoFormerTokenizer,RobertaForMaskedLM:()=>h.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>h.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>h.RobertaForSequenceClassification,RobertaForTokenClassification:()=>h.RobertaForTokenClassification,RobertaModel:()=>h.RobertaModel,RobertaPreTrainedModel:()=>h.RobertaPreTrainedModel,RobertaTokenizer:()=>m.RobertaTokenizer,SamImageProcessor:()=>_.SamImageProcessor,SamImageSegmentationOutput:()=>h.SamImageSegmentationOutput,SamModel:()=>h.SamModel,SamPreTrainedModel:()=>h.SamPreTrainedModel,SamProcessor:()=>_.SamProcessor,SapiensFeatureExtractor:()=>_.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>h.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>h.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>h.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>h.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>_.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>_.SegformerFeatureExtractor,SegformerForImageClassification:()=>h.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>h.SegformerForSemanticSegmentation,SegformerModel:()=>h.SegformerModel,SegformerPreTrainedModel:()=>h.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>h.Seq2SeqLMOutput,SequenceClassifierOutput:()=>h.SequenceClassifierOutput,SiglipImageProcessor:()=>_.SiglipImageProcessor,SiglipModel:()=>h.SiglipModel,SiglipPreTrainedModel:()=>h.SiglipPreTrainedModel,SiglipTextModel:()=>h.SiglipTextModel,SiglipTokenizer:()=>m.SiglipTokenizer,SiglipVisionModel:()=>h.SiglipVisionModel,SpeechT5FeatureExtractor:()=>_.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>h.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>h.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>h.SpeechT5HifiGan,SpeechT5Model:()=>h.SpeechT5Model,SpeechT5PreTrainedModel:()=>h.SpeechT5PreTrainedModel,SpeechT5Processor:()=>_.SpeechT5Processor,SpeechT5Tokenizer:()=>m.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>h.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>h.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>h.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>h.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>h.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>m.SqueezeBertTokenizer,StableLmForCausalLM:()=>h.StableLmForCausalLM,StableLmModel:()=>h.StableLmModel,StableLmPreTrainedModel:()=>h.StableLmPreTrainedModel
,Starcoder2ForCausalLM:()=>h.Starcoder2ForCausalLM,Starcoder2Model:()=>h.Starcoder2Model,Starcoder2PreTrainedModel:()=>h.Starcoder2PreTrainedModel,StoppingCriteria:()=>x.StoppingCriteria,StoppingCriteriaList:()=>x.StoppingCriteriaList,SummarizationPipeline:()=>p.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>h.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>_.Swin2SRImageProcessor,Swin2SRModel:()=>h.Swin2SRModel,Swin2SRPreTrainedModel:()=>h.Swin2SRPreTrainedModel,SwinForImageClassification:()=>h.SwinForImageClassification,SwinModel:()=>h.SwinModel,SwinPreTrainedModel:()=>h.SwinPreTrainedModel,T5ForConditionalGeneration:()=>h.T5ForConditionalGeneration,T5Model:()=>h.T5Model,T5PreTrainedModel:()=>h.T5PreTrainedModel,T5Tokenizer:()=>m.T5Tokenizer,TableTransformerForObjectDetection:()=>h.TableTransformerForObjectDetection,TableTransformerModel:()=>h.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>h.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>h.TableTransformerPreTrainedModel,Tensor:()=>w.Tensor,Text2TextGenerationPipeline:()=>p.Text2TextGenerationPipeline,TextClassificationPipeline:()=>p.TextClassificationPipeline,TextGenerationPipeline:()=>p.TextGenerationPipeline,TextStreamer:()=>T.TextStreamer,TextToAudioPipeline:()=>p.TextToAudioPipeline,TokenClassificationPipeline:()=>p.TokenClassificationPipeline,TokenClassifierOutput:()=>h.TokenClassifierOutput,TokenizerModel:()=>m.TokenizerModel,TrOCRForCausalLM:()=>h.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>h.TrOCRPreTrainedModel,TranslationPipeline:()=>p.TranslationPipeline,UniSpeechForCTC:()=>h.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>h.UniSpeechForSequenceClassification,UniSpeechModel:()=>h.UniSpeechModel,UniSpeechPreTrainedModel:()=>h.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>h.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>h.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>h.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>h.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>h.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>_.ViTFeatureExtractor,ViTForImageClassification:()=>h.ViTForImageClassification,ViTImageProcessor:()=>_.ViTImageProcessor,ViTModel:()=>h.ViTModel,ViTPreTrainedModel:()=>h.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>h.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>h.VitMatteForImageMatting,VitMatteImageProcessor:()=>_.VitMatteImageProcessor,VitMattePreTrainedModel:()=>h.VitMattePreTrainedModel,VitsModel:()=>h.VitsModel,VitsModelOutput:()=>h.VitsModelOutput,VitsPreTrainedModel:()=>h.VitsPreTrainedModel,VitsTokenizer:()=>m.VitsTokenizer,Wav2Vec2BertForCTC:()=>h.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>h.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>h.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>h.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>m.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>_.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>h.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>h.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>h.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>h.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>h.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>_.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>h.WavLMForAudioFrameClassification,WavLMForCTC:()=>h.WavLMForCTC,WavLMForSequenceClassification:()=>h.WavLMForSequenceClassification,WavLMForXVector:()
=>h.WavLMForXVector,WavLMModel:()=>h.WavLMModel,WavLMPreTrainedModel:()=>h.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>_.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>h.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>h.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>_.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>h.WhisperForConditionalGeneration,WhisperModel:()=>h.WhisperModel,WhisperPreTrainedModel:()=>h.WhisperPreTrainedModel,WhisperProcessor:()=>_.WhisperProcessor,WhisperTextStreamer:()=>T.WhisperTextStreamer,WhisperTokenizer:()=>m.WhisperTokenizer,XLMForQuestionAnswering:()=>h.XLMForQuestionAnswering,XLMForSequenceClassification:()=>h.XLMForSequenceClassification,XLMForTokenClassification:()=>h.XLMForTokenClassification,XLMModel:()=>h.XLMModel,XLMPreTrainedModel:()=>h.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>h.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>h.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>h.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>h.XLMRobertaForTokenClassification,XLMRobertaModel:()=>h.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>h.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>m.XLMRobertaTokenizer,XLMTokenizer:()=>m.XLMTokenizer,XLMWithLMHeadModel:()=>h.XLMWithLMHeadModel,XVectorOutput:()=>h.XVectorOutput,YolosFeatureExtractor:()=>_.YolosFeatureExtractor,YolosForObjectDetection:()=>h.YolosForObjectDetection,YolosModel:()=>h.YolosModel,YolosObjectDetectionOutput:()=>h.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>h.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>p.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>p.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>p.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>p.ZeroShotObjectDetectionPipeline,bankers_round:()=>b.bankers_round,cat:()=>w.cat,cos_sim:()=>b.cos_sim,dot:()=>b.dot,dynamic_time_warping:()=>b.dynamic_time_warping,env:()=>u.env,full:()=>w.full,full_like:()=>w.full_like,getKeyValueShapes:()=>f.getKeyValueShapes,hamming:()=>g.hamming,hanning:()=>g.hanning,interpolate:()=>w.interpolate,interpolate_4d:()=>w.interpolate_4d,interpolate_data:()=>b.interpolate_data,is_chinese_char:()=>m.is_chinese_char,layer_norm:()=>w.layer_norm,log_softmax:()=>b.log_softmax,magnitude:()=>b.magnitude,matmul:()=>w.matmul,max:()=>b.max,mean:()=>w.mean,mean_pooling:()=>w.mean_pooling,medianFilter:()=>b.medianFilter,mel_filter_bank:()=>g.mel_filter_bank,min:()=>b.min,ones:()=>w.ones,ones_like:()=>w.ones_like,permute:()=>w.permute,permute_data:()=>b.permute_data,pipeline:()=>p.pipeline,quantize_embeddings:()=>w.quantize_embeddings,read_audio:()=>g.read_audio,rfft:()=>w.rfft,round:()=>b.round,softmax:()=>b.softmax,spectrogram:()=>g.spectrogram,stack:()=>w.stack,std_mean:()=>w.std_mean,topk:()=>w.topk,window_function:()=>g.window_function,zeros:()=>w.zeros,zeros_like:()=>w.zeros_like});var u=c(/*! ./env.js */"./src/env.js"),p=c(/*! ./pipelines.js */"./src/pipelines.js"),h=c(/*! ./models.js */"./src/models.js"),m=c(/*! ./tokenizers.js */"./src/tokenizers.js"),_=c(/*! ./processors.js */"./src/processors.js"),f=c(/*! ./configs.js */"./src/configs.js"),g=c(/*! ./utils/audio.js */"./src/utils/audio.js"),M=c(/*! ./utils/image.js */"./src/utils/image.js"),w=c(/*! ./utils/tensor.js */"./src/utils/tensor.js"),b=c(/*! ./utils/maths.js */"./src/utils/maths.js"),T=c(/*! ./generation/streamers.js */"./src/generation/streamers.js"),x=c(/*! 
./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),y=d.ASTFeatureExtractor,k=d.ASTForAudioClassification,F=d.ASTModel,C=d.ASTPreTrainedModel,P=d.AlbertForMaskedLM,v=d.AlbertForQuestionAnswering,S=d.AlbertForSequenceClassification,A=d.AlbertModel,E=d.AlbertPreTrainedModel,L=d.AlbertTokenizer,z=d.AudioClassificationPipeline,I=d.AutoConfig,B=d.AutoModel,N=d.AutoModelForAudioClassification,O=d.AutoModelForAudioFrameClassification,D=d.AutoModelForCTC,V=d.AutoModelForCausalLM,j=d.AutoModelForDepthEstimation,R=d.AutoModelForDocumentQuestionAnswering,G=d.AutoModelForImageClassification,q=d.AutoModelForImageFeatureExtraction,$=d.AutoModelForImageMatting,W=d.AutoModelForImageSegmentation,U=d.AutoModelForImageToImage,X=d.AutoModelForMaskGeneration,Q=d.AutoModelForMaskedLM,H=d.AutoModelForNormalEstimation,Y=d.AutoModelForObjectDetection,J=d.AutoModelForQuestionAnswering,K=d.AutoModelForSemanticSegmentation,Z=d.AutoModelForSeq2SeqLM,ee=d.AutoModelForSequenceClassification,te=d.AutoModelForSpeechSeq2Seq,ne=d.AutoModelForTextToSpectrogram,re=d.AutoModelForTextToWaveform,oe=d.AutoModelForTokenClassification,se=d.AutoModelForVision2Seq,ae=d.AutoModelForXVector,ie=d.AutoModelForZeroShotObjectDetection,le=d.AutoProcessor,ce=d.AutoTokenizer,de=d.AutomaticSpeechRecognitionPipeline,ue=d.BartForConditionalGeneration,pe=d.BartForSequenceClassification,he=d.BartModel,me=d.BartPretrainedModel,_e=d.BartTokenizer,fe=d.BaseModelOutput,ge=d.BaseStreamer,Me=d.BeitFeatureExtractor,we=d.BeitForImageClassification,be=d.BeitModel,Te=d.BeitPreTrainedModel,xe=d.BertForMaskedLM,ye=d.BertForQuestionAnswering,ke=d.BertForSequenceClassification,Fe=d.BertForTokenClassification,Ce=d.BertModel,Pe=d.BertPreTrainedModel,ve=d.BertTokenizer,Se=d.BitImageProcessor,Ae=d.BlenderbotForConditionalGeneration,Ee=d.BlenderbotModel,Le=d.BlenderbotPreTrainedModel,ze=d.BlenderbotSmallForConditionalGeneration,Ie=d.BlenderbotSmallModel,Be=d.BlenderbotSmallPreTrainedModel,Ne=d.BlenderbotSmallTokenizer,Oe=d.BlenderbotTokenizer,De=d.BloomForCausalLM,Ve=d.BloomModel,je=d.BloomPreTrainedModel,Re=d.BloomTokenizer,Ge=d.CLIPFeatureExtractor,qe=d.CLIPImageProcessor,$e=d.CLIPModel,We=d.CLIPPreTrainedModel,Ue=d.CLIPSegForImageSegmentation,Xe=d.CLIPSegModel,Qe=d.CLIPSegPreTrainedModel,He=d.CLIPTextModelWithProjection,Ye=d.CLIPTokenizer,Je=d.CLIPVisionModelWithProjection,Ke=d.CamembertForMaskedLM,Ze=d.CamembertForQuestionAnswering,et=d.CamembertForSequenceClassification,tt=d.CamembertForTokenClassification,nt=d.CamembertModel,rt=d.CamembertPreTrainedModel,ot=d.CamembertTokenizer,st=d.CausalLMOutput,at=d.CausalLMOutputWithPast,it=d.ChineseCLIPFeatureExtractor,lt=d.ChineseCLIPModel,ct=d.ChineseCLIPPreTrainedModel,dt=d.ClapAudioModelWithProjection,ut=d.ClapFeatureExtractor,pt=d.ClapModel,ht=d.ClapPreTrainedModel,mt=d.ClapTextModelWithProjection,_t=d.CodeGenForCausalLM,ft=d.CodeGenModel,gt=d.CodeGenPreTrainedModel,Mt=d.CodeGenTokenizer,wt=d.CodeLlamaTokenizer,bt=d.CohereForCausalLM,Tt=d.CohereModel,xt=d.CoherePreTrainedModel,yt=d.CohereTokenizer,kt=d.ConvBertForMaskedLM,Ft=d.ConvBertForQuestionAnswering,Ct=d.ConvBertForSequenceClassification,Pt=d.ConvBertForTokenClassification,vt=d.ConvBertModel,St=d.ConvBertPreTrainedModel,At=d.ConvBertTokenizer,Et=d.ConvNextFeatureExtractor,Lt=d.ConvNextForImageClassification,zt=d.ConvNextImageProcessor,It=d.ConvNextModel,Bt=d.ConvNextPreTrainedModel,Nt=d.ConvNextV2ForImageClassification,Ot=d.ConvNextV2Model,Dt=d.ConvNextV2PreTrainedModel,Vt=d.DPTFeatureExtractor,jt=d.DPTForDepthEstimation,Rt=d.DPTImagePro
cessor,Gt=d.DPTModel,qt=d.DPTPreTrainedModel,$t=d.DebertaForMaskedLM,Wt=d.DebertaForQuestionAnswering,Ut=d.DebertaForSequenceClassification,Xt=d.DebertaForTokenClassification,Qt=d.DebertaModel,Ht=d.DebertaPreTrainedModel,Yt=d.DebertaTokenizer,Jt=d.DebertaV2ForMaskedLM,Kt=d.DebertaV2ForQuestionAnswering,Zt=d.DebertaV2ForSequenceClassification,en=d.DebertaV2ForTokenClassification,tn=d.DebertaV2Model,nn=d.DebertaV2PreTrainedModel,rn=d.DebertaV2Tokenizer,on=d.DeiTFeatureExtractor,sn=d.DeiTForImageClassification,an=d.DeiTModel,ln=d.DeiTPreTrainedModel,cn=d.DepthAnythingForDepthEstimation,dn=d.DepthAnythingPreTrainedModel,un=d.DepthEstimationPipeline,pn=d.DetrFeatureExtractor,hn=d.DetrForObjectDetection,mn=d.DetrForSegmentation,_n=d.DetrModel,fn=d.DetrObjectDetectionOutput,gn=d.DetrPreTrainedModel,Mn=d.DetrSegmentationOutput,wn=d.Dinov2ForImageClassification,bn=d.Dinov2Model,Tn=d.Dinov2PreTrainedModel,xn=d.DistilBertForMaskedLM,yn=d.DistilBertForQuestionAnswering,kn=d.DistilBertForSequenceClassification,Fn=d.DistilBertForTokenClassification,Cn=d.DistilBertModel,Pn=d.DistilBertPreTrainedModel,vn=d.DistilBertTokenizer,Sn=d.DocumentQuestionAnsweringPipeline,An=d.DonutFeatureExtractor,En=d.DonutSwinModel,Ln=d.DonutSwinPreTrainedModel,zn=d.EfficientNetForImageClassification,In=d.EfficientNetImageProcessor,Bn=d.EfficientNetModel,Nn=d.EfficientNetPreTrainedModel,On=d.ElectraForMaskedLM,Dn=d.ElectraForQuestionAnswering,Vn=d.ElectraForSequenceClassification,jn=d.ElectraForTokenClassification,Rn=d.ElectraModel,Gn=d.ElectraPreTrainedModel,qn=d.ElectraTokenizer,$n=d.EosTokenCriteria,Wn=d.EsmForMaskedLM,Un=d.EsmForSequenceClassification,Xn=d.EsmForTokenClassification,Qn=d.EsmModel,Hn=d.EsmPreTrainedModel,Yn=d.EsmTokenizer,Jn=d.FFT,Kn=d.FalconForCausalLM,Zn=d.FalconModel,er=d.FalconPreTrainedModel,tr=d.FalconTokenizer,nr=d.FastViTForImageClassification,rr=d.FastViTModel,or=d.FastViTPreTrainedModel,sr=d.FeatureExtractionPipeline,ar=d.FeatureExtractor,ir=d.FillMaskPipeline,lr=d.Florence2ForConditionalGeneration,cr=d.Florence2PreTrainedModel,dr=d.Florence2Processor,ur=d.GLPNFeatureExtractor,pr=d.GLPNForDepthEstimation,hr=d.GLPNModel,mr=d.GLPNPreTrainedModel,_r=d.GPT2LMHeadModel,fr=d.GPT2Model,gr=d.GPT2PreTrainedModel,Mr=d.GPT2Tokenizer,wr=d.GPTBigCodeForCausalLM,br=d.GPTBigCodeModel,Tr=d.GPTBigCodePreTrainedModel,xr=d.GPTJForCausalLM,yr=d.GPTJModel,kr=d.GPTJPreTrainedModel,Fr=d.GPTNeoForCausalLM,Cr=d.GPTNeoModel,Pr=d.GPTNeoPreTrainedModel,vr=d.GPTNeoXForCausalLM,Sr=d.GPTNeoXModel,Ar=d.GPTNeoXPreTrainedModel,Er=d.GPTNeoXTokenizer,Lr=d.Gemma2ForCausalLM,zr=d.Gemma2Model,Ir=d.Gemma2PreTrainedModel,Br=d.GemmaForCausalLM,Nr=d.GemmaModel,Or=d.GemmaPreTrainedModel,Dr=d.GemmaTokenizer,Vr=d.Grok1Tokenizer,jr=d.HerbertTokenizer,Rr=d.HubertForCTC,Gr=d.HubertForSequenceClassification,qr=d.HubertModel,$r=d.HubertPreTrainedModel,Wr=d.ImageClassificationPipeline,Ur=d.ImageFeatureExtractionPipeline,Xr=d.ImageFeatureExtractor,Qr=d.ImageMattingOutput,Hr=d.ImageSegmentationPipeline,Yr=d.ImageToImagePipeline,Jr=d.ImageToTextPipeline,Kr=d.InterruptableStoppingCriteria,Zr=d.JAISLMHeadModel,eo=d.JAISModel,to=d.JAISPreTrainedModel,no=d.LlamaForCausalLM,ro=d.LlamaModel,oo=d.LlamaPreTrainedModel,so=d.LlamaTokenizer,ao=d.LlavaForConditionalGeneration,io=d.LlavaPreTrainedModel,lo=d.LongT5ForConditionalGeneration,co=d.LongT5Model,uo=d.LongT5PreTrainedModel,po=d.M2M100ForConditionalGeneration,ho=d.M2M100Model,mo=d.M2M100PreTrainedModel,_o=d.M2M100Tokenizer,fo=d.MBart50Tokenizer,go=d.MBartForCausalLM,Mo=d.MBartForConditionalGeneration,wo=d.MBa
rtForSequenceClassification,bo=d.MBartModel,To=d.MBartPreTrainedModel,xo=d.MBartTokenizer,yo=d.MPNetForMaskedLM,ko=d.MPNetForQuestionAnswering,Fo=d.MPNetForSequenceClassification,Co=d.MPNetForTokenClassification,Po=d.MPNetModel,vo=d.MPNetPreTrainedModel,So=d.MPNetTokenizer,Ao=d.MT5ForConditionalGeneration,Eo=d.MT5Model,Lo=d.MT5PreTrainedModel,zo=d.MarianMTModel,Io=d.MarianModel,Bo=d.MarianPreTrainedModel,No=d.MarianTokenizer,Oo=d.MaskedLMOutput,Do=d.MaxLengthCriteria,Vo=d.MistralForCausalLM,jo=d.MistralModel,Ro=d.MistralPreTrainedModel,Go=d.MobileBertForMaskedLM,qo=d.MobileBertForQuestionAnswering,$o=d.MobileBertForSequenceClassification,Wo=d.MobileBertModel,Uo=d.MobileBertPreTrainedModel,Xo=d.MobileBertTokenizer,Qo=d.MobileNetV1FeatureExtractor,Ho=d.MobileNetV1ForImageClassification,Yo=d.MobileNetV1Model,Jo=d.MobileNetV1PreTrainedModel,Ko=d.MobileNetV2FeatureExtractor,Zo=d.MobileNetV2ForImageClassification,es=d.MobileNetV2Model,ts=d.MobileNetV2PreTrainedModel,ns=d.MobileNetV3FeatureExtractor,rs=d.MobileNetV3ForImageClassification,os=d.MobileNetV3Model,ss=d.MobileNetV3PreTrainedModel,as=d.MobileNetV4FeatureExtractor,is=d.MobileNetV4ForImageClassification,ls=d.MobileNetV4Model,cs=d.MobileNetV4PreTrainedModel,ds=d.MobileViTFeatureExtractor,us=d.MobileViTForImageClassification,ps=d.MobileViTImageProcessor,hs=d.MobileViTModel,ms=d.MobileViTPreTrainedModel,_s=d.MobileViTV2ForImageClassification,fs=d.MobileViTV2Model,gs=d.MobileViTV2PreTrainedModel,Ms=d.ModelOutput,ws=d.Moondream1ForConditionalGeneration,bs=d.MptForCausalLM,Ts=d.MptModel,xs=d.MptPreTrainedModel,ys=d.MusicgenForCausalLM,ks=d.MusicgenForConditionalGeneration,Fs=d.MusicgenModel,Cs=d.MusicgenPreTrainedModel,Ps=d.NllbTokenizer,vs=d.NomicBertModel,Ss=d.NomicBertPreTrainedModel,As=d.NougatImageProcessor,Es=d.NougatTokenizer,Ls=d.OPTForCausalLM,zs=d.OPTModel,Is=d.OPTPreTrainedModel,Bs=d.ObjectDetectionPipeline,Ns=d.OpenELMForCausalLM,Os=d.OpenELMModel,Ds=d.OpenELMPreTrainedModel,Vs=d.OwlViTFeatureExtractor,js=d.OwlViTForObjectDetection,Rs=d.OwlViTModel,Gs=d.OwlViTPreTrainedModel,qs=d.OwlViTProcessor,$s=d.Owlv2ForObjectDetection,Ws=d.Owlv2ImageProcessor,Us=d.Owlv2Model,Xs=d.Owlv2PreTrainedModel,Qs=d.Phi3ForCausalLM,Hs=d.Phi3Model,Ys=d.Phi3PreTrainedModel,Js=d.PhiForCausalLM,Ks=d.PhiModel,Zs=d.PhiPreTrainedModel,ea=d.Pipeline,ta=d.PreTrainedModel,na=d.PreTrainedTokenizer,ra=d.PretrainedConfig,oa=d.PretrainedMixin,sa=d.Processor,aa=d.PyAnnoteFeatureExtractor,ia=d.PyAnnoteForAudioFrameClassification,la=d.PyAnnoteModel,ca=d.PyAnnotePreTrainedModel,da=d.PyAnnoteProcessor,ua=d.QuestionAnsweringModelOutput,pa=d.QuestionAnsweringPipeline,ha=d.Qwen2ForCausalLM,ma=d.Qwen2Model,_a=d.Qwen2PreTrainedModel,fa=d.Qwen2Tokenizer,ga=d.RTDetrForObjectDetection,Ma=d.RTDetrImageProcessor,wa=d.RTDetrModel,ba=d.RTDetrObjectDetectionOutput,Ta=d.RTDetrPreTrainedModel,xa=d.RawImage,ya=d.ResNetForImageClassification,ka=d.ResNetModel,Fa=d.ResNetPreTrainedModel,Ca=d.RoFormerForMaskedLM,Pa=d.RoFormerForQuestionAnswering,va=d.RoFormerForSequenceClassification,Sa=d.RoFormerForTokenClassification,Aa=d.RoFormerModel,Ea=d.RoFormerPreTrainedModel,La=d.RoFormerTokenizer,za=d.RobertaForMaskedLM,Ia=d.RobertaForQuestionAnswering,Ba=d.RobertaForSequenceClassification,Na=d.RobertaForTokenClassification,Oa=d.RobertaModel,Da=d.RobertaPreTrainedModel,Va=d.RobertaTokenizer,ja=d.SamImageProcessor,Ra=d.SamImageSegmentationOutput,Ga=d.SamModel,qa=d.SamPreTrainedModel,$a=d.SamProcessor,Wa=d.SapiensFeatureExtractor,Ua=d.SapiensForDepthEstimation,Xa=d.SapiensForNormalEstimation,Qa=d.Sapien
sForSemanticSegmentation,Ha=d.SapiensPreTrainedModel,Ya=d.SeamlessM4TFeatureExtractor,Ja=d.SegformerFeatureExtractor,Ka=d.SegformerForImageClassification,Za=d.SegformerForSemanticSegmentation,ei=d.SegformerModel,ti=d.SegformerPreTrainedModel,ni=d.Seq2SeqLMOutput,ri=d.SequenceClassifierOutput,oi=d.SiglipImageProcessor,si=d.SiglipModel,ai=d.SiglipPreTrainedModel,ii=d.SiglipTextModel,li=d.SiglipTokenizer,ci=d.SiglipVisionModel,di=d.SpeechT5FeatureExtractor,ui=d.SpeechT5ForSpeechToText,pi=d.SpeechT5ForTextToSpeech,hi=d.SpeechT5HifiGan,mi=d.SpeechT5Model,_i=d.SpeechT5PreTrainedModel,fi=d.SpeechT5Processor,gi=d.SpeechT5Tokenizer,Mi=d.SqueezeBertForMaskedLM,wi=d.SqueezeBertForQuestionAnswering,bi=d.SqueezeBertForSequenceClassification,Ti=d.SqueezeBertModel,xi=d.SqueezeBertPreTrainedModel,yi=d.SqueezeBertTokenizer,ki=d.StableLmForCausalLM,Fi=d.StableLmModel,Ci=d.StableLmPreTrainedModel,Pi=d.Starcoder2ForCausalLM,vi=d.Starcoder2Model,Si=d.Starcoder2PreTrainedModel,Ai=d.StoppingCriteria,Ei=d.StoppingCriteriaList,Li=d.SummarizationPipeline,zi=d.Swin2SRForImageSuperResolution,Ii=d.Swin2SRImageProcessor,Bi=d.Swin2SRModel,Ni=d.Swin2SRPreTrainedModel,Oi=d.SwinForImageClassification,Di=d.SwinModel,Vi=d.SwinPreTrainedModel,ji=d.T5ForConditionalGeneration,Ri=d.T5Model,Gi=d.T5PreTrainedModel,qi=d.T5Tokenizer,$i=d.TableTransformerForObjectDetection,Wi=d.TableTransformerModel,Ui=d.TableTransformerObjectDetectionOutput,Xi=d.TableTransformerPreTrainedModel,Qi=d.Tensor,Hi=d.Text2TextGenerationPipeline,Yi=d.TextClassificationPipeline,Ji=d.TextGenerationPipeline,Ki=d.TextStreamer,Zi=d.TextToAudioPipeline,el=d.TokenClassificationPipeline,tl=d.TokenClassifierOutput,nl=d.TokenizerModel,rl=d.TrOCRForCausalLM,ol=d.TrOCRPreTrainedModel,sl=d.TranslationPipeline,al=d.UniSpeechForCTC,il=d.UniSpeechForSequenceClassification,ll=d.UniSpeechModel,cl=d.UniSpeechPreTrainedModel,dl=d.UniSpeechSatForAudioFrameClassification,ul=d.UniSpeechSatForCTC,pl=d.UniSpeechSatForSequenceClassification,hl=d.UniSpeechSatModel,ml=d.UniSpeechSatPreTrainedModel,_l=d.ViTFeatureExtractor,fl=d.ViTForImageClassification,gl=d.ViTImageProcessor,Ml=d.ViTModel,wl=d.ViTPreTrainedModel,bl=d.VisionEncoderDecoderModel,Tl=d.VitMatteForImageMatting,xl=d.VitMatteImageProcessor,yl=d.VitMattePreTrainedModel,kl=d.VitsModel,Fl=d.VitsModelOutput,Cl=d.VitsPreTrainedModel,Pl=d.VitsTokenizer,vl=d.Wav2Vec2BertForCTC,Sl=d.Wav2Vec2BertForSequenceClassification,Al=d.Wav2Vec2BertModel,El=d.Wav2Vec2BertPreTrainedModel,Ll=d.Wav2Vec2CTCTokenizer,zl=d.Wav2Vec2FeatureExtractor,Il=d.Wav2Vec2ForAudioFrameClassification,Bl=d.Wav2Vec2ForCTC,Nl=d.Wav2Vec2ForSequenceClassification,Ol=d.Wav2Vec2Model,Dl=d.Wav2Vec2PreTrainedModel,Vl=d.Wav2Vec2ProcessorWithLM,jl=d.WavLMForAudioFrameClassification,Rl=d.WavLMForCTC,Gl=d.WavLMForSequenceClassification,ql=d.WavLMForXVector,$l=d.WavLMModel,Wl=d.WavLMPreTrainedModel,Ul=d.WeSpeakerFeatureExtractor,Xl=d.WeSpeakerResNetModel,Ql=d.WeSpeakerResNetPreTrainedModel,Hl=d.WhisperFeatureExtractor,Yl=d.WhisperForConditionalGeneration,Jl=d.WhisperModel,Kl=d.WhisperPreTrainedModel,Zl=d.WhisperProcessor,ec=d.WhisperTextStreamer,tc=d.WhisperTokenizer,nc=d.XLMForQuestionAnswering,rc=d.XLMForSequenceClassification,oc=d.XLMForTokenClassification,sc=d.XLMModel,ac=d.XLMPreTrainedModel,ic=d.XLMRobertaForMaskedLM,lc=d.XLMRobertaForQuestionAnswering,cc=d.XLMRobertaForSequenceClassification,dc=d.XLMRobertaForTokenClassification,uc=d.XLMRobertaModel,pc=d.XLMRobertaPreTrainedModel,hc=d.XLMRobertaTokenizer,mc=d.XLMTokenizer,_c=d.XLMWithLMHeadModel,fc=d.XVectorOutput,gc=d.Yo
losFeatureExtractor,Mc=d.YolosForObjectDetection,wc=d.YolosModel,bc=d.YolosObjectDetectionOutput,Tc=d.YolosPreTrainedModel,xc=d.ZeroShotAudioClassificationPipeline,yc=d.ZeroShotClassificationPipeline,kc=d.ZeroShotImageClassificationPipeline,Fc=d.ZeroShotObjectDetectionPipeline,Cc=d.bankers_round,Pc=d.cat,vc=d.cos_sim,Sc=d.dot,Ac=d.dynamic_time_warping,Ec=d.env,Lc=d.full,zc=d.full_like,Ic=d.getKeyValueShapes,Bc=d.hamming,Nc=d.hanning,Oc=d.interpolate,Dc=d.interpolate_4d,Vc=d.interpolate_data,jc=d.is_chinese_char,Rc=d.layer_norm,Gc=d.log_softmax,qc=d.magnitude,$c=d.matmul,Wc=d.max,Uc=d.mean,Xc=d.mean_pooling,Qc=d.medianFilter,Hc=d.mel_filter_bank,Yc=d.min,Jc=d.ones,Kc=d.ones_like,Zc=d.permute,ed=d.permute_data,td=d.pipeline,nd=d.quantize_embeddings,rd=d.read_audio,od=d.rfft,sd=d.round,ad=d.softmax,id=d.spectrogram,ld=d.stack,cd=d.std_mean,dd=d.topk,ud=d.window_function,pd=d.zeros,hd=d.zeros_like;export{y as ASTFeatureExtractor,k as ASTForAudioClassification,F as ASTModel,C as ASTPreTrainedModel,P as AlbertForMaskedLM,v as AlbertForQuestionAnswering,S as AlbertForSequenceClassification,A as AlbertModel,E as AlbertPreTrainedModel,L as AlbertTokenizer,z as AudioClassificationPipeline,I as AutoConfig,B as AutoModel,N as AutoModelForAudioClassification,O as AutoModelForAudioFrameClassification,D as AutoModelForCTC,V as AutoModelForCausalLM,j as AutoModelForDepthEstimation,R as AutoModelForDocumentQuestionAnswering,G as AutoModelForImageClassification,q as AutoModelForImageFeatureExtraction,$ as AutoModelForImageMatting,W as AutoModelForImageSegmentation,U as AutoModelForImageToImage,X as AutoModelForMaskGeneration,Q as AutoModelForMaskedLM,H as AutoModelForNormalEstimation,Y as AutoModelForObjectDetection,J as AutoModelForQuestionAnswering,K as AutoModelForSemanticSegmentation,Z as AutoModelForSeq2SeqLM,ee as AutoModelForSequenceClassification,te as AutoModelForSpeechSeq2Seq,ne as AutoModelForTextToSpectrogram,re as AutoModelForTextToWaveform,oe as AutoModelForTokenClassification,se as AutoModelForVision2Seq,ae as AutoModelForXVector,ie as AutoModelForZeroShotObjectDetection,le as AutoProcessor,ce as AutoTokenizer,de as AutomaticSpeechRecognitionPipeline,ue as BartForConditionalGeneration,pe as BartForSequenceClassification,he as BartModel,me as BartPretrainedModel,_e as BartTokenizer,fe as BaseModelOutput,ge as BaseStreamer,Me as BeitFeatureExtractor,we as BeitForImageClassification,be as BeitModel,Te as BeitPreTrainedModel,xe as BertForMaskedLM,ye as BertForQuestionAnswering,ke as BertForSequenceClassification,Fe as BertForTokenClassification,Ce as BertModel,Pe as BertPreTrainedModel,ve as BertTokenizer,Se as BitImageProcessor,Ae as BlenderbotForConditionalGeneration,Ee as BlenderbotModel,Le as BlenderbotPreTrainedModel,ze as BlenderbotSmallForConditionalGeneration,Ie as BlenderbotSmallModel,Be as BlenderbotSmallPreTrainedModel,Ne as BlenderbotSmallTokenizer,Oe as BlenderbotTokenizer,De as BloomForCausalLM,Ve as BloomModel,je as BloomPreTrainedModel,Re as BloomTokenizer,Ge as CLIPFeatureExtractor,qe as CLIPImageProcessor,$e as CLIPModel,We as CLIPPreTrainedModel,Ue as CLIPSegForImageSegmentation,Xe as CLIPSegModel,Qe as CLIPSegPreTrainedModel,He as CLIPTextModelWithProjection,Ye as CLIPTokenizer,Je as CLIPVisionModelWithProjection,Ke as CamembertForMaskedLM,Ze as CamembertForQuestionAnswering,et as CamembertForSequenceClassification,tt as CamembertForTokenClassification,nt as CamembertModel,rt as CamembertPreTrainedModel,ot as CamembertTokenizer,st as CausalLMOutput,at as 
CausalLMOutputWithPast,it as ChineseCLIPFeatureExtractor,lt as ChineseCLIPModel,ct as ChineseCLIPPreTrainedModel,dt as ClapAudioModelWithProjection,ut as ClapFeatureExtractor,pt as ClapModel,ht as ClapPreTrainedModel,mt as ClapTextModelWithProjection,_t as CodeGenForCausalLM,ft as CodeGenModel,gt as CodeGenPreTrainedModel,Mt as CodeGenTokenizer,wt as CodeLlamaTokenizer,bt as CohereForCausalLM,Tt as CohereModel,xt as CoherePreTrainedModel,yt as CohereTokenizer,kt as ConvBertForMaskedLM,Ft as ConvBertForQuestionAnswering,Ct as ConvBertForSequenceClassification,Pt as ConvBertForTokenClassification,vt as ConvBertModel,St as ConvBertPreTrainedModel,At as ConvBertTokenizer,Et as ConvNextFeatureExtractor,Lt as ConvNextForImageClassification,zt as ConvNextImageProcessor,It as ConvNextModel,Bt as ConvNextPreTrainedModel,Nt as ConvNextV2ForImageClassification,Ot as ConvNextV2Model,Dt as ConvNextV2PreTrainedModel,Vt as DPTFeatureExtractor,jt as DPTForDepthEstimation,Rt as DPTImageProcessor,Gt as DPTModel,qt as DPTPreTrainedModel,$t as DebertaForMaskedLM,Wt as DebertaForQuestionAnswering,Ut as DebertaForSequenceClassification,Xt as DebertaForTokenClassification,Qt as DebertaModel,Ht as DebertaPreTrainedModel,Yt as DebertaTokenizer,Jt as DebertaV2ForMaskedLM,Kt as DebertaV2ForQuestionAnswering,Zt as DebertaV2ForSequenceClassification,en as DebertaV2ForTokenClassification,tn as DebertaV2Model,nn as DebertaV2PreTrainedModel,rn as DebertaV2Tokenizer,on as DeiTFeatureExtractor,sn as DeiTForImageClassification,an as DeiTModel,ln as DeiTPreTrainedModel,cn as DepthAnythingForDepthEstimation,dn as DepthAnythingPreTrainedModel,un as DepthEstimationPipeline,pn as DetrFeatureExtractor,hn as DetrForObjectDetection,mn as DetrForSegmentation,_n as DetrModel,fn as DetrObjectDetectionOutput,gn as DetrPreTrainedModel,Mn as DetrSegmentationOutput,wn as Dinov2ForImageClassification,bn as Dinov2Model,Tn as Dinov2PreTrainedModel,xn as DistilBertForMaskedLM,yn as DistilBertForQuestionAnswering,kn as DistilBertForSequenceClassification,Fn as DistilBertForTokenClassification,Cn as DistilBertModel,Pn as DistilBertPreTrainedModel,vn as DistilBertTokenizer,Sn as DocumentQuestionAnsweringPipeline,An as DonutFeatureExtractor,En as DonutSwinModel,Ln as DonutSwinPreTrainedModel,zn as EfficientNetForImageClassification,In as EfficientNetImageProcessor,Bn as EfficientNetModel,Nn as EfficientNetPreTrainedModel,On as ElectraForMaskedLM,Dn as ElectraForQuestionAnswering,Vn as ElectraForSequenceClassification,jn as ElectraForTokenClassification,Rn as ElectraModel,Gn as ElectraPreTrainedModel,qn as ElectraTokenizer,$n as EosTokenCriteria,Wn as EsmForMaskedLM,Un as EsmForSequenceClassification,Xn as EsmForTokenClassification,Qn as EsmModel,Hn as EsmPreTrainedModel,Yn as EsmTokenizer,Jn as FFT,Kn as FalconForCausalLM,Zn as FalconModel,er as FalconPreTrainedModel,tr as FalconTokenizer,nr as FastViTForImageClassification,rr as FastViTModel,or as FastViTPreTrainedModel,sr as FeatureExtractionPipeline,ar as FeatureExtractor,ir as FillMaskPipeline,lr as Florence2ForConditionalGeneration,cr as Florence2PreTrainedModel,dr as Florence2Processor,ur as GLPNFeatureExtractor,pr as GLPNForDepthEstimation,hr as GLPNModel,mr as GLPNPreTrainedModel,_r as GPT2LMHeadModel,fr as GPT2Model,gr as GPT2PreTrainedModel,Mr as GPT2Tokenizer,wr as GPTBigCodeForCausalLM,br as GPTBigCodeModel,Tr as GPTBigCodePreTrainedModel,xr as GPTJForCausalLM,yr as GPTJModel,kr as GPTJPreTrainedModel,Fr as GPTNeoForCausalLM,Cr as GPTNeoModel,Pr as GPTNeoPreTrainedModel,vr as 
GPTNeoXForCausalLM,Sr as GPTNeoXModel,Ar as GPTNeoXPreTrainedModel,Er as GPTNeoXTokenizer,Lr as Gemma2ForCausalLM,zr as Gemma2Model,Ir as Gemma2PreTrainedModel,Br as GemmaForCausalLM,Nr as GemmaModel,Or as GemmaPreTrainedModel,Dr as GemmaTokenizer,Vr as Grok1Tokenizer,jr as HerbertTokenizer,Rr as HubertForCTC,Gr as HubertForSequenceClassification,qr as HubertModel,$r as HubertPreTrainedModel,Wr as ImageClassificationPipeline,Ur as ImageFeatureExtractionPipeline,Xr as ImageFeatureExtractor,Qr as ImageMattingOutput,Hr as ImageSegmentationPipeline,Yr as ImageToImagePipeline,Jr as ImageToTextPipeline,Kr as InterruptableStoppingCriteria,Zr as JAISLMHeadModel,eo as JAISModel,to as JAISPreTrainedModel,no as LlamaForCausalLM,ro as LlamaModel,oo as LlamaPreTrainedModel,so as LlamaTokenizer,ao as LlavaForConditionalGeneration,io as LlavaPreTrainedModel,lo as LongT5ForConditionalGeneration,co as LongT5Model,uo as LongT5PreTrainedModel,po as M2M100ForConditionalGeneration,ho as M2M100Model,mo as M2M100PreTrainedModel,_o as M2M100Tokenizer,fo as MBart50Tokenizer,go as MBartForCausalLM,Mo as MBartForConditionalGeneration,wo as MBartForSequenceClassification,bo as MBartModel,To as MBartPreTrainedModel,xo as MBartTokenizer,yo as MPNetForMaskedLM,ko as MPNetForQuestionAnswering,Fo as MPNetForSequenceClassification,Co as MPNetForTokenClassification,Po as MPNetModel,vo as MPNetPreTrainedModel,So as MPNetTokenizer,Ao as MT5ForConditionalGeneration,Eo as MT5Model,Lo as MT5PreTrainedModel,zo as MarianMTModel,Io as MarianModel,Bo as MarianPreTrainedModel,No as MarianTokenizer,Oo as MaskedLMOutput,Do as MaxLengthCriteria,Vo as MistralForCausalLM,jo as MistralModel,Ro as MistralPreTrainedModel,Go as MobileBertForMaskedLM,qo as MobileBertForQuestionAnswering,$o as MobileBertForSequenceClassification,Wo as MobileBertModel,Uo as MobileBertPreTrainedModel,Xo as MobileBertTokenizer,Qo as MobileNetV1FeatureExtractor,Ho as MobileNetV1ForImageClassification,Yo as MobileNetV1Model,Jo as MobileNetV1PreTrainedModel,Ko as MobileNetV2FeatureExtractor,Zo as MobileNetV2ForImageClassification,es as MobileNetV2Model,ts as MobileNetV2PreTrainedModel,ns as MobileNetV3FeatureExtractor,rs as MobileNetV3ForImageClassification,os as MobileNetV3Model,ss as MobileNetV3PreTrainedModel,as as MobileNetV4FeatureExtractor,is as MobileNetV4ForImageClassification,ls as MobileNetV4Model,cs as MobileNetV4PreTrainedModel,ds as MobileViTFeatureExtractor,us as MobileViTForImageClassification,ps as MobileViTImageProcessor,hs as MobileViTModel,ms as MobileViTPreTrainedModel,_s as MobileViTV2ForImageClassification,fs as MobileViTV2Model,gs as MobileViTV2PreTrainedModel,Ms as ModelOutput,ws as Moondream1ForConditionalGeneration,bs as MptForCausalLM,Ts as MptModel,xs as MptPreTrainedModel,ys as MusicgenForCausalLM,ks as MusicgenForConditionalGeneration,Fs as MusicgenModel,Cs as MusicgenPreTrainedModel,Ps as NllbTokenizer,vs as NomicBertModel,Ss as NomicBertPreTrainedModel,As as NougatImageProcessor,Es as NougatTokenizer,Ls as OPTForCausalLM,zs as OPTModel,Is as OPTPreTrainedModel,Bs as ObjectDetectionPipeline,Ns as OpenELMForCausalLM,Os as OpenELMModel,Ds as OpenELMPreTrainedModel,Vs as OwlViTFeatureExtractor,js as OwlViTForObjectDetection,Rs as OwlViTModel,Gs as OwlViTPreTrainedModel,qs as OwlViTProcessor,$s as Owlv2ForObjectDetection,Ws as Owlv2ImageProcessor,Us as Owlv2Model,Xs as Owlv2PreTrainedModel,Qs as Phi3ForCausalLM,Hs as Phi3Model,Ys as Phi3PreTrainedModel,Js as PhiForCausalLM,Ks as PhiModel,Zs as PhiPreTrainedModel,ea as Pipeline,ta as 
PreTrainedModel,na as PreTrainedTokenizer,ra as PretrainedConfig,oa as PretrainedMixin,sa as Processor,aa as PyAnnoteFeatureExtractor,ia as PyAnnoteForAudioFrameClassification,la as PyAnnoteModel,ca as PyAnnotePreTrainedModel,da as PyAnnoteProcessor,ua as QuestionAnsweringModelOutput,pa as QuestionAnsweringPipeline,ha as Qwen2ForCausalLM,ma as Qwen2Model,_a as Qwen2PreTrainedModel,fa as Qwen2Tokenizer,ga as RTDetrForObjectDetection,Ma as RTDetrImageProcessor,wa as RTDetrModel,ba as RTDetrObjectDetectionOutput,Ta as RTDetrPreTrainedModel,xa as RawImage,ya as ResNetForImageClassification,ka as ResNetModel,Fa as ResNetPreTrainedModel,Ca as RoFormerForMaskedLM,Pa as RoFormerForQuestionAnswering,va as RoFormerForSequenceClassification,Sa as RoFormerForTokenClassification,Aa as RoFormerModel,Ea as RoFormerPreTrainedModel,La as RoFormerTokenizer,za as RobertaForMaskedLM,Ia as RobertaForQuestionAnswering,Ba as RobertaForSequenceClassification,Na as RobertaForTokenClassification,Oa as RobertaModel,Da as RobertaPreTrainedModel,Va as RobertaTokenizer,ja as SamImageProcessor,Ra as SamImageSegmentationOutput,Ga as SamModel,qa as SamPreTrainedModel,$a as SamProcessor,Wa as SapiensFeatureExtractor,Ua as SapiensForDepthEstimation,Xa as SapiensForNormalEstimation,Qa as SapiensForSemanticSegmentation,Ha as SapiensPreTrainedModel,Ya as SeamlessM4TFeatureExtractor,Ja as SegformerFeatureExtractor,Ka as SegformerForImageClassification,Za as SegformerForSemanticSegmentation,ei as SegformerModel,ti as SegformerPreTrainedModel,ni as Seq2SeqLMOutput,ri as SequenceClassifierOutput,oi as SiglipImageProcessor,si as SiglipModel,ai as SiglipPreTrainedModel,ii as SiglipTextModel,li as SiglipTokenizer,ci as SiglipVisionModel,di as SpeechT5FeatureExtractor,ui as SpeechT5ForSpeechToText,pi as SpeechT5ForTextToSpeech,hi as SpeechT5HifiGan,mi as SpeechT5Model,_i as SpeechT5PreTrainedModel,fi as SpeechT5Processor,gi as SpeechT5Tokenizer,Mi as SqueezeBertForMaskedLM,wi as SqueezeBertForQuestionAnswering,bi as SqueezeBertForSequenceClassification,Ti as SqueezeBertModel,xi as SqueezeBertPreTrainedModel,yi as SqueezeBertTokenizer,ki as StableLmForCausalLM,Fi as StableLmModel,Ci as StableLmPreTrainedModel,Pi as Starcoder2ForCausalLM,vi as Starcoder2Model,Si as Starcoder2PreTrainedModel,Ai as StoppingCriteria,Ei as StoppingCriteriaList,Li as SummarizationPipeline,zi as Swin2SRForImageSuperResolution,Ii as Swin2SRImageProcessor,Bi as Swin2SRModel,Ni as Swin2SRPreTrainedModel,Oi as SwinForImageClassification,Di as SwinModel,Vi as SwinPreTrainedModel,ji as T5ForConditionalGeneration,Ri as T5Model,Gi as T5PreTrainedModel,qi as T5Tokenizer,$i as TableTransformerForObjectDetection,Wi as TableTransformerModel,Ui as TableTransformerObjectDetectionOutput,Xi as TableTransformerPreTrainedModel,Qi as Tensor,Hi as Text2TextGenerationPipeline,Yi as TextClassificationPipeline,Ji as TextGenerationPipeline,Ki as TextStreamer,Zi as TextToAudioPipeline,el as TokenClassificationPipeline,tl as TokenClassifierOutput,nl as TokenizerModel,rl as TrOCRForCausalLM,ol as TrOCRPreTrainedModel,sl as TranslationPipeline,al as UniSpeechForCTC,il as UniSpeechForSequenceClassification,ll as UniSpeechModel,cl as UniSpeechPreTrainedModel,dl as UniSpeechSatForAudioFrameClassification,ul as UniSpeechSatForCTC,pl as UniSpeechSatForSequenceClassification,hl as UniSpeechSatModel,ml as UniSpeechSatPreTrainedModel,_l as ViTFeatureExtractor,fl as ViTForImageClassification,gl as ViTImageProcessor,Ml as ViTModel,wl as ViTPreTrainedModel,bl as VisionEncoderDecoderModel,Tl as 
VitMatteForImageMatting,xl as VitMatteImageProcessor,yl as VitMattePreTrainedModel,kl as VitsModel,Fl as VitsModelOutput,Cl as VitsPreTrainedModel,Pl as VitsTokenizer,vl as Wav2Vec2BertForCTC,Sl as Wav2Vec2BertForSequenceClassification,Al as Wav2Vec2BertModel,El as Wav2Vec2BertPreTrainedModel,Ll as Wav2Vec2CTCTokenizer,zl as Wav2Vec2FeatureExtractor,Il as Wav2Vec2ForAudioFrameClassification,Bl as Wav2Vec2ForCTC,Nl as Wav2Vec2ForSequenceClassification,Ol as Wav2Vec2Model,Dl as Wav2Vec2PreTrainedModel,Vl as Wav2Vec2ProcessorWithLM,jl as WavLMForAudioFrameClassification,Rl as WavLMForCTC,Gl as WavLMForSequenceClassification,ql as WavLMForXVector,$l as WavLMModel,Wl as WavLMPreTrainedModel,Ul as WeSpeakerFeatureExtractor,Xl as WeSpeakerResNetModel,Ql as WeSpeakerResNetPreTrainedModel,Hl as WhisperFeatureExtractor,Yl as WhisperForConditionalGeneration,Jl as WhisperModel,Kl as WhisperPreTrainedModel,Zl as WhisperProcessor,ec as WhisperTextStreamer,tc as WhisperTokenizer,nc as XLMForQuestionAnswering,rc as XLMForSequenceClassification,oc as XLMForTokenClassification,sc as XLMModel,ac as XLMPreTrainedModel,ic as XLMRobertaForMaskedLM,lc as XLMRobertaForQuestionAnswering,cc as XLMRobertaForSequenceClassification,dc as XLMRobertaForTokenClassification,uc as XLMRobertaModel,pc as XLMRobertaPreTrainedModel,hc as XLMRobertaTokenizer,mc as XLMTokenizer,_c as XLMWithLMHeadModel,fc as XVectorOutput,gc as YolosFeatureExtractor,Mc as YolosForObjectDetection,wc as YolosModel,bc as YolosObjectDetectionOutput,Tc as YolosPreTrainedModel,xc as ZeroShotAudioClassificationPipeline,yc as ZeroShotClassificationPipeline,kc as ZeroShotImageClassificationPipeline,Fc as ZeroShotObjectDetectionPipeline,Cc as bankers_round,Pc as cat,vc as cos_sim,Sc as dot,Ac as dynamic_time_warping,Ec as env,Lc as full,zc as full_like,Ic as getKeyValueShapes,Bc as hamming,Nc as hanning,Oc as interpolate,Dc as interpolate_4d,Vc as interpolate_data,jc as is_chinese_char,Rc as layer_norm,Gc as log_softmax,qc as magnitude,$c as matmul,Wc as max,Uc as mean,Xc as mean_pooling,Qc as medianFilter,Hc as mel_filter_bank,Yc as min,Jc as ones,Kc as ones_like,Zc as permute,ed as permute_data,td as pipeline,nd as quantize_embeddings,rd as read_audio,od as rfft,sd as round,ad as softmax,id as spectrogram,ld as stack,cd as std_mean,dd as topk,ud as window_function,pd as zeros,hd as zeros_like};
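For orientation, the export statement above is the package's public entry point: every symbol listed (pipeline, env, AutoTokenizer, AutoModel, Tensor, the task pipelines, and so on) is importable from "@huggingface/transformers". A minimal consumer-side sketch of that surface, using only names present in the export list (the specific task string and the shape of the result are assumptions based on the pipeline API, and the model id is left to the library's default), might look like:

import { pipeline, env } from '@huggingface/transformers';

// `env` is exported from the same entry point; remote/local model loading can be
// toggled here if needed (left commented, as the defaults are assumed sufficient).
// env.allowRemoteModels = true;

// Build a text-classification pipeline; omitting a model id lets the library fall
// back to its default checkpoint for the task.
const classifier = await pipeline('text-classification');

const output = await classifier('Transformers.js just shipped a new alpha.');
// Assumed result shape: an array of { label, score } objects.
console.log(output);

The same entry point also re-exports lower-level pieces (AutoTokenizer, AutoModelForCausalLM, Tensor, the audio/maths utilities), so consumers who do not want the pipeline abstraction can compose those directly.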
+ \*****************************/c.r(d),c.d(d,{ASTFeatureExtractor:()=>_.ASTFeatureExtractor,ASTForAudioClassification:()=>h.ASTForAudioClassification,ASTModel:()=>h.ASTModel,ASTPreTrainedModel:()=>h.ASTPreTrainedModel,AlbertForMaskedLM:()=>h.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>h.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>h.AlbertForSequenceClassification,AlbertModel:()=>h.AlbertModel,AlbertPreTrainedModel:()=>h.AlbertPreTrainedModel,AlbertTokenizer:()=>m.AlbertTokenizer,AudioClassificationPipeline:()=>p.AudioClassificationPipeline,AutoConfig:()=>f.AutoConfig,AutoModel:()=>h.AutoModel,AutoModelForAudioClassification:()=>h.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>h.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>h.AutoModelForCTC,AutoModelForCausalLM:()=>h.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>h.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>h.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>h.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>h.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>h.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>h.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>h.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>h.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>h.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>h.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>h.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>h.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>h.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>h.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>h.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>h.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>h.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>h.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>h.AutoModelForTokenClassification,AutoModelForVision2Seq:()=>h.AutoModelForVision2Seq,AutoModelForXVector:()=>h.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>h.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>_.AutoProcessor,AutoTokenizer:()=>m.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>p.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>h.BartForConditionalGeneration,BartForSequenceClassification:()=>h.BartForSequenceClassification,BartModel:()=>h.BartModel,BartPretrainedModel:()=>h.BartPretrainedModel,BartTokenizer:()=>m.BartTokenizer,BaseModelOutput:()=>h.BaseModelOutput,BaseStreamer:()=>T.BaseStreamer,BeitFeatureExtractor:()=>_.BeitFeatureExtractor,BeitForImageClassification:()=>h.BeitForImageClassification,BeitModel:()=>h.BeitModel,BeitPreTrainedModel:()=>h.BeitPreTrainedModel,BertForMaskedLM:()=>h.BertForMaskedLM,BertForQuestionAnswering:()=>h.BertForQuestionAnswering,BertForSequenceClassification:()=>h.BertForSequenceClassification,BertForTokenClassification:()=>h.BertForTokenClassification,BertModel:()=>h.BertModel,BertPreTrainedModel:()=>h.BertPreTrainedModel,BertTokenizer:()=>m.BertTokenizer,BitImageProcessor:()=>_.BitImageProcessor,BlenderbotForConditionalGeneration:()=>h.BlenderbotForConditionalGeneration,BlenderbotModel:()=>h.BlenderbotModel,BlenderbotPreTrainedModel:()=>h.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>h.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>h.BlenderbotSmall
Model,BlenderbotSmallPreTrainedModel:()=>h.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>m.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>m.BlenderbotTokenizer,BloomForCausalLM:()=>h.BloomForCausalLM,BloomModel:()=>h.BloomModel,BloomPreTrainedModel:()=>h.BloomPreTrainedModel,BloomTokenizer:()=>m.BloomTokenizer,CLIPFeatureExtractor:()=>_.CLIPFeatureExtractor,CLIPImageProcessor:()=>_.CLIPImageProcessor,CLIPModel:()=>h.CLIPModel,CLIPPreTrainedModel:()=>h.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>h.CLIPSegForImageSegmentation,CLIPSegModel:()=>h.CLIPSegModel,CLIPSegPreTrainedModel:()=>h.CLIPSegPreTrainedModel,CLIPTextModelWithProjection:()=>h.CLIPTextModelWithProjection,CLIPTokenizer:()=>m.CLIPTokenizer,CLIPVisionModelWithProjection:()=>h.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>h.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>h.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>h.CamembertForSequenceClassification,CamembertForTokenClassification:()=>h.CamembertForTokenClassification,CamembertModel:()=>h.CamembertModel,CamembertPreTrainedModel:()=>h.CamembertPreTrainedModel,CamembertTokenizer:()=>m.CamembertTokenizer,CausalLMOutput:()=>h.CausalLMOutput,CausalLMOutputWithPast:()=>h.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>_.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>h.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>h.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>h.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>_.ClapFeatureExtractor,ClapModel:()=>h.ClapModel,ClapPreTrainedModel:()=>h.ClapPreTrainedModel,ClapTextModelWithProjection:()=>h.ClapTextModelWithProjection,CodeGenForCausalLM:()=>h.CodeGenForCausalLM,CodeGenModel:()=>h.CodeGenModel,CodeGenPreTrainedModel:()=>h.CodeGenPreTrainedModel,CodeGenTokenizer:()=>m.CodeGenTokenizer,CodeLlamaTokenizer:()=>m.CodeLlamaTokenizer,CohereForCausalLM:()=>h.CohereForCausalLM,CohereModel:()=>h.CohereModel,CoherePreTrainedModel:()=>h.CoherePreTrainedModel,CohereTokenizer:()=>m.CohereTokenizer,ConvBertForMaskedLM:()=>h.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>h.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>h.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>h.ConvBertForTokenClassification,ConvBertModel:()=>h.ConvBertModel,ConvBertPreTrainedModel:()=>h.ConvBertPreTrainedModel,ConvBertTokenizer:()=>m.ConvBertTokenizer,ConvNextFeatureExtractor:()=>_.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>h.ConvNextForImageClassification,ConvNextImageProcessor:()=>_.ConvNextImageProcessor,ConvNextModel:()=>h.ConvNextModel,ConvNextPreTrainedModel:()=>h.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>h.ConvNextV2ForImageClassification,ConvNextV2Model:()=>h.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>h.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>_.DPTFeatureExtractor,DPTForDepthEstimation:()=>h.DPTForDepthEstimation,DPTImageProcessor:()=>_.DPTImageProcessor,DPTModel:()=>h.DPTModel,DPTPreTrainedModel:()=>h.DPTPreTrainedModel,DebertaForMaskedLM:()=>h.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>h.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>h.DebertaForSequenceClassification,DebertaForTokenClassification:()=>h.DebertaForTokenClassification,DebertaModel:()=>h.DebertaModel,DebertaPreTrainedModel:()=>h.DebertaPreTrainedModel,DebertaTokenizer:()=>m.DebertaTokenizer,DebertaV2ForMaskedLM:()=>h.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>h.DebertaV2ForQuestio
nAnswering,DebertaV2ForSequenceClassification:()=>h.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>h.DebertaV2ForTokenClassification,DebertaV2Model:()=>h.DebertaV2Model,DebertaV2PreTrainedModel:()=>h.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>m.DebertaV2Tokenizer,DeiTFeatureExtractor:()=>_.DeiTFeatureExtractor,DeiTForImageClassification:()=>h.DeiTForImageClassification,DeiTModel:()=>h.DeiTModel,DeiTPreTrainedModel:()=>h.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>h.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>h.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>p.DepthEstimationPipeline,DetrFeatureExtractor:()=>_.DetrFeatureExtractor,DetrForObjectDetection:()=>h.DetrForObjectDetection,DetrForSegmentation:()=>h.DetrForSegmentation,DetrModel:()=>h.DetrModel,DetrObjectDetectionOutput:()=>h.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>h.DetrPreTrainedModel,DetrSegmentationOutput:()=>h.DetrSegmentationOutput,Dinov2ForImageClassification:()=>h.Dinov2ForImageClassification,Dinov2Model:()=>h.Dinov2Model,Dinov2PreTrainedModel:()=>h.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>h.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>h.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>h.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>h.DistilBertForTokenClassification,DistilBertModel:()=>h.DistilBertModel,DistilBertPreTrainedModel:()=>h.DistilBertPreTrainedModel,DistilBertTokenizer:()=>m.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>p.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>_.DonutFeatureExtractor,DonutSwinModel:()=>h.DonutSwinModel,DonutSwinPreTrainedModel:()=>h.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>h.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>_.EfficientNetImageProcessor,EfficientNetModel:()=>h.EfficientNetModel,EfficientNetPreTrainedModel:()=>h.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>h.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>h.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>h.ElectraForSequenceClassification,ElectraForTokenClassification:()=>h.ElectraForTokenClassification,ElectraModel:()=>h.ElectraModel,ElectraPreTrainedModel:()=>h.ElectraPreTrainedModel,ElectraTokenizer:()=>m.ElectraTokenizer,EosTokenCriteria:()=>x.EosTokenCriteria,EsmForMaskedLM:()=>h.EsmForMaskedLM,EsmForSequenceClassification:()=>h.EsmForSequenceClassification,EsmForTokenClassification:()=>h.EsmForTokenClassification,EsmModel:()=>h.EsmModel,EsmPreTrainedModel:()=>h.EsmPreTrainedModel,EsmTokenizer:()=>m.EsmTokenizer,FFT:()=>b.FFT,FalconForCausalLM:()=>h.FalconForCausalLM,FalconModel:()=>h.FalconModel,FalconPreTrainedModel:()=>h.FalconPreTrainedModel,FalconTokenizer:()=>m.FalconTokenizer,FastViTForImageClassification:()=>h.FastViTForImageClassification,FastViTModel:()=>h.FastViTModel,FastViTPreTrainedModel:()=>h.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>p.FeatureExtractionPipeline,FeatureExtractor:()=>_.FeatureExtractor,FillMaskPipeline:()=>p.FillMaskPipeline,Florence2ForConditionalGeneration:()=>h.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>h.Florence2PreTrainedModel,Florence2Processor:()=>_.Florence2Processor,GLPNFeatureExtractor:()=>_.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>h.GLPNForDepthEstimation,GLPNModel:()=>h.GLPNModel,GLPNPreTrainedModel:()=>h.GLPNPreTrainedModel,GPT2LMHeadModel:()=>h.GPT2LMHeadModel,GPT2Model:()=>h.GPT2Model,GPT2PreTrainedMode
l:()=>h.GPT2PreTrainedModel,GPT2Tokenizer:()=>m.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>h.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>h.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>h.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>h.GPTJForCausalLM,GPTJModel:()=>h.GPTJModel,GPTJPreTrainedModel:()=>h.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>h.GPTNeoForCausalLM,GPTNeoModel:()=>h.GPTNeoModel,GPTNeoPreTrainedModel:()=>h.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>h.GPTNeoXForCausalLM,GPTNeoXModel:()=>h.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>h.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>m.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>h.Gemma2ForCausalLM,Gemma2Model:()=>h.Gemma2Model,Gemma2PreTrainedModel:()=>h.Gemma2PreTrainedModel,GemmaForCausalLM:()=>h.GemmaForCausalLM,GemmaModel:()=>h.GemmaModel,GemmaPreTrainedModel:()=>h.GemmaPreTrainedModel,GemmaTokenizer:()=>m.GemmaTokenizer,Grok1Tokenizer:()=>m.Grok1Tokenizer,HerbertTokenizer:()=>m.HerbertTokenizer,HieraForImageClassification:()=>h.HieraForImageClassification,HieraModel:()=>h.HieraModel,HieraPreTrainedModel:()=>h.HieraPreTrainedModel,HubertForCTC:()=>h.HubertForCTC,HubertForSequenceClassification:()=>h.HubertForSequenceClassification,HubertModel:()=>h.HubertModel,HubertPreTrainedModel:()=>h.HubertPreTrainedModel,ImageClassificationPipeline:()=>p.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>p.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>_.ImageFeatureExtractor,ImageMattingOutput:()=>h.ImageMattingOutput,ImageSegmentationPipeline:()=>p.ImageSegmentationPipeline,ImageToImagePipeline:()=>p.ImageToImagePipeline,ImageToTextPipeline:()=>p.ImageToTextPipeline,InterruptableStoppingCriteria:()=>x.InterruptableStoppingCriteria,JAISLMHeadModel:()=>h.JAISLMHeadModel,JAISModel:()=>h.JAISModel,JAISPreTrainedModel:()=>h.JAISPreTrainedModel,LlamaForCausalLM:()=>h.LlamaForCausalLM,LlamaModel:()=>h.LlamaModel,LlamaPreTrainedModel:()=>h.LlamaPreTrainedModel,LlamaTokenizer:()=>m.LlamaTokenizer,LlavaForConditionalGeneration:()=>h.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>h.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>h.LongT5ForConditionalGeneration,LongT5Model:()=>h.LongT5Model,LongT5PreTrainedModel:()=>h.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>h.M2M100ForConditionalGeneration,M2M100Model:()=>h.M2M100Model,M2M100PreTrainedModel:()=>h.M2M100PreTrainedModel,M2M100Tokenizer:()=>m.M2M100Tokenizer,MBart50Tokenizer:()=>m.MBart50Tokenizer,MBartForCausalLM:()=>h.MBartForCausalLM,MBartForConditionalGeneration:()=>h.MBartForConditionalGeneration,MBartForSequenceClassification:()=>h.MBartForSequenceClassification,MBartModel:()=>h.MBartModel,MBartPreTrainedModel:()=>h.MBartPreTrainedModel,MBartTokenizer:()=>m.MBartTokenizer,MPNetForMaskedLM:()=>h.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>h.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>h.MPNetForSequenceClassification,MPNetForTokenClassification:()=>h.MPNetForTokenClassification,MPNetModel:()=>h.MPNetModel,MPNetPreTrainedModel:()=>h.MPNetPreTrainedModel,MPNetTokenizer:()=>m.MPNetTokenizer,MT5ForConditionalGeneration:()=>h.MT5ForConditionalGeneration,MT5Model:()=>h.MT5Model,MT5PreTrainedModel:()=>h.MT5PreTrainedModel,MarianMTModel:()=>h.MarianMTModel,MarianModel:()=>h.MarianModel,MarianPreTrainedModel:()=>h.MarianPreTrainedModel,MarianTokenizer:()=>m.MarianTokenizer,MaskedLMOutput:()=>h.MaskedLMOutput,MaxLengthCriteria:()=>x.MaxLengthCriteria,MistralForCausalLM:()=>h.MistralForCausalLM,MistralModel:()=>h.MistralModel,MistralPreTrainedModel:
()=>h.MistralPreTrainedModel,MobileBertForMaskedLM:()=>h.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>h.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>h.MobileBertForSequenceClassification,MobileBertModel:()=>h.MobileBertModel,MobileBertPreTrainedModel:()=>h.MobileBertPreTrainedModel,MobileBertTokenizer:()=>m.MobileBertTokenizer,MobileNetV1FeatureExtractor:()=>_.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>h.MobileNetV1ForImageClassification,MobileNetV1Model:()=>h.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>h.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>_.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>h.MobileNetV2ForImageClassification,MobileNetV2Model:()=>h.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>h.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>_.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>h.MobileNetV3ForImageClassification,MobileNetV3Model:()=>h.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>h.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>_.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>h.MobileNetV4ForImageClassification,MobileNetV4Model:()=>h.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>h.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>_.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>h.MobileViTForImageClassification,MobileViTImageProcessor:()=>_.MobileViTImageProcessor,MobileViTModel:()=>h.MobileViTModel,MobileViTPreTrainedModel:()=>h.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>h.MobileViTV2ForImageClassification,MobileViTV2Model:()=>h.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>h.MobileViTV2PreTrainedModel,ModelOutput:()=>h.ModelOutput,Moondream1ForConditionalGeneration:()=>h.Moondream1ForConditionalGeneration,MptForCausalLM:()=>h.MptForCausalLM,MptModel:()=>h.MptModel,MptPreTrainedModel:()=>h.MptPreTrainedModel,MusicgenForCausalLM:()=>h.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>h.MusicgenForConditionalGeneration,MusicgenModel:()=>h.MusicgenModel,MusicgenPreTrainedModel:()=>h.MusicgenPreTrainedModel,NllbTokenizer:()=>m.NllbTokenizer,NomicBertModel:()=>h.NomicBertModel,NomicBertPreTrainedModel:()=>h.NomicBertPreTrainedModel,NougatImageProcessor:()=>_.NougatImageProcessor,NougatTokenizer:()=>m.NougatTokenizer,OPTForCausalLM:()=>h.OPTForCausalLM,OPTModel:()=>h.OPTModel,OPTPreTrainedModel:()=>h.OPTPreTrainedModel,ObjectDetectionPipeline:()=>p.ObjectDetectionPipeline,OpenELMForCausalLM:()=>h.OpenELMForCausalLM,OpenELMModel:()=>h.OpenELMModel,OpenELMPreTrainedModel:()=>h.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>_.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>h.OwlViTForObjectDetection,OwlViTModel:()=>h.OwlViTModel,OwlViTPreTrainedModel:()=>h.OwlViTPreTrainedModel,OwlViTProcessor:()=>_.OwlViTProcessor,Owlv2ForObjectDetection:()=>h.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>_.Owlv2ImageProcessor,Owlv2Model:()=>h.Owlv2Model,Owlv2PreTrainedModel:()=>h.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>h.Phi3ForCausalLM,Phi3Model:()=>h.Phi3Model,Phi3PreTrainedModel:()=>h.Phi3PreTrainedModel,PhiForCausalLM:()=>h.PhiForCausalLM,PhiModel:()=>h.PhiModel,PhiPreTrainedModel:()=>h.PhiPreTrainedModel,Pipeline:()=>p.Pipeline,PreTrainedModel:()=>h.PreTrainedModel,PreTrainedTokenizer:()=>m.PreTrainedTokenizer,PretrainedConfig:()=>f.PretrainedConfig,PretrainedMixin:()=>h.PretrainedMixin,Processor:()=>_.Processor,PyAnnoteFeatureExtractor:()=>_.PyAnnoteFeatureExtracto
r,PyAnnoteForAudioFrameClassification:()=>h.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>h.PyAnnoteModel,PyAnnotePreTrainedModel:()=>h.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>_.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>h.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>p.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>h.Qwen2ForCausalLM,Qwen2Model:()=>h.Qwen2Model,Qwen2PreTrainedModel:()=>h.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>m.Qwen2Tokenizer,RTDetrForObjectDetection:()=>h.RTDetrForObjectDetection,RTDetrImageProcessor:()=>_.RTDetrImageProcessor,RTDetrModel:()=>h.RTDetrModel,RTDetrObjectDetectionOutput:()=>h.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>h.RTDetrPreTrainedModel,RawImage:()=>M.RawImage,ResNetForImageClassification:()=>h.ResNetForImageClassification,ResNetModel:()=>h.ResNetModel,ResNetPreTrainedModel:()=>h.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>h.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>h.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>h.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>h.RoFormerForTokenClassification,RoFormerModel:()=>h.RoFormerModel,RoFormerPreTrainedModel:()=>h.RoFormerPreTrainedModel,RoFormerTokenizer:()=>m.RoFormerTokenizer,RobertaForMaskedLM:()=>h.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>h.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>h.RobertaForSequenceClassification,RobertaForTokenClassification:()=>h.RobertaForTokenClassification,RobertaModel:()=>h.RobertaModel,RobertaPreTrainedModel:()=>h.RobertaPreTrainedModel,RobertaTokenizer:()=>m.RobertaTokenizer,SamImageProcessor:()=>_.SamImageProcessor,SamImageSegmentationOutput:()=>h.SamImageSegmentationOutput,SamModel:()=>h.SamModel,SamPreTrainedModel:()=>h.SamPreTrainedModel,SamProcessor:()=>_.SamProcessor,SapiensFeatureExtractor:()=>_.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>h.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>h.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>h.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>h.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>_.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>_.SegformerFeatureExtractor,SegformerForImageClassification:()=>h.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>h.SegformerForSemanticSegmentation,SegformerModel:()=>h.SegformerModel,SegformerPreTrainedModel:()=>h.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>h.Seq2SeqLMOutput,SequenceClassifierOutput:()=>h.SequenceClassifierOutput,SiglipImageProcessor:()=>_.SiglipImageProcessor,SiglipModel:()=>h.SiglipModel,SiglipPreTrainedModel:()=>h.SiglipPreTrainedModel,SiglipTextModel:()=>h.SiglipTextModel,SiglipTokenizer:()=>m.SiglipTokenizer,SiglipVisionModel:()=>h.SiglipVisionModel,SpeechT5FeatureExtractor:()=>_.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>h.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>h.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>h.SpeechT5HifiGan,SpeechT5Model:()=>h.SpeechT5Model,SpeechT5PreTrainedModel:()=>h.SpeechT5PreTrainedModel,SpeechT5Processor:()=>_.SpeechT5Processor,SpeechT5Tokenizer:()=>m.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>h.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>h.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>h.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>h.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>h.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>m.SqueezeBertToken
izer,StableLmForCausalLM:()=>h.StableLmForCausalLM,StableLmModel:()=>h.StableLmModel,StableLmPreTrainedModel:()=>h.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>h.Starcoder2ForCausalLM,Starcoder2Model:()=>h.Starcoder2Model,Starcoder2PreTrainedModel:()=>h.Starcoder2PreTrainedModel,StoppingCriteria:()=>x.StoppingCriteria,StoppingCriteriaList:()=>x.StoppingCriteriaList,SummarizationPipeline:()=>p.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>h.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>_.Swin2SRImageProcessor,Swin2SRModel:()=>h.Swin2SRModel,Swin2SRPreTrainedModel:()=>h.Swin2SRPreTrainedModel,SwinForImageClassification:()=>h.SwinForImageClassification,SwinModel:()=>h.SwinModel,SwinPreTrainedModel:()=>h.SwinPreTrainedModel,T5ForConditionalGeneration:()=>h.T5ForConditionalGeneration,T5Model:()=>h.T5Model,T5PreTrainedModel:()=>h.T5PreTrainedModel,T5Tokenizer:()=>m.T5Tokenizer,TableTransformerForObjectDetection:()=>h.TableTransformerForObjectDetection,TableTransformerModel:()=>h.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>h.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>h.TableTransformerPreTrainedModel,Tensor:()=>w.Tensor,Text2TextGenerationPipeline:()=>p.Text2TextGenerationPipeline,TextClassificationPipeline:()=>p.TextClassificationPipeline,TextGenerationPipeline:()=>p.TextGenerationPipeline,TextStreamer:()=>T.TextStreamer,TextToAudioPipeline:()=>p.TextToAudioPipeline,TokenClassificationPipeline:()=>p.TokenClassificationPipeline,TokenClassifierOutput:()=>h.TokenClassifierOutput,TokenizerModel:()=>m.TokenizerModel,TrOCRForCausalLM:()=>h.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>h.TrOCRPreTrainedModel,TranslationPipeline:()=>p.TranslationPipeline,UniSpeechForCTC:()=>h.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>h.UniSpeechForSequenceClassification,UniSpeechModel:()=>h.UniSpeechModel,UniSpeechPreTrainedModel:()=>h.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>h.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>h.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>h.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>h.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>h.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>_.ViTFeatureExtractor,ViTForImageClassification:()=>h.ViTForImageClassification,ViTImageProcessor:()=>_.ViTImageProcessor,ViTModel:()=>h.ViTModel,ViTPreTrainedModel:()=>h.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>h.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>h.VitMatteForImageMatting,VitMatteImageProcessor:()=>_.VitMatteImageProcessor,VitMattePreTrainedModel:()=>h.VitMattePreTrainedModel,VitsModel:()=>h.VitsModel,VitsModelOutput:()=>h.VitsModelOutput,VitsPreTrainedModel:()=>h.VitsPreTrainedModel,VitsTokenizer:()=>m.VitsTokenizer,Wav2Vec2BertForCTC:()=>h.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>h.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>h.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>h.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>m.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>_.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>h.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>h.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>h.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>h.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>h.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>_.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>h.WavLMForAud
ioFrameClassification,WavLMForCTC:()=>h.WavLMForCTC,WavLMForSequenceClassification:()=>h.WavLMForSequenceClassification,WavLMForXVector:()=>h.WavLMForXVector,WavLMModel:()=>h.WavLMModel,WavLMPreTrainedModel:()=>h.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>_.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>h.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>h.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>_.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>h.WhisperForConditionalGeneration,WhisperModel:()=>h.WhisperModel,WhisperPreTrainedModel:()=>h.WhisperPreTrainedModel,WhisperProcessor:()=>_.WhisperProcessor,WhisperTextStreamer:()=>T.WhisperTextStreamer,WhisperTokenizer:()=>m.WhisperTokenizer,XLMForQuestionAnswering:()=>h.XLMForQuestionAnswering,XLMForSequenceClassification:()=>h.XLMForSequenceClassification,XLMForTokenClassification:()=>h.XLMForTokenClassification,XLMModel:()=>h.XLMModel,XLMPreTrainedModel:()=>h.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>h.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>h.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>h.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>h.XLMRobertaForTokenClassification,XLMRobertaModel:()=>h.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>h.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>m.XLMRobertaTokenizer,XLMTokenizer:()=>m.XLMTokenizer,XLMWithLMHeadModel:()=>h.XLMWithLMHeadModel,XVectorOutput:()=>h.XVectorOutput,YolosFeatureExtractor:()=>_.YolosFeatureExtractor,YolosForObjectDetection:()=>h.YolosForObjectDetection,YolosModel:()=>h.YolosModel,YolosObjectDetectionOutput:()=>h.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>h.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>p.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>p.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>p.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>p.ZeroShotObjectDetectionPipeline,bankers_round:()=>b.bankers_round,cat:()=>w.cat,cos_sim:()=>b.cos_sim,dot:()=>b.dot,dynamic_time_warping:()=>b.dynamic_time_warping,env:()=>u.env,full:()=>w.full,full_like:()=>w.full_like,getKeyValueShapes:()=>f.getKeyValueShapes,hamming:()=>g.hamming,hanning:()=>g.hanning,interpolate:()=>w.interpolate,interpolate_4d:()=>w.interpolate_4d,interpolate_data:()=>b.interpolate_data,is_chinese_char:()=>m.is_chinese_char,layer_norm:()=>w.layer_norm,log_softmax:()=>b.log_softmax,magnitude:()=>b.magnitude,matmul:()=>w.matmul,max:()=>b.max,mean:()=>w.mean,mean_pooling:()=>w.mean_pooling,medianFilter:()=>b.medianFilter,mel_filter_bank:()=>g.mel_filter_bank,min:()=>b.min,ones:()=>w.ones,ones_like:()=>w.ones_like,permute:()=>w.permute,permute_data:()=>b.permute_data,pipeline:()=>p.pipeline,quantize_embeddings:()=>w.quantize_embeddings,read_audio:()=>g.read_audio,rfft:()=>w.rfft,round:()=>b.round,softmax:()=>b.softmax,spectrogram:()=>g.spectrogram,stack:()=>w.stack,std_mean:()=>w.std_mean,topk:()=>w.topk,window_function:()=>g.window_function,zeros:()=>w.zeros,zeros_like:()=>w.zeros_like});var u=c(/*! ./env.js */"./src/env.js"),p=c(/*! ./pipelines.js */"./src/pipelines.js"),h=c(/*! ./models.js */"./src/models.js"),m=c(/*! ./tokenizers.js */"./src/tokenizers.js"),_=c(/*! ./processors.js */"./src/processors.js"),f=c(/*! ./configs.js */"./src/configs.js"),g=c(/*! ./utils/audio.js */"./src/utils/audio.js"),M=c(/*! ./utils/image.js */"./src/utils/image.js"),w=c(/*! 
./utils/tensor.js */"./src/utils/tensor.js"),b=c(/*! ./utils/maths.js */"./src/utils/maths.js"),T=c(/*! ./generation/streamers.js */"./src/generation/streamers.js"),x=c(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),y=d.ASTFeatureExtractor,k=d.ASTForAudioClassification,F=d.ASTModel,C=d.ASTPreTrainedModel,P=d.AlbertForMaskedLM,v=d.AlbertForQuestionAnswering,S=d.AlbertForSequenceClassification,A=d.AlbertModel,E=d.AlbertPreTrainedModel,L=d.AlbertTokenizer,z=d.AudioClassificationPipeline,I=d.AutoConfig,B=d.AutoModel,N=d.AutoModelForAudioClassification,O=d.AutoModelForAudioFrameClassification,D=d.AutoModelForCTC,V=d.AutoModelForCausalLM,j=d.AutoModelForDepthEstimation,R=d.AutoModelForDocumentQuestionAnswering,G=d.AutoModelForImageClassification,q=d.AutoModelForImageFeatureExtraction,$=d.AutoModelForImageMatting,W=d.AutoModelForImageSegmentation,U=d.AutoModelForImageToImage,X=d.AutoModelForMaskGeneration,Q=d.AutoModelForMaskedLM,H=d.AutoModelForNormalEstimation,Y=d.AutoModelForObjectDetection,J=d.AutoModelForQuestionAnswering,K=d.AutoModelForSemanticSegmentation,Z=d.AutoModelForSeq2SeqLM,ee=d.AutoModelForSequenceClassification,te=d.AutoModelForSpeechSeq2Seq,ne=d.AutoModelForTextToSpectrogram,re=d.AutoModelForTextToWaveform,oe=d.AutoModelForTokenClassification,se=d.AutoModelForVision2Seq,ae=d.AutoModelForXVector,ie=d.AutoModelForZeroShotObjectDetection,le=d.AutoProcessor,ce=d.AutoTokenizer,de=d.AutomaticSpeechRecognitionPipeline,ue=d.BartForConditionalGeneration,pe=d.BartForSequenceClassification,he=d.BartModel,me=d.BartPretrainedModel,_e=d.BartTokenizer,fe=d.BaseModelOutput,ge=d.BaseStreamer,Me=d.BeitFeatureExtractor,we=d.BeitForImageClassification,be=d.BeitModel,Te=d.BeitPreTrainedModel,xe=d.BertForMaskedLM,ye=d.BertForQuestionAnswering,ke=d.BertForSequenceClassification,Fe=d.BertForTokenClassification,Ce=d.BertModel,Pe=d.BertPreTrainedModel,ve=d.BertTokenizer,Se=d.BitImageProcessor,Ae=d.BlenderbotForConditionalGeneration,Ee=d.BlenderbotModel,Le=d.BlenderbotPreTrainedModel,ze=d.BlenderbotSmallForConditionalGeneration,Ie=d.BlenderbotSmallModel,Be=d.BlenderbotSmallPreTrainedModel,Ne=d.BlenderbotSmallTokenizer,Oe=d.BlenderbotTokenizer,De=d.BloomForCausalLM,Ve=d.BloomModel,je=d.BloomPreTrainedModel,Re=d.BloomTokenizer,Ge=d.CLIPFeatureExtractor,qe=d.CLIPImageProcessor,$e=d.CLIPModel,We=d.CLIPPreTrainedModel,Ue=d.CLIPSegForImageSegmentation,Xe=d.CLIPSegModel,Qe=d.CLIPSegPreTrainedModel,He=d.CLIPTextModelWithProjection,Ye=d.CLIPTokenizer,Je=d.CLIPVisionModelWithProjection,Ke=d.CamembertForMaskedLM,Ze=d.CamembertForQuestionAnswering,et=d.CamembertForSequenceClassification,tt=d.CamembertForTokenClassification,nt=d.CamembertModel,rt=d.CamembertPreTrainedModel,ot=d.CamembertTokenizer,st=d.CausalLMOutput,at=d.CausalLMOutputWithPast,it=d.ChineseCLIPFeatureExtractor,lt=d.ChineseCLIPModel,ct=d.ChineseCLIPPreTrainedModel,dt=d.ClapAudioModelWithProjection,ut=d.ClapFeatureExtractor,pt=d.ClapModel,ht=d.ClapPreTrainedModel,mt=d.ClapTextModelWithProjection,_t=d.CodeGenForCausalLM,ft=d.CodeGenModel,gt=d.CodeGenPreTrainedModel,Mt=d.CodeGenTokenizer,wt=d.CodeLlamaTokenizer,bt=d.CohereForCausalLM,Tt=d.CohereModel,xt=d.CoherePreTrainedModel,yt=d.CohereTokenizer,kt=d.ConvBertForMaskedLM,Ft=d.ConvBertForQuestionAnswering,Ct=d.ConvBertForSequenceClassification,Pt=d.ConvBertForTokenClassification,vt=d.ConvBertModel,St=d.ConvBertPreTrainedModel,At=d.ConvBertTokenizer,Et=d.ConvNextFeatureExtractor,Lt=d.ConvNextForImageClassification,zt=d.ConvNextImageProcessor,It=d.ConvNextModel,Bt=d.ConvNextP
reTrainedModel,Nt=d.ConvNextV2ForImageClassification,Ot=d.ConvNextV2Model,Dt=d.ConvNextV2PreTrainedModel,Vt=d.DPTFeatureExtractor,jt=d.DPTForDepthEstimation,Rt=d.DPTImageProcessor,Gt=d.DPTModel,qt=d.DPTPreTrainedModel,$t=d.DebertaForMaskedLM,Wt=d.DebertaForQuestionAnswering,Ut=d.DebertaForSequenceClassification,Xt=d.DebertaForTokenClassification,Qt=d.DebertaModel,Ht=d.DebertaPreTrainedModel,Yt=d.DebertaTokenizer,Jt=d.DebertaV2ForMaskedLM,Kt=d.DebertaV2ForQuestionAnswering,Zt=d.DebertaV2ForSequenceClassification,en=d.DebertaV2ForTokenClassification,tn=d.DebertaV2Model,nn=d.DebertaV2PreTrainedModel,rn=d.DebertaV2Tokenizer,on=d.DeiTFeatureExtractor,sn=d.DeiTForImageClassification,an=d.DeiTModel,ln=d.DeiTPreTrainedModel,cn=d.DepthAnythingForDepthEstimation,dn=d.DepthAnythingPreTrainedModel,un=d.DepthEstimationPipeline,pn=d.DetrFeatureExtractor,hn=d.DetrForObjectDetection,mn=d.DetrForSegmentation,_n=d.DetrModel,fn=d.DetrObjectDetectionOutput,gn=d.DetrPreTrainedModel,Mn=d.DetrSegmentationOutput,wn=d.Dinov2ForImageClassification,bn=d.Dinov2Model,Tn=d.Dinov2PreTrainedModel,xn=d.DistilBertForMaskedLM,yn=d.DistilBertForQuestionAnswering,kn=d.DistilBertForSequenceClassification,Fn=d.DistilBertForTokenClassification,Cn=d.DistilBertModel,Pn=d.DistilBertPreTrainedModel,vn=d.DistilBertTokenizer,Sn=d.DocumentQuestionAnsweringPipeline,An=d.DonutFeatureExtractor,En=d.DonutSwinModel,Ln=d.DonutSwinPreTrainedModel,zn=d.EfficientNetForImageClassification,In=d.EfficientNetImageProcessor,Bn=d.EfficientNetModel,Nn=d.EfficientNetPreTrainedModel,On=d.ElectraForMaskedLM,Dn=d.ElectraForQuestionAnswering,Vn=d.ElectraForSequenceClassification,jn=d.ElectraForTokenClassification,Rn=d.ElectraModel,Gn=d.ElectraPreTrainedModel,qn=d.ElectraTokenizer,$n=d.EosTokenCriteria,Wn=d.EsmForMaskedLM,Un=d.EsmForSequenceClassification,Xn=d.EsmForTokenClassification,Qn=d.EsmModel,Hn=d.EsmPreTrainedModel,Yn=d.EsmTokenizer,Jn=d.FFT,Kn=d.FalconForCausalLM,Zn=d.FalconModel,er=d.FalconPreTrainedModel,tr=d.FalconTokenizer,nr=d.FastViTForImageClassification,rr=d.FastViTModel,or=d.FastViTPreTrainedModel,sr=d.FeatureExtractionPipeline,ar=d.FeatureExtractor,ir=d.FillMaskPipeline,lr=d.Florence2ForConditionalGeneration,cr=d.Florence2PreTrainedModel,dr=d.Florence2Processor,ur=d.GLPNFeatureExtractor,pr=d.GLPNForDepthEstimation,hr=d.GLPNModel,mr=d.GLPNPreTrainedModel,_r=d.GPT2LMHeadModel,fr=d.GPT2Model,gr=d.GPT2PreTrainedModel,Mr=d.GPT2Tokenizer,wr=d.GPTBigCodeForCausalLM,br=d.GPTBigCodeModel,Tr=d.GPTBigCodePreTrainedModel,xr=d.GPTJForCausalLM,yr=d.GPTJModel,kr=d.GPTJPreTrainedModel,Fr=d.GPTNeoForCausalLM,Cr=d.GPTNeoModel,Pr=d.GPTNeoPreTrainedModel,vr=d.GPTNeoXForCausalLM,Sr=d.GPTNeoXModel,Ar=d.GPTNeoXPreTrainedModel,Er=d.GPTNeoXTokenizer,Lr=d.Gemma2ForCausalLM,zr=d.Gemma2Model,Ir=d.Gemma2PreTrainedModel,Br=d.GemmaForCausalLM,Nr=d.GemmaModel,Or=d.GemmaPreTrainedModel,Dr=d.GemmaTokenizer,Vr=d.Grok1Tokenizer,jr=d.HerbertTokenizer,Rr=d.HieraForImageClassification,Gr=d.HieraModel,qr=d.HieraPreTrainedModel,$r=d.HubertForCTC,Wr=d.HubertForSequenceClassification,Ur=d.HubertModel,Xr=d.HubertPreTrainedModel,Qr=d.ImageClassificationPipeline,Hr=d.ImageFeatureExtractionPipeline,Yr=d.ImageFeatureExtractor,Jr=d.ImageMattingOutput,Kr=d.ImageSegmentationPipeline,Zr=d.ImageToImagePipeline,eo=d.ImageToTextPipeline,to=d.InterruptableStoppingCriteria,no=d.JAISLMHeadModel,ro=d.JAISModel,oo=d.JAISPreTrainedModel,so=d.LlamaForCausalLM,ao=d.LlamaModel,io=d.LlamaPreTrainedModel,lo=d.LlamaTokenizer,co=d.LlavaForConditionalGeneration,uo=d.LlavaPreTrainedModel,po=d.LongT5ForCondit
ionalGeneration,ho=d.LongT5Model,mo=d.LongT5PreTrainedModel,_o=d.M2M100ForConditionalGeneration,fo=d.M2M100Model,go=d.M2M100PreTrainedModel,Mo=d.M2M100Tokenizer,wo=d.MBart50Tokenizer,bo=d.MBartForCausalLM,To=d.MBartForConditionalGeneration,xo=d.MBartForSequenceClassification,yo=d.MBartModel,ko=d.MBartPreTrainedModel,Fo=d.MBartTokenizer,Co=d.MPNetForMaskedLM,Po=d.MPNetForQuestionAnswering,vo=d.MPNetForSequenceClassification,So=d.MPNetForTokenClassification,Ao=d.MPNetModel,Eo=d.MPNetPreTrainedModel,Lo=d.MPNetTokenizer,zo=d.MT5ForConditionalGeneration,Io=d.MT5Model,Bo=d.MT5PreTrainedModel,No=d.MarianMTModel,Oo=d.MarianModel,Do=d.MarianPreTrainedModel,Vo=d.MarianTokenizer,jo=d.MaskedLMOutput,Ro=d.MaxLengthCriteria,Go=d.MistralForCausalLM,qo=d.MistralModel,$o=d.MistralPreTrainedModel,Wo=d.MobileBertForMaskedLM,Uo=d.MobileBertForQuestionAnswering,Xo=d.MobileBertForSequenceClassification,Qo=d.MobileBertModel,Ho=d.MobileBertPreTrainedModel,Yo=d.MobileBertTokenizer,Jo=d.MobileNetV1FeatureExtractor,Ko=d.MobileNetV1ForImageClassification,Zo=d.MobileNetV1Model,es=d.MobileNetV1PreTrainedModel,ts=d.MobileNetV2FeatureExtractor,ns=d.MobileNetV2ForImageClassification,rs=d.MobileNetV2Model,os=d.MobileNetV2PreTrainedModel,ss=d.MobileNetV3FeatureExtractor,as=d.MobileNetV3ForImageClassification,is=d.MobileNetV3Model,ls=d.MobileNetV3PreTrainedModel,cs=d.MobileNetV4FeatureExtractor,ds=d.MobileNetV4ForImageClassification,us=d.MobileNetV4Model,ps=d.MobileNetV4PreTrainedModel,hs=d.MobileViTFeatureExtractor,ms=d.MobileViTForImageClassification,_s=d.MobileViTImageProcessor,fs=d.MobileViTModel,gs=d.MobileViTPreTrainedModel,Ms=d.MobileViTV2ForImageClassification,ws=d.MobileViTV2Model,bs=d.MobileViTV2PreTrainedModel,Ts=d.ModelOutput,xs=d.Moondream1ForConditionalGeneration,ys=d.MptForCausalLM,ks=d.MptModel,Fs=d.MptPreTrainedModel,Cs=d.MusicgenForCausalLM,Ps=d.MusicgenForConditionalGeneration,vs=d.MusicgenModel,Ss=d.MusicgenPreTrainedModel,As=d.NllbTokenizer,Es=d.NomicBertModel,Ls=d.NomicBertPreTrainedModel,zs=d.NougatImageProcessor,Is=d.NougatTokenizer,Bs=d.OPTForCausalLM,Ns=d.OPTModel,Os=d.OPTPreTrainedModel,Ds=d.ObjectDetectionPipeline,Vs=d.OpenELMForCausalLM,js=d.OpenELMModel,Rs=d.OpenELMPreTrainedModel,Gs=d.OwlViTFeatureExtractor,qs=d.OwlViTForObjectDetection,$s=d.OwlViTModel,Ws=d.OwlViTPreTrainedModel,Us=d.OwlViTProcessor,Xs=d.Owlv2ForObjectDetection,Qs=d.Owlv2ImageProcessor,Hs=d.Owlv2Model,Ys=d.Owlv2PreTrainedModel,Js=d.Phi3ForCausalLM,Ks=d.Phi3Model,Zs=d.Phi3PreTrainedModel,ea=d.PhiForCausalLM,ta=d.PhiModel,na=d.PhiPreTrainedModel,ra=d.Pipeline,oa=d.PreTrainedModel,sa=d.PreTrainedTokenizer,aa=d.PretrainedConfig,ia=d.PretrainedMixin,la=d.Processor,ca=d.PyAnnoteFeatureExtractor,da=d.PyAnnoteForAudioFrameClassification,ua=d.PyAnnoteModel,pa=d.PyAnnotePreTrainedModel,ha=d.PyAnnoteProcessor,ma=d.QuestionAnsweringModelOutput,_a=d.QuestionAnsweringPipeline,fa=d.Qwen2ForCausalLM,ga=d.Qwen2Model,Ma=d.Qwen2PreTrainedModel,wa=d.Qwen2Tokenizer,ba=d.RTDetrForObjectDetection,Ta=d.RTDetrImageProcessor,xa=d.RTDetrModel,ya=d.RTDetrObjectDetectionOutput,ka=d.RTDetrPreTrainedModel,Fa=d.RawImage,Ca=d.ResNetForImageClassification,Pa=d.ResNetModel,va=d.ResNetPreTrainedModel,Sa=d.RoFormerForMaskedLM,Aa=d.RoFormerForQuestionAnswering,Ea=d.RoFormerForSequenceClassification,La=d.RoFormerForTokenClassification,za=d.RoFormerModel,Ia=d.RoFormerPreTrainedModel,Ba=d.RoFormerTokenizer,Na=d.RobertaForMaskedLM,Oa=d.RobertaForQuestionAnswering,Da=d.RobertaForSequenceClassification,Va=d.RobertaForTokenClassification,ja=d.RobertaModel,Ra=d.RobertaPreT
rainedModel,Ga=d.RobertaTokenizer,qa=d.SamImageProcessor,$a=d.SamImageSegmentationOutput,Wa=d.SamModel,Ua=d.SamPreTrainedModel,Xa=d.SamProcessor,Qa=d.SapiensFeatureExtractor,Ha=d.SapiensForDepthEstimation,Ya=d.SapiensForNormalEstimation,Ja=d.SapiensForSemanticSegmentation,Ka=d.SapiensPreTrainedModel,Za=d.SeamlessM4TFeatureExtractor,ei=d.SegformerFeatureExtractor,ti=d.SegformerForImageClassification,ni=d.SegformerForSemanticSegmentation,ri=d.SegformerModel,oi=d.SegformerPreTrainedModel,si=d.Seq2SeqLMOutput,ai=d.SequenceClassifierOutput,ii=d.SiglipImageProcessor,li=d.SiglipModel,ci=d.SiglipPreTrainedModel,di=d.SiglipTextModel,ui=d.SiglipTokenizer,pi=d.SiglipVisionModel,hi=d.SpeechT5FeatureExtractor,mi=d.SpeechT5ForSpeechToText,_i=d.SpeechT5ForTextToSpeech,fi=d.SpeechT5HifiGan,gi=d.SpeechT5Model,Mi=d.SpeechT5PreTrainedModel,wi=d.SpeechT5Processor,bi=d.SpeechT5Tokenizer,Ti=d.SqueezeBertForMaskedLM,xi=d.SqueezeBertForQuestionAnswering,yi=d.SqueezeBertForSequenceClassification,ki=d.SqueezeBertModel,Fi=d.SqueezeBertPreTrainedModel,Ci=d.SqueezeBertTokenizer,Pi=d.StableLmForCausalLM,vi=d.StableLmModel,Si=d.StableLmPreTrainedModel,Ai=d.Starcoder2ForCausalLM,Ei=d.Starcoder2Model,Li=d.Starcoder2PreTrainedModel,zi=d.StoppingCriteria,Ii=d.StoppingCriteriaList,Bi=d.SummarizationPipeline,Ni=d.Swin2SRForImageSuperResolution,Oi=d.Swin2SRImageProcessor,Di=d.Swin2SRModel,Vi=d.Swin2SRPreTrainedModel,ji=d.SwinForImageClassification,Ri=d.SwinModel,Gi=d.SwinPreTrainedModel,qi=d.T5ForConditionalGeneration,$i=d.T5Model,Wi=d.T5PreTrainedModel,Ui=d.T5Tokenizer,Xi=d.TableTransformerForObjectDetection,Qi=d.TableTransformerModel,Hi=d.TableTransformerObjectDetectionOutput,Yi=d.TableTransformerPreTrainedModel,Ji=d.Tensor,Ki=d.Text2TextGenerationPipeline,Zi=d.TextClassificationPipeline,el=d.TextGenerationPipeline,tl=d.TextStreamer,nl=d.TextToAudioPipeline,rl=d.TokenClassificationPipeline,ol=d.TokenClassifierOutput,sl=d.TokenizerModel,al=d.TrOCRForCausalLM,il=d.TrOCRPreTrainedModel,ll=d.TranslationPipeline,cl=d.UniSpeechForCTC,dl=d.UniSpeechForSequenceClassification,ul=d.UniSpeechModel,pl=d.UniSpeechPreTrainedModel,hl=d.UniSpeechSatForAudioFrameClassification,ml=d.UniSpeechSatForCTC,_l=d.UniSpeechSatForSequenceClassification,fl=d.UniSpeechSatModel,gl=d.UniSpeechSatPreTrainedModel,Ml=d.ViTFeatureExtractor,wl=d.ViTForImageClassification,bl=d.ViTImageProcessor,Tl=d.ViTModel,xl=d.ViTPreTrainedModel,yl=d.VisionEncoderDecoderModel,kl=d.VitMatteForImageMatting,Fl=d.VitMatteImageProcessor,Cl=d.VitMattePreTrainedModel,Pl=d.VitsModel,vl=d.VitsModelOutput,Sl=d.VitsPreTrainedModel,Al=d.VitsTokenizer,El=d.Wav2Vec2BertForCTC,Ll=d.Wav2Vec2BertForSequenceClassification,zl=d.Wav2Vec2BertModel,Il=d.Wav2Vec2BertPreTrainedModel,Bl=d.Wav2Vec2CTCTokenizer,Nl=d.Wav2Vec2FeatureExtractor,Ol=d.Wav2Vec2ForAudioFrameClassification,Dl=d.Wav2Vec2ForCTC,Vl=d.Wav2Vec2ForSequenceClassification,jl=d.Wav2Vec2Model,Rl=d.Wav2Vec2PreTrainedModel,Gl=d.Wav2Vec2ProcessorWithLM,ql=d.WavLMForAudioFrameClassification,$l=d.WavLMForCTC,Wl=d.WavLMForSequenceClassification,Ul=d.WavLMForXVector,Xl=d.WavLMModel,Ql=d.WavLMPreTrainedModel,Hl=d.WeSpeakerFeatureExtractor,Yl=d.WeSpeakerResNetModel,Jl=d.WeSpeakerResNetPreTrainedModel,Kl=d.WhisperFeatureExtractor,Zl=d.WhisperForConditionalGeneration,ec=d.WhisperModel,tc=d.WhisperPreTrainedModel,nc=d.WhisperProcessor,rc=d.WhisperTextStreamer,oc=d.WhisperTokenizer,sc=d.XLMForQuestionAnswering,ac=d.XLMForSequenceClassification,ic=d.XLMForTokenClassification,lc=d.XLMModel,cc=d.XLMPreTrainedModel,dc=d.XLMRobertaForMaskedLM,uc=d.XLMRobe
rtaForQuestionAnswering,pc=d.XLMRobertaForSequenceClassification,hc=d.XLMRobertaForTokenClassification,mc=d.XLMRobertaModel,_c=d.XLMRobertaPreTrainedModel,fc=d.XLMRobertaTokenizer,gc=d.XLMTokenizer,Mc=d.XLMWithLMHeadModel,wc=d.XVectorOutput,bc=d.YolosFeatureExtractor,Tc=d.YolosForObjectDetection,xc=d.YolosModel,yc=d.YolosObjectDetectionOutput,kc=d.YolosPreTrainedModel,Fc=d.ZeroShotAudioClassificationPipeline,Cc=d.ZeroShotClassificationPipeline,Pc=d.ZeroShotImageClassificationPipeline,vc=d.ZeroShotObjectDetectionPipeline,Sc=d.bankers_round,Ac=d.cat,Ec=d.cos_sim,Lc=d.dot,zc=d.dynamic_time_warping,Ic=d.env,Bc=d.full,Nc=d.full_like,Oc=d.getKeyValueShapes,Dc=d.hamming,Vc=d.hanning,jc=d.interpolate,Rc=d.interpolate_4d,Gc=d.interpolate_data,qc=d.is_chinese_char,$c=d.layer_norm,Wc=d.log_softmax,Uc=d.magnitude,Xc=d.matmul,Qc=d.max,Hc=d.mean,Yc=d.mean_pooling,Jc=d.medianFilter,Kc=d.mel_filter_bank,Zc=d.min,ed=d.ones,td=d.ones_like,nd=d.permute,rd=d.permute_data,od=d.pipeline,sd=d.quantize_embeddings,ad=d.read_audio,id=d.rfft,ld=d.round,cd=d.softmax,dd=d.spectrogram,ud=d.stack,pd=d.std_mean,hd=d.topk,md=d.window_function,_d=d.zeros,fd=d.zeros_like;export{y as ASTFeatureExtractor,k as ASTForAudioClassification,F as ASTModel,C as ASTPreTrainedModel,P as AlbertForMaskedLM,v as AlbertForQuestionAnswering,S as AlbertForSequenceClassification,A as AlbertModel,E as AlbertPreTrainedModel,L as AlbertTokenizer,z as AudioClassificationPipeline,I as AutoConfig,B as AutoModel,N as AutoModelForAudioClassification,O as AutoModelForAudioFrameClassification,D as AutoModelForCTC,V as AutoModelForCausalLM,j as AutoModelForDepthEstimation,R as AutoModelForDocumentQuestionAnswering,G as AutoModelForImageClassification,q as AutoModelForImageFeatureExtraction,$ as AutoModelForImageMatting,W as AutoModelForImageSegmentation,U as AutoModelForImageToImage,X as AutoModelForMaskGeneration,Q as AutoModelForMaskedLM,H as AutoModelForNormalEstimation,Y as AutoModelForObjectDetection,J as AutoModelForQuestionAnswering,K as AutoModelForSemanticSegmentation,Z as AutoModelForSeq2SeqLM,ee as AutoModelForSequenceClassification,te as AutoModelForSpeechSeq2Seq,ne as AutoModelForTextToSpectrogram,re as AutoModelForTextToWaveform,oe as AutoModelForTokenClassification,se as AutoModelForVision2Seq,ae as AutoModelForXVector,ie as AutoModelForZeroShotObjectDetection,le as AutoProcessor,ce as AutoTokenizer,de as AutomaticSpeechRecognitionPipeline,ue as BartForConditionalGeneration,pe as BartForSequenceClassification,he as BartModel,me as BartPretrainedModel,_e as BartTokenizer,fe as BaseModelOutput,ge as BaseStreamer,Me as BeitFeatureExtractor,we as BeitForImageClassification,be as BeitModel,Te as BeitPreTrainedModel,xe as BertForMaskedLM,ye as BertForQuestionAnswering,ke as BertForSequenceClassification,Fe as BertForTokenClassification,Ce as BertModel,Pe as BertPreTrainedModel,ve as BertTokenizer,Se as BitImageProcessor,Ae as BlenderbotForConditionalGeneration,Ee as BlenderbotModel,Le as BlenderbotPreTrainedModel,ze as BlenderbotSmallForConditionalGeneration,Ie as BlenderbotSmallModel,Be as BlenderbotSmallPreTrainedModel,Ne as BlenderbotSmallTokenizer,Oe as BlenderbotTokenizer,De as BloomForCausalLM,Ve as BloomModel,je as BloomPreTrainedModel,Re as BloomTokenizer,Ge as CLIPFeatureExtractor,qe as CLIPImageProcessor,$e as CLIPModel,We as CLIPPreTrainedModel,Ue as CLIPSegForImageSegmentation,Xe as CLIPSegModel,Qe as CLIPSegPreTrainedModel,He as CLIPTextModelWithProjection,Ye as CLIPTokenizer,Je as CLIPVisionModelWithProjection,Ke as 
CamembertForMaskedLM,Ze as CamembertForQuestionAnswering,et as CamembertForSequenceClassification,tt as CamembertForTokenClassification,nt as CamembertModel,rt as CamembertPreTrainedModel,ot as CamembertTokenizer,st as CausalLMOutput,at as CausalLMOutputWithPast,it as ChineseCLIPFeatureExtractor,lt as ChineseCLIPModel,ct as ChineseCLIPPreTrainedModel,dt as ClapAudioModelWithProjection,ut as ClapFeatureExtractor,pt as ClapModel,ht as ClapPreTrainedModel,mt as ClapTextModelWithProjection,_t as CodeGenForCausalLM,ft as CodeGenModel,gt as CodeGenPreTrainedModel,Mt as CodeGenTokenizer,wt as CodeLlamaTokenizer,bt as CohereForCausalLM,Tt as CohereModel,xt as CoherePreTrainedModel,yt as CohereTokenizer,kt as ConvBertForMaskedLM,Ft as ConvBertForQuestionAnswering,Ct as ConvBertForSequenceClassification,Pt as ConvBertForTokenClassification,vt as ConvBertModel,St as ConvBertPreTrainedModel,At as ConvBertTokenizer,Et as ConvNextFeatureExtractor,Lt as ConvNextForImageClassification,zt as ConvNextImageProcessor,It as ConvNextModel,Bt as ConvNextPreTrainedModel,Nt as ConvNextV2ForImageClassification,Ot as ConvNextV2Model,Dt as ConvNextV2PreTrainedModel,Vt as DPTFeatureExtractor,jt as DPTForDepthEstimation,Rt as DPTImageProcessor,Gt as DPTModel,qt as DPTPreTrainedModel,$t as DebertaForMaskedLM,Wt as DebertaForQuestionAnswering,Ut as DebertaForSequenceClassification,Xt as DebertaForTokenClassification,Qt as DebertaModel,Ht as DebertaPreTrainedModel,Yt as DebertaTokenizer,Jt as DebertaV2ForMaskedLM,Kt as DebertaV2ForQuestionAnswering,Zt as DebertaV2ForSequenceClassification,en as DebertaV2ForTokenClassification,tn as DebertaV2Model,nn as DebertaV2PreTrainedModel,rn as DebertaV2Tokenizer,on as DeiTFeatureExtractor,sn as DeiTForImageClassification,an as DeiTModel,ln as DeiTPreTrainedModel,cn as DepthAnythingForDepthEstimation,dn as DepthAnythingPreTrainedModel,un as DepthEstimationPipeline,pn as DetrFeatureExtractor,hn as DetrForObjectDetection,mn as DetrForSegmentation,_n as DetrModel,fn as DetrObjectDetectionOutput,gn as DetrPreTrainedModel,Mn as DetrSegmentationOutput,wn as Dinov2ForImageClassification,bn as Dinov2Model,Tn as Dinov2PreTrainedModel,xn as DistilBertForMaskedLM,yn as DistilBertForQuestionAnswering,kn as DistilBertForSequenceClassification,Fn as DistilBertForTokenClassification,Cn as DistilBertModel,Pn as DistilBertPreTrainedModel,vn as DistilBertTokenizer,Sn as DocumentQuestionAnsweringPipeline,An as DonutFeatureExtractor,En as DonutSwinModel,Ln as DonutSwinPreTrainedModel,zn as EfficientNetForImageClassification,In as EfficientNetImageProcessor,Bn as EfficientNetModel,Nn as EfficientNetPreTrainedModel,On as ElectraForMaskedLM,Dn as ElectraForQuestionAnswering,Vn as ElectraForSequenceClassification,jn as ElectraForTokenClassification,Rn as ElectraModel,Gn as ElectraPreTrainedModel,qn as ElectraTokenizer,$n as EosTokenCriteria,Wn as EsmForMaskedLM,Un as EsmForSequenceClassification,Xn as EsmForTokenClassification,Qn as EsmModel,Hn as EsmPreTrainedModel,Yn as EsmTokenizer,Jn as FFT,Kn as FalconForCausalLM,Zn as FalconModel,er as FalconPreTrainedModel,tr as FalconTokenizer,nr as FastViTForImageClassification,rr as FastViTModel,or as FastViTPreTrainedModel,sr as FeatureExtractionPipeline,ar as FeatureExtractor,ir as FillMaskPipeline,lr as Florence2ForConditionalGeneration,cr as Florence2PreTrainedModel,dr as Florence2Processor,ur as GLPNFeatureExtractor,pr as GLPNForDepthEstimation,hr as GLPNModel,mr as GLPNPreTrainedModel,_r as GPT2LMHeadModel,fr as GPT2Model,gr as GPT2PreTrainedModel,Mr as 
GPT2Tokenizer,wr as GPTBigCodeForCausalLM,br as GPTBigCodeModel,Tr as GPTBigCodePreTrainedModel,xr as GPTJForCausalLM,yr as GPTJModel,kr as GPTJPreTrainedModel,Fr as GPTNeoForCausalLM,Cr as GPTNeoModel,Pr as GPTNeoPreTrainedModel,vr as GPTNeoXForCausalLM,Sr as GPTNeoXModel,Ar as GPTNeoXPreTrainedModel,Er as GPTNeoXTokenizer,Lr as Gemma2ForCausalLM,zr as Gemma2Model,Ir as Gemma2PreTrainedModel,Br as GemmaForCausalLM,Nr as GemmaModel,Or as GemmaPreTrainedModel,Dr as GemmaTokenizer,Vr as Grok1Tokenizer,jr as HerbertTokenizer,Rr as HieraForImageClassification,Gr as HieraModel,qr as HieraPreTrainedModel,$r as HubertForCTC,Wr as HubertForSequenceClassification,Ur as HubertModel,Xr as HubertPreTrainedModel,Qr as ImageClassificationPipeline,Hr as ImageFeatureExtractionPipeline,Yr as ImageFeatureExtractor,Jr as ImageMattingOutput,Kr as ImageSegmentationPipeline,Zr as ImageToImagePipeline,eo as ImageToTextPipeline,to as InterruptableStoppingCriteria,no as JAISLMHeadModel,ro as JAISModel,oo as JAISPreTrainedModel,so as LlamaForCausalLM,ao as LlamaModel,io as LlamaPreTrainedModel,lo as LlamaTokenizer,co as LlavaForConditionalGeneration,uo as LlavaPreTrainedModel,po as LongT5ForConditionalGeneration,ho as LongT5Model,mo as LongT5PreTrainedModel,_o as M2M100ForConditionalGeneration,fo as M2M100Model,go as M2M100PreTrainedModel,Mo as M2M100Tokenizer,wo as MBart50Tokenizer,bo as MBartForCausalLM,To as MBartForConditionalGeneration,xo as MBartForSequenceClassification,yo as MBartModel,ko as MBartPreTrainedModel,Fo as MBartTokenizer,Co as MPNetForMaskedLM,Po as MPNetForQuestionAnswering,vo as MPNetForSequenceClassification,So as MPNetForTokenClassification,Ao as MPNetModel,Eo as MPNetPreTrainedModel,Lo as MPNetTokenizer,zo as MT5ForConditionalGeneration,Io as MT5Model,Bo as MT5PreTrainedModel,No as MarianMTModel,Oo as MarianModel,Do as MarianPreTrainedModel,Vo as MarianTokenizer,jo as MaskedLMOutput,Ro as MaxLengthCriteria,Go as MistralForCausalLM,qo as MistralModel,$o as MistralPreTrainedModel,Wo as MobileBertForMaskedLM,Uo as MobileBertForQuestionAnswering,Xo as MobileBertForSequenceClassification,Qo as MobileBertModel,Ho as MobileBertPreTrainedModel,Yo as MobileBertTokenizer,Jo as MobileNetV1FeatureExtractor,Ko as MobileNetV1ForImageClassification,Zo as MobileNetV1Model,es as MobileNetV1PreTrainedModel,ts as MobileNetV2FeatureExtractor,ns as MobileNetV2ForImageClassification,rs as MobileNetV2Model,os as MobileNetV2PreTrainedModel,ss as MobileNetV3FeatureExtractor,as as MobileNetV3ForImageClassification,is as MobileNetV3Model,ls as MobileNetV3PreTrainedModel,cs as MobileNetV4FeatureExtractor,ds as MobileNetV4ForImageClassification,us as MobileNetV4Model,ps as MobileNetV4PreTrainedModel,hs as MobileViTFeatureExtractor,ms as MobileViTForImageClassification,_s as MobileViTImageProcessor,fs as MobileViTModel,gs as MobileViTPreTrainedModel,Ms as MobileViTV2ForImageClassification,ws as MobileViTV2Model,bs as MobileViTV2PreTrainedModel,Ts as ModelOutput,xs as Moondream1ForConditionalGeneration,ys as MptForCausalLM,ks as MptModel,Fs as MptPreTrainedModel,Cs as MusicgenForCausalLM,Ps as MusicgenForConditionalGeneration,vs as MusicgenModel,Ss as MusicgenPreTrainedModel,As as NllbTokenizer,Es as NomicBertModel,Ls as NomicBertPreTrainedModel,zs as NougatImageProcessor,Is as NougatTokenizer,Bs as OPTForCausalLM,Ns as OPTModel,Os as OPTPreTrainedModel,Ds as ObjectDetectionPipeline,Vs as OpenELMForCausalLM,js as OpenELMModel,Rs as OpenELMPreTrainedModel,Gs as OwlViTFeatureExtractor,qs as OwlViTForObjectDetection,$s as 
OwlViTModel,Ws as OwlViTPreTrainedModel,Us as OwlViTProcessor,Xs as Owlv2ForObjectDetection,Qs as Owlv2ImageProcessor,Hs as Owlv2Model,Ys as Owlv2PreTrainedModel,Js as Phi3ForCausalLM,Ks as Phi3Model,Zs as Phi3PreTrainedModel,ea as PhiForCausalLM,ta as PhiModel,na as PhiPreTrainedModel,ra as Pipeline,oa as PreTrainedModel,sa as PreTrainedTokenizer,aa as PretrainedConfig,ia as PretrainedMixin,la as Processor,ca as PyAnnoteFeatureExtractor,da as PyAnnoteForAudioFrameClassification,ua as PyAnnoteModel,pa as PyAnnotePreTrainedModel,ha as PyAnnoteProcessor,ma as QuestionAnsweringModelOutput,_a as QuestionAnsweringPipeline,fa as Qwen2ForCausalLM,ga as Qwen2Model,Ma as Qwen2PreTrainedModel,wa as Qwen2Tokenizer,ba as RTDetrForObjectDetection,Ta as RTDetrImageProcessor,xa as RTDetrModel,ya as RTDetrObjectDetectionOutput,ka as RTDetrPreTrainedModel,Fa as RawImage,Ca as ResNetForImageClassification,Pa as ResNetModel,va as ResNetPreTrainedModel,Sa as RoFormerForMaskedLM,Aa as RoFormerForQuestionAnswering,Ea as RoFormerForSequenceClassification,La as RoFormerForTokenClassification,za as RoFormerModel,Ia as RoFormerPreTrainedModel,Ba as RoFormerTokenizer,Na as RobertaForMaskedLM,Oa as RobertaForQuestionAnswering,Da as RobertaForSequenceClassification,Va as RobertaForTokenClassification,ja as RobertaModel,Ra as RobertaPreTrainedModel,Ga as RobertaTokenizer,qa as SamImageProcessor,$a as SamImageSegmentationOutput,Wa as SamModel,Ua as SamPreTrainedModel,Xa as SamProcessor,Qa as SapiensFeatureExtractor,Ha as SapiensForDepthEstimation,Ya as SapiensForNormalEstimation,Ja as SapiensForSemanticSegmentation,Ka as SapiensPreTrainedModel,Za as SeamlessM4TFeatureExtractor,ei as SegformerFeatureExtractor,ti as SegformerForImageClassification,ni as SegformerForSemanticSegmentation,ri as SegformerModel,oi as SegformerPreTrainedModel,si as Seq2SeqLMOutput,ai as SequenceClassifierOutput,ii as SiglipImageProcessor,li as SiglipModel,ci as SiglipPreTrainedModel,di as SiglipTextModel,ui as SiglipTokenizer,pi as SiglipVisionModel,hi as SpeechT5FeatureExtractor,mi as SpeechT5ForSpeechToText,_i as SpeechT5ForTextToSpeech,fi as SpeechT5HifiGan,gi as SpeechT5Model,Mi as SpeechT5PreTrainedModel,wi as SpeechT5Processor,bi as SpeechT5Tokenizer,Ti as SqueezeBertForMaskedLM,xi as SqueezeBertForQuestionAnswering,yi as SqueezeBertForSequenceClassification,ki as SqueezeBertModel,Fi as SqueezeBertPreTrainedModel,Ci as SqueezeBertTokenizer,Pi as StableLmForCausalLM,vi as StableLmModel,Si as StableLmPreTrainedModel,Ai as Starcoder2ForCausalLM,Ei as Starcoder2Model,Li as Starcoder2PreTrainedModel,zi as StoppingCriteria,Ii as StoppingCriteriaList,Bi as SummarizationPipeline,Ni as Swin2SRForImageSuperResolution,Oi as Swin2SRImageProcessor,Di as Swin2SRModel,Vi as Swin2SRPreTrainedModel,ji as SwinForImageClassification,Ri as SwinModel,Gi as SwinPreTrainedModel,qi as T5ForConditionalGeneration,$i as T5Model,Wi as T5PreTrainedModel,Ui as T5Tokenizer,Xi as TableTransformerForObjectDetection,Qi as TableTransformerModel,Hi as TableTransformerObjectDetectionOutput,Yi as TableTransformerPreTrainedModel,Ji as Tensor,Ki as Text2TextGenerationPipeline,Zi as TextClassificationPipeline,el as TextGenerationPipeline,tl as TextStreamer,nl as TextToAudioPipeline,rl as TokenClassificationPipeline,ol as TokenClassifierOutput,sl as TokenizerModel,al as TrOCRForCausalLM,il as TrOCRPreTrainedModel,ll as TranslationPipeline,cl as UniSpeechForCTC,dl as UniSpeechForSequenceClassification,ul as UniSpeechModel,pl as UniSpeechPreTrainedModel,hl as 
UniSpeechSatForAudioFrameClassification,ml as UniSpeechSatForCTC,_l as UniSpeechSatForSequenceClassification,fl as UniSpeechSatModel,gl as UniSpeechSatPreTrainedModel,Ml as ViTFeatureExtractor,wl as ViTForImageClassification,bl as ViTImageProcessor,Tl as ViTModel,xl as ViTPreTrainedModel,yl as VisionEncoderDecoderModel,kl as VitMatteForImageMatting,Fl as VitMatteImageProcessor,Cl as VitMattePreTrainedModel,Pl as VitsModel,vl as VitsModelOutput,Sl as VitsPreTrainedModel,Al as VitsTokenizer,El as Wav2Vec2BertForCTC,Ll as Wav2Vec2BertForSequenceClassification,zl as Wav2Vec2BertModel,Il as Wav2Vec2BertPreTrainedModel,Bl as Wav2Vec2CTCTokenizer,Nl as Wav2Vec2FeatureExtractor,Ol as Wav2Vec2ForAudioFrameClassification,Dl as Wav2Vec2ForCTC,Vl as Wav2Vec2ForSequenceClassification,jl as Wav2Vec2Model,Rl as Wav2Vec2PreTrainedModel,Gl as Wav2Vec2ProcessorWithLM,ql as WavLMForAudioFrameClassification,$l as WavLMForCTC,Wl as WavLMForSequenceClassification,Ul as WavLMForXVector,Xl as WavLMModel,Ql as WavLMPreTrainedModel,Hl as WeSpeakerFeatureExtractor,Yl as WeSpeakerResNetModel,Jl as WeSpeakerResNetPreTrainedModel,Kl as WhisperFeatureExtractor,Zl as WhisperForConditionalGeneration,ec as WhisperModel,tc as WhisperPreTrainedModel,nc as WhisperProcessor,rc as WhisperTextStreamer,oc as WhisperTokenizer,sc as XLMForQuestionAnswering,ac as XLMForSequenceClassification,ic as XLMForTokenClassification,lc as XLMModel,cc as XLMPreTrainedModel,dc as XLMRobertaForMaskedLM,uc as XLMRobertaForQuestionAnswering,pc as XLMRobertaForSequenceClassification,hc as XLMRobertaForTokenClassification,mc as XLMRobertaModel,_c as XLMRobertaPreTrainedModel,fc as XLMRobertaTokenizer,gc as XLMTokenizer,Mc as XLMWithLMHeadModel,wc as XVectorOutput,bc as YolosFeatureExtractor,Tc as YolosForObjectDetection,xc as YolosModel,yc as YolosObjectDetectionOutput,kc as YolosPreTrainedModel,Fc as ZeroShotAudioClassificationPipeline,Cc as ZeroShotClassificationPipeline,Pc as ZeroShotImageClassificationPipeline,vc as ZeroShotObjectDetectionPipeline,Sc as bankers_round,Ac as cat,Ec as cos_sim,Lc as dot,zc as dynamic_time_warping,Ic as env,Bc as full,Nc as full_like,Oc as getKeyValueShapes,Dc as hamming,Vc as hanning,jc as interpolate,Rc as interpolate_4d,Gc as interpolate_data,qc as is_chinese_char,$c as layer_norm,Wc as log_softmax,Uc as magnitude,Xc as matmul,Qc as max,Hc as mean,Yc as mean_pooling,Jc as medianFilter,Kc as mel_filter_bank,Zc as min,ed as ones,td as ones_like,nd as permute,rd as permute_data,od as pipeline,sd as quantize_embeddings,ad as read_audio,id as rfft,ld as round,cd as softmax,dd as spectrogram,ud as stack,pd as std_mean,hd as topk,md as window_function,_d as zeros,fd as zeros_like};
167
167
  //# sourceMappingURL=transformers.min.mjs.map
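
For orientation, the minified bundle above re-exports the library's public API under stable names (e.g. pipeline, env, AutoTokenizer, Tensor, cos_sim, and the various task pipelines). The following is only a minimal usage sketch against those exports, assuming a Node.js ESM project with this alpha installed and access to the Hugging Face Hub; the model id, cache path, and dtype option are illustrative assumptions, not something specified by this diff.

  // Minimal sketch using exports visible in the bundle above (pipeline, env, cos_sim).
  // Assumptions: Node.js ESM, @huggingface/transformers 3.0.0-alpha.x installed,
  // and the Hub model id below is an illustrative choice.
  import { pipeline, env, cos_sim } from '@huggingface/transformers';

  // Optional: cache downloaded model files locally (path is an assumption).
  env.cacheDir = './.cache';

  // Build a feature-extraction pipeline and compare two sentences.
  const extractor = await pipeline(
    'feature-extraction',
    'Xenova/all-MiniLM-L6-v2',   // illustrative model id
  );

  const [a, b] = await Promise.all([
    extractor('The cat sits on the mat.', { pooling: 'mean', normalize: true }),
    extractor('A cat is resting on a rug.', { pooling: 'mean', normalize: true }),
  ]);

  // cos_sim operates on plain number arrays; the returned Tensor exposes its values via .data.
  console.log(cos_sim(Array.from(a.data), Array.from(b.data)));

The same pipeline factory reaches the other task pipelines exported above (for example TextGenerationPipeline or AutomaticSpeechRecognitionPipeline) through their corresponding task strings; the sketch is not intended as authoritative documentation for this alpha release.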