@huggingface/transformers 3.0.0-alpha.13 → 3.0.0-alpha.14
This diff reflects the publicly released contents of the two package versions as they appear in their respective registries, and is provided for informational purposes only.
- package/README.md +3 -2
- package/dist/transformers.cjs +43 -11
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +47 -12
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +5 -5
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +5 -5
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +5 -5
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +47 -12
- package/dist/transformers.mjs.map +1 -1
- package/package.json +1 -1
- package/src/env.js +1 -1
- package/src/generation/logits_process.js +3 -3
- package/src/models.js +16 -0
- package/src/ops/registry.js +14 -3
- package/src/processors.js +2 -3
- package/src/utils/maths.js +1 -1
- package/types/models.d.ts +10 -0
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +6 -6
- package/types/ops/registry.d.ts.map +1 -1
- package/types/processors.d.ts.map +1 -1
- package/types/utils/maths.d.ts +2 -2
- package/types/utils/maths.d.ts.map +1 -1
package/dist/transformers.min.js
CHANGED
@@ -154,13 +154,13 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,E,P,A,F,z,I,O,B=Object.d
154 154
\************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>o,getKeyValueShapes:()=>s});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),i=n(/*! ./utils/hub.js */"./src/utils/hub.js");function a(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=a(e.text_config);break;case"moondream1":n=a(e.phi_config);break;case"musicgen":n=a(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const i=a(e.decoder),s="num_decoder_layers"in i,o=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return s?(o.num_decoder_layers=i.num_decoder_layers,o.num_decoder_heads=i.num_decoder_heads,o.decoder_hidden_size=i.decoder_hidden_size,o.num_encoder_layers=i.num_encoder_layers,o.num_encoder_heads=i.num_encoder_heads,o.encoder_hidden_size=i.encoder_hidden_size):(o.num_layers=i.num_layers,o.num_heads=i.num_heads,o.hidden_size=i.hidden_size),o}const i={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)i[n]=e[t[n]];return i}function s(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const 
e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,i=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,a=[1,r.num_encoder_heads,0,e],s=[1,r.num_decoder_heads,0,i];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=a,n[`${t}.${e}.encoder.value`]=a,n[`${t}.${e}.decoder.key`]=s,n[`${t}.${e}.decoder.value`]=s}else{const e=r.num_heads,i=r.num_layers,a=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*a];for(let e=0;e<i;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,a,0],s=[1*e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=s}else if("openelm"===r.model_type)for(let r=0;r<i;++r){const i=[1,e[r],0,a];n[`${t}.${r}.key`]=i,n[`${t}.${r}.value`]=i}else{const r=[1,e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class o{max_position_embeddings;constructor(e){this.model_type=null,this.is_encoder_decoder=!1,Object.assign(this,e),this.normalized_config=a(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:a=!1,revision:s="main"}={}){!n||n instanceof o||(n=new o(n));const l=n??await async function(e,t){return await(0,i.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:a,revision:s});return new this(l)}}class l{static async from_pretrained(...e){return o.from_pretrained(...e)}}},"./src/env.js":
155 155
/*!********************!*\
156 156
!*** ./src/env.js ***!
157     -
\********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),i=n(/*! path */"?3f59"),a=n(/*! url */"?154a");const s="undefined"!=typeof self,o=s&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=s&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(i),f=Object.freeze({IS_BROWSER_ENV:s,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m,_=g?i.dirname(i.dirname(a.fileURLToPath(import.meta.url))):"./",w=g?i.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.0-alpha.
    157 +
\********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),i=n(/*! path */"?3f59"),a=n(/*! url */"?154a");const s="undefined"!=typeof self,o=s&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=s&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(i),f=Object.freeze({IS_BROWSER_ENV:s,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m,_=g?i.dirname(i.dirname(a.fileURLToPath(import.meta.url))):"./",w=g?i.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.0-alpha.14",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!s,localModelPath:g?i.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
158 158
/*!***********************************************!*\
159 159
!*** ./src/generation/configuration_utils.js ***!
160 160
\***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>i});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class i{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
161 161
/*!******************************************!*\
162 162
!*** ./src/generation/logits_process.js ***!
163     -
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>_,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>u,LogitsProcessor:()=>a,LogitsProcessorList:()=>o,LogitsWarper:()=>s,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>f,NoBadWordsLogitsProcessor:()=>g,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>d,TemperatureLogitsWarper:()=>w,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>y,WhisperTimeStampLogitsProcessor:()=>c});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),i=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class a extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class s extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class o extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends a{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class u extends a{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class d extends a{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class c extends a{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const a=e[n].slice(this.begin_index),s=a.length>=1&&a[a.length-1]>=this.timestamp_begin,o=a.length<2||a[a.length-2]>=this.timestamp_begin;if(s&&(o?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,i.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,i.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends a{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),i=r.get(n)??[];i.push(e[e.length-1]),r.set(n,i)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return 
e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,i=this.calcBannedNgramTokens(e[n]);for(const e of i)r[e]=-1/0}return t}}class h extends a{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of e[n]){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends a{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class f extends a{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class g extends a{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const
    163 +
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>_,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>u,LogitsProcessor:()=>a,LogitsProcessorList:()=>o,LogitsWarper:()=>s,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>f,NoBadWordsLogitsProcessor:()=>g,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>d,TemperatureLogitsWarper:()=>w,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>y,WhisperTimeStampLogitsProcessor:()=>c});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),i=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class a extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class s extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class o extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends a{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class u extends a{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class d extends a{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class c extends a{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const a=e[n].slice(this.begin_index),s=a.length>=1&&a[a.length-1]>=this.timestamp_begin,o=a.length<2||a[a.length-2]>=this.timestamp_begin;if(s&&(o?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,i.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,i.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends a{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),i=r.get(n)??[];i.push(e[e.length-1]),r.set(n,i)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return 
e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,i=this.calcBannedNgramTokens(e[n]);for(const e of i)r[e]=-1/0}return t}}class h extends a{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of e[n]){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends a{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class f extends a{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class g extends a{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,i=e[n];for(const e of this.bad_words_ids){let t=!0;for(let n=1;n<=e.length-1&&e.length<i.length;++n)if(e.at(-n-1)!=i.at(-n)){t=!1;break}t&&(r[e.at(-1)]=-1/0)}}return t}}class _ extends a{constructor(e){if(super(),e<=1)throw new Error(`Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,t){if(t.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got batch size ${t.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,r=t.slice([0,n],null),i=t.slice([n,t.dims[0]],null);for(let e=0;e<i.data.length;++e)i.data[e]+=(r.data[e]-i.data[e])*this.guidance_scale;return i}}class w extends s{constructor(e){if(super(),"number"!=typeof e||e<=0){let t=`\`temperature\` (=${e}) must be a strictly positive float, otherwise your next token scores will be invalid.`;0===e&&(t+=" If you're looking for greedy decoding strategies, set `do_sample=false`.")}this.temperature=e}_call(e,t){const n=t.data;for(let e=0;e<n.length;++e)n[e]/=this.temperature;return t}}class y extends s{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),e<0||e>1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=t,this.min_tokens_to_keep=n}}class b extends s{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=t}}},"./src/generation/logits_sampler.js":
164 164
/*!******************************************!*\
165 165
!*** ./src/generation/logits_sampler.js ***!
166 166
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{LogitsSampler:()=>s});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),i=n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),a=n(/*! ../utils/maths.js */"./src/utils/maths.js");n(/*! ../generation/configuration_utils.js */"./src/generation/configuration_utils.js");class s extends r.Callable{constructor(e){super(),this.generation_config=e}async _call(e){return this.sample(e)}async sample(e){throw Error("sample should be implemented in subclasses.")}getLogits(e,t){let n=e.dims.at(-1),r=e.data;if(-1===t)r=r.slice(-n);else{let e=t*n;r=r.slice(e,e+n)}return r}randomSelect(e){let t=0;for(let n=0;n<e.length;++n)t+=e[n];let n=Math.random()*t;for(let t=0;t<e.length;++t)if(n-=e[t],n<=0)return t;return 0}static getSampler(e){if(e.do_sample)return new l(e);if(e.num_beams>1)return new u(e);if(e.num_return_sequences>1)throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${e.num_return_sequences}.`);return new o(e)}}class o extends s{async sample(e){const t=(0,a.max)(e.data)[1];return[[BigInt(t),0]]}}class l extends s{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,i.topk)(e,t),s=(0,a.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},(()=>{const e=this.randomSelect(s);return[r.data[e],Math.log(s[e])]}))}}class u extends s{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,i.topk)(e,t),s=(0,a.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},((e,t)=>[r.data[t],Math.log(s[t])]))}}},"./src/generation/stopping_criteria.js":
@@ -172,7 +172,7 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,E,P,A,F,z,I,O,B=Object.d
172 172
\*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>s,TextStreamer:()=>l,WhisperTextStreamer:()=>u});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),i=n(/*! ../tokenizers.js */"./src/tokenizers.js"),a=n(/*! ../env.js */"./src/env.js");class s{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const o=a.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends s{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:i={},...a}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??o,this.token_callback_function=r,this.decode_kwargs={...i,...a},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let a;n.endsWith("\n")?(a=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,i.is_chinese_char)(n.charCodeAt(n.length-1))?(a=n.slice(this.print_len),this.print_len+=a.length):(a=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=a.length),this.on_finalized_text(a,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===o&&a.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class u extends l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:i=null,on_chunk_end:a=null,on_finalize:s=null,time_precision:o=.02,skip_special_tokens:l=!0,decode_kwargs:u={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...u}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=i,this.on_chunk_end=a,this.on_finalize=s,this.time_precision=o,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
173 173
/*!***********************!*\
174 174
!*** ./src/models.js ***!
175     -
\***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ct,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ut,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>ot,AutoModel:()=>uo,AutoModelForAudioClassification:()=>Co,AutoModelForAudioFrameClassification:()=>Po,AutoModelForCTC:()=>So,AutoModelForCausalLM:()=>_o,AutoModelForDepthEstimation:()=>Io,AutoModelForDocumentQuestionAnswering:()=>Ao,AutoModelForImageClassification:()=>vo,AutoModelForImageFeatureExtraction:()=>Bo,AutoModelForImageMatting:()=>Fo,AutoModelForImageSegmentation:()=>xo,AutoModelForImageToImage:()=>zo,AutoModelForMaskGeneration:()=>$o,AutoModelForMaskedLM:()=>wo,AutoModelForNormalEstimation:()=>Oo,AutoModelForObjectDetection:()=>To,AutoModelForQuestionAnswering:()=>yo,AutoModelForSemanticSegmentation:()=>Mo,AutoModelForSeq2SeqLM:()=>ho,AutoModelForSequenceClassification:()=>co,AutoModelForSpeechSeq2Seq:()=>mo,AutoModelForTextToSpectrogram:()=>fo,AutoModelForTextToWaveform:()=>go,AutoModelForTokenClassification:()=>po,AutoModelForVision2Seq:()=>bo,AutoModelForXVector:()=>Eo,AutoModelForZeroShotObjectDetection:()=>ko,BartForConditionalGeneration:()=>Mt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>vt,BaseModelOutput:()=>G,BeitForImageClassification:()=>Xr,BeitModel:()=>Hr,BeitPreTrainedModel:()=>Wr,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>K,BertForSequenceClassification:()=>H,BertForTokenClassification:()=>X,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>Ft,BlenderbotModel:()=>At,BlenderbotPreTrainedModel:()=>Pt,BlenderbotSmallForConditionalGeneration:()=>Ot,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>yr,BloomModel:()=>wr,BloomPreTrainedModel:()=>_r,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>kn,CLIPSegModel:()=>Tn,CLIPSegPreTrainedModel:()=>Mn,CLIPTextModelWithProjection:()=>fn,CLIPVisionModelWithProjection:()=>gn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ve,CamembertForSequenceClassification:()=>ye,CamembertForTokenClassification:()=>be,CamembertModel:()=>_e,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>Go,CausalLMOutputWithPast:()=>qo,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>vn,ClapAudioModelWithProjection:()=>ts,ClapModel:()=>Za,ClapPreTrainedModel:()=>Ja,ClapTextModelWithProjection:()=>es,CodeGenForCausalLM:()=>Wn,CodeGenModel:()=>Un,CodeGenPreTrainedModel:()=>qn,CohereForCausalLM:()=>Jn,CohereModel:()=>Yn,CoherePreTrainedModel:()=>Qn,ConvBertForMaskedLM:()=>se,ConvBertForQuestionAnswering:()=>ue,ConvBertForSequenceClassification:()=>oe,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>ae,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>Li,ConvNextModel:()=>Bi,ConvNextPreTrainedModel:()=>Oi,ConvNextV2ForImageClassification:()=>Ni,ConvNextV2Model:()=>Ri,ConvNextV2PreTrainedModel:()=>Di,DPTForDepthEstimation:()=>Mi,DPTModel:()=>xi,DPTPreTrainedModel:()=>vi,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Se,DebertaForSequenceClassification:()=>ke,DebertaForTokenClassification:()=>$e,DebertaModel:()=>Me,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Pe,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ae,DebertaV2ForTokenClassification:()=>Fe,DebertaV2Model:()=>Ee,DebertaV2PreTrainedModel:()=>Ce,DeiTForImageClassification:()=>ci,DeiTModel:()=>di,DeiTPreTrainedModel:(
)=>ui,DepthAnythingForDepthEstimation:()=>ki,DepthAnythingPreTrainedModel:()=>Ti,DetrForObjectDetection:()=>Yr,DetrForSegmentation:()=>Jr,DetrModel:()=>Qr,DetrObjectDetectionOutput:()=>Zr,DetrPreTrainedModel:()=>Kr,DetrSegmentationOutput:()=>ei,Dinov2ForImageClassification:()=>Gi,Dinov2Model:()=>ji,Dinov2PreTrainedModel:()=>Vi,DistilBertForMaskedLM:()=>Re,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Be,DistilBertForTokenClassification:()=>Le,DistilBertModel:()=>Oe,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Ii,DonutSwinPreTrainedModel:()=>zi,EfficientNetForImageClassification:()=>hs,EfficientNetModel:()=>ps,EfficientNetPreTrainedModel:()=>cs,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ce,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>je,EsmForSequenceClassification:()=>Ge,EsmForTokenClassification:()=>qe,EsmModel:()=>Ve,EsmPreTrainedModel:()=>Ne,FalconForCausalLM:()=>Ya,FalconModel:()=>Qa,FalconPreTrainedModel:()=>Ka,FastViTForImageClassification:()=>Ar,FastViTModel:()=>Pr,FastViTPreTrainedModel:()=>Er,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>cn,GLPNForDepthEstimation:()=>Fi,GLPNModel:()=>Ai,GLPNPreTrainedModel:()=>Pi,GPT2LMHeadModel:()=>Cn,GPT2Model:()=>Sn,GPT2PreTrainedModel:()=>$n,GPTBigCodeForCausalLM:()=>Gn,GPTBigCodeModel:()=>jn,GPTBigCodePreTrainedModel:()=>Vn,GPTJForCausalLM:()=>Nn,GPTJModel:()=>Rn,GPTJPreTrainedModel:()=>Dn,GPTNeoForCausalLM:()=>In,GPTNeoModel:()=>zn,GPTNeoPreTrainedModel:()=>Fn,GPTNeoXForCausalLM:()=>Ln,GPTNeoXModel:()=>Bn,GPTNeoXPreTrainedModel:()=>On,Gemma2ForCausalLM:()=>ir,Gemma2Model:()=>rr,Gemma2PreTrainedModel:()=>nr,GemmaForCausalLM:()=>tr,GemmaModel:()=>er,GemmaPreTrainedModel:()=>Zn,HubertForCTC:()=>Ca,HubertForSequenceClassification:()=>Ea,HubertModel:()=>Sa,HubertPreTrainedModel:()=>$a,ImageMattingOutput:()=>Uo,JAISLMHeadModel:()=>An,JAISModel:()=>Pn,JAISPreTrainedModel:()=>En,LlamaForCausalLM:()=>Kn,LlamaModel:()=>Xn,LlamaPreTrainedModel:()=>Hn,LlavaForConditionalGeneration:()=>un,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>_t,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>na,M2M100Model:()=>ta,M2M100PreTrainedModel:()=>ea,MBartForCausalLM:()=>Et,MBartForConditionalGeneration:()=>St,MBartForSequenceClassification:()=>Ct,MBartModel:()=>$t,MBartPreTrainedModel:()=>kt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ye,MPNetPreTrainedModel:()=>Qe,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>yt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>Zi,MarianModel:()=>Ji,MarianPreTrainedModel:()=>Yi,MaskedLMOutput:()=>Vo,MistralForCausalLM:()=>Ua,MistralModel:()=>qa,MistralPreTrainedModel:()=>Ga,MobileBertForMaskedLM:()=>He,MobileBertForQuestionAnswering:()=>Ke,MobileBertForSequenceClassification:()=>Xe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>bs,MobileNetV1Model:()=>ys,MobileNetV1PreTrainedModel:()=>ws,MobileNetV2ForImageClassification:()=>Ms,MobileNetV2Model:()=>xs,MobileNetV2PreTrainedModel:()=>vs,MobileNetV3ForImageClassification:()=>$s,MobileNetV3Model:()=>ks,MobileNetV3PreTrainedModel:()=>Ts,MobileNetV4ForImageClassification:()=>Es,MobileNetV4Model:()=>Cs,MobileNetV4PreTrainedModel:()=>Ss,MobileViTForImageClassification:()=>Br,MobileViTModel:()=>Or,MobileViTPreTrainedModel:()=>I
r,MobileViTV2ForImageClassification:()=>Rr,MobileViTV2Model:()=>Dr,MobileViTV2PreTrainedModel:()=>Lr,ModelOutput:()=>j,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>xr,MptModel:()=>vr,MptPreTrainedModel:()=>br,MusicgenForCausalLM:()=>gs,MusicgenForConditionalGeneration:()=>_s,MusicgenModel:()=>fs,MusicgenPreTrainedModel:()=>ms,NomicBertModel:()=>Y,NomicBertPreTrainedModel:()=>Q,OPTForCausalLM:()=>kr,OPTModel:()=>Tr,OPTPreTrainedModel:()=>Mr,OpenELMForCausalLM:()=>or,OpenELMModel:()=>sr,OpenELMPreTrainedModel:()=>ar,OwlViTForObjectDetection:()=>jr,OwlViTModel:()=>Vr,OwlViTPreTrainedModel:()=>Nr,Owlv2ForObjectDetection:()=>Ur,Owlv2Model:()=>qr,Owlv2PreTrainedModel:()=>Gr,Phi3ForCausalLM:()=>gr,Phi3Model:()=>fr,Phi3PreTrainedModel:()=>mr,PhiForCausalLM:()=>hr,PhiModel:()=>pr,PhiPreTrainedModel:()=>cr,PreTrainedModel:()=>V,PretrainedMixin:()=>Ps,PyAnnoteForAudioFrameClassification:()=>da,PyAnnoteModel:()=>ua,PyAnnotePreTrainedModel:()=>la,QuestionAnsweringModelOutput:()=>jo,Qwen2ForCausalLM:()=>dr,Qwen2Model:()=>ur,Qwen2PreTrainedModel:()=>lr,RTDetrForObjectDetection:()=>ri,RTDetrModel:()=>ni,RTDetrObjectDetectionOutput:()=>ii,RTDetrPreTrainedModel:()=>ti,ResNetForImageClassification:()=>mi,ResNetModel:()=>hi,ResNetPreTrainedModel:()=>pi,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Vt,RobertaForSequenceClassification:()=>Rt,RobertaForTokenClassification:()=>Nt,RobertaModel:()=>Lt,RobertaPreTrainedModel:()=>Bt,SamImageSegmentationOutput:()=>Qi,SamModel:()=>Ki,SamPreTrainedModel:()=>Xi,SapiensForDepthEstimation:()=>Ci,SapiensForNormalEstimation:()=>Ei,SapiensForSemanticSegmentation:()=>Si,SapiensPreTrainedModel:()=>$i,SegformerForImageClassification:()=>ss,SegformerForSemanticSegmentation:()=>os,SegformerModel:()=>as,SegformerPreTrainedModel:()=>is,Seq2SeqLMOutput:()=>Lo,SequenceClassifierOutput:()=>Do,SiglipModel:()=>wn,SiglipPreTrainedModel:()=>_n,SiglipTextModel:()=>yn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Da,SpeechT5ForTextToSpeech:()=>Ra,SpeechT5HifiGan:()=>Na,SpeechT5Model:()=>La,SpeechT5PreTrainedModel:()=>Ba,SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>st,SqueezeBertForSequenceClassification:()=>at,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>ds,StableLmModel:()=>us,StableLmPreTrainedModel:()=>ls,Starcoder2ForCausalLM:()=>Xa,Starcoder2Model:()=>Ha,Starcoder2PreTrainedModel:()=>Wa,Swin2SRForImageSuperResolution:()=>bi,Swin2SRModel:()=>yi,Swin2SRPreTrainedModel:()=>wi,SwinForImageClassification:()=>_i,SwinModel:()=>gi,SwinPreTrainedModel:()=>fi,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>oi,TableTransformerModel:()=>si,TableTransformerObjectDetectionOutput:()=>li,TableTransformerPreTrainedModel:()=>ai,TokenClassifierOutput:()=>No,TrOCRForCausalLM:()=>ja,TrOCRPreTrainedModel:()=>Va,UniSpeechForCTC:()=>fa,UniSpeechForSequenceClassification:()=>ga,UniSpeechModel:()=>ma,UniSpeechPreTrainedModel:()=>ha,UniSpeechSatForAudioFrameClassification:()=>va,UniSpeechSatForCTC:()=>ya,UniSpeechSatForSequenceClassification:()=>ba,UniSpeechSatModel:()=>wa,UniSpeechSatPreTrainedModel:()=>_a,ViTForImageClassification:()=>Cr,ViTModel:()=>Sr,ViTPreTrainedModel:()=>$r,VisionEncoderDecoderModel:()=>on,VitMatteForImageMatting:()=>zr,VitMattePreTrainedModel:()=
>Fr,VitsModel:()=>rs,VitsModelOutput:()=>Wo,VitsPreTrainedModel:()=>ns,Wav2Vec2BertForCTC:()=>Ta,Wav2Vec2BertForSequenceClassification:()=>ka,Wav2Vec2BertModel:()=>Ma,Wav2Vec2BertPreTrainedModel:()=>xa,Wav2Vec2ForAudioFrameClassification:()=>oa,Wav2Vec2ForCTC:()=>aa,Wav2Vec2ForSequenceClassification:()=>sa,Wav2Vec2Model:()=>ia,Wav2Vec2PreTrainedModel:()=>ra,WavLMForAudioFrameClassification:()=>Oa,WavLMForCTC:()=>Fa,WavLMForSequenceClassification:()=>za,WavLMForXVector:()=>Ia,WavLMModel:()=>Aa,WavLMPreTrainedModel:()=>Pa,WeSpeakerResNetModel:()=>pa,WeSpeakerResNetPreTrainedModel:()=>ca,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>an,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Ht,XLMForSequenceClassification:()=>Ut,XLMForTokenClassification:()=>Wt,XLMModel:()=>Gt,XLMPreTrainedModel:()=>jt,XLMRobertaForMaskedLM:()=>Qt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Yt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Kt,XLMRobertaPreTrainedModel:()=>Xt,XLMWithLMHeadModel:()=>qt,XVectorOutput:()=>Ro,YolosForObjectDetection:()=>Wi,YolosModel:()=>Ui,YolosObjectDetectionOutput:()=>Hi,YolosPreTrainedModel:()=>qi});var r=n(/*! ./configs.js */"./src/configs.js"),i=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),a=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),s=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),u=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),c=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),f=n(/*! ./env.js */"./src/env.js"),g=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),_=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,y=1,b=2,v=3,x=4,M=5,T=6,k=7,$=new Map,S=new Map,C=new Map;async function E(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async s=>{const{buffer:o,session_options:u}=await async function(e,t,n){let s=n.device;s&&"string"!=typeof s&&(s.hasOwnProperty(t)?s=s[t]:(console.warn(`device not specified for "${t}". Using the default device.`),s=null));const o=s??(f.apis.IS_NODE_ENV?"cpu":"wasm"),u=(0,i.deviceToExecutionProviders)(o);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=a.DEFAULT_DEVICE_DTYPE_MAPPING[o]??a.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${o}).`)));const c=d;if(!a.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(c))throw new Error(`Invalid dtype: ${c}. 
Should be one of: ${Object.keys(a.DATA_TYPES).join(", ")}`);if(c===a.DATA_TYPES.fp16&&"webgpu"===o&&!await(0,a.isWebGpuFp16Supported)())throw new Error(`The device (${o}) does not support fp16.`);const p=a.DEFAULT_DTYPE_SUFFIX_MAPPING[c],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=u;const g=(0,l.getModelFile)(e,h,!0,n);let _=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(f.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,i=`${n.subfolder??""}/${r}`;_.push(new Promise((async(t,a)=>{const s=await(0,l.getModelFile)(e,i,!0,n);t({path:r,data:s})})))}else void 0!==m.externalData&&(_=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(_.length>0&&(m.externalData=await Promise.all(_)),"webgpu"===o){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,i.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await g,session_options:m}}(e,t[s],n);return[s,await(0,i.createInferenceSession)(o,u)]}))))}async function P(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const a of e.inputNames){const e=t[a];e instanceof c.Tensor?n[a]=(0,i.isONNXProxy)()?e.clone():e:r.push(a)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const a=Object.keys(t).length,s=e.inputNames.length;if(a>s){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${a} > ${s}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=A(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function A(e){for(let t in e)(0,i.isONNXTensor)(e[t])?e[t]=new c.Tensor(e[t]):"object"==typeof e[t]&&A(e[t]);return e}function F(e){if(e instanceof c.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new c.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new c.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new c.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:i,...a}=t;if(!n){const r=(0,o.pick)(t,e.sessions.model.inputNames);n=(await O(e,r)).last_hidden_state}a.input_ids=i,a.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(a.encoder_attention_mask=t.attention_mask);return await B(e,a,!0)}async function O(e,t){const n=e.sessions.model,r=(0,o.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new c.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await P(n,r)}async function B(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:i,...a}=t;r.inputNames.includes("use_cache_branch")&&(a.use_cache_branch=z(!!i)),r.inputNames.includes("position_ids")&&a.attention_mask&&!a.position_ids&&(a.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:i}=e,[a,s]=i.dims,o=new BigInt64Array(i.data.length);for(let e=0;e<a;++e){const t=e*s;let n=BigInt(0);for(let e=0;e<s;++e){const r=t+e;0n===i.data[r]?o[r]=BigInt(1):(o[r]=n,n+=i.data[r])}}let l=new c.Tensor("int64",o,i.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(a,i)),e.addPastKeyValues(a,i);const s=(0,o.pick)(a,r.inputNames);return await P(r,s)}async function L(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:i=null,inputs_embeds:a=null,past_key_values:s=null,generation_config:o=null,logits_processor:l=null,...u}){if(!a)if(a=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const i=await e.encode_image({pixel_values:r});({inputs_embeds:a,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:i,inputs_embeds:a,input_ids:t,attention_mask:n}))}else if(s&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(s)[0].dims.at(-2);n=(0,c.cat)([(0,c.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await B(e,{inputs_embeds:a,past_key_values:s,attention_mask:n,position_ids:i,generation_config:o,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:i}=n;if(i&&i.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const i=e.config.num_image_tokens;if(!i)throw new Error("`num_image_tokens` is missing in the model configuration.");const a=r.dims[1]-(t-i);n.input_ids=r.slice(null,[-a,null]),n.attention_mask=(0,c.ones)([1,t+a])}}return n}function R(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:F(t)}}function N(e,...t){return e.config.is_encoder_decoder?R(e,...t):D(e,...t)}class V extends s.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=C.get(this.constructor),r=$.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=D;break;case b:case v:case k:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=R;break;case y:this._forward=I;break;case T:this.can_generate=!0,this._forward=L,this._prepare_inputs_for_generation=N;break;default:this._forward=O}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:d=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:d,dtype:c,use_external_data_format:p,session_options:h};const f=C.get(this),g=$.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===x)_=await Promise.all([E(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===b||g===v)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===M)_=await Promise.all([E(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===y)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([E(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else g===k?_=await Promise.all([E(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(g!==w&&console.warn(`Model type for '${f??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),_=await Promise.all([E(e,{model:m.model_file_name??"model"},m)]));return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new u.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new u.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new u.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new u.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new u.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new u.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new u.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new u.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new u.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new u.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new u.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new u.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new u.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new u.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const i=new n(r);return"generation_config"in this&&Object.assign(i,this.generation_config),e&&Object.assign(i,e),t&&Object.assign(i,(0,o.pick)(t,Object.getOwnPropertyNames(i))),i}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ns,Gs,Rs,Is],t=C.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let i=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(i+=` Please use the following class instead: ${[...n].join(", ")}`),Error(i)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new c.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,c.cat)([n.attention_mask,(0,c.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,o.pick)(n,this.forward_params),i=this.main_input_name;if(i in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[i]=e;return{inputs_tensor:r[i],model_inputs:r,model_input_name:i}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...i}=t,a=await this._prepare_inputs_embeds(t);t={...i,...(0,o.pick)(a,["inputs_embeds","attention_mask"])}}let{last_hidden_state:i}=await O(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)i=(0,c.cat)([i,(0,c.full_like)(i,0)],0),"attention_mask"in t&&(t.attention_mask=(0,c.cat)([t.attention_mask,(0,c.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=F(t.decoder_input_ids).dims[0];if(e!==i.dims[0]){if(1!==i.dims[0])throw new Error(`The encoder outputs have a different batch size (${i.dims[0]}) than the decoder inputs (${e}).`);i=(0,c.cat)(Array.from({length:e},(()=>i)),0)}}return t.encoder_outputs=i,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:i,generation_config:a}){let{decoder_input_ids:s,...o}=n;if(s)Array.isArray(s[0])||(s=Array.from({length:e},(()=>s)));else if(r??=i,"musicgen"===this.config.model_type)s=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);s=r}else s=Array.from({length:e},(()=>[r]));return s=F(s),n.decoder_attention_mask=(0,c.ones_like)(s),{input_ids:s,model_inputs:o}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:i=null,...a}){this._validate_model_class(),t=this._prepare_generation_config(t,a);let{inputs_tensor:s,model_inputs:o,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:a});const u=this.config.is_encoder_decoder;let d;u&&("encoder_outputs"in o||(o=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:s,model_inputs:o,model_input_name:l,generation_config:t}))),u?({input_ids:d,model_inputs:o}=this._prepare_decoder_input_ids_for_generation({batch_size:o[l].dims.at(0),model_input_name:l,model_kwargs:o,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=o[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),f=this._get_stopping_criteria(t,r),g=o[l].dims.at(0),_=m.LogitsSampler.getSampler(t),w=new Array(g).fill(0),y=d.tolist();i&&i.put(y);let b=null,v={};for(;;){o=this.prepare_inputs_for_generation(y,o,t);const e=await this.forward(o);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in v||(v[e]=[]),v[e].push(t[e])}const n=h(y,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],i=await _(t);for(const[t,n]of i){const i=BigInt(t);w[e]+=n,y[e].push(i),r.push([i]);break}}i&&i.put(r);if(f(y).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,o.past_key_values,!1));break}o=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:o,is_encoder_decoder:u})}i&&i.end();const x=new c.Tensor("int64",y.flat(),[y.length,y[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...v}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const i 
in e)if(i.startsWith("present")){const a=i.replace("present","past_key_values");if(t&&i.includes("encoder"))r[a]=t[a];else{if(n&&t){const e=t[a];"gpu-buffer"===e.location&&e.dispose()}r[a]=e[i]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],i=(0,r.getKeyValueShapes)(this.config);for(const r in i)e[r]=new c.Tensor(t,n,i[r])}}async encode_image({pixel_values:e}){const t=(await P(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await P(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class j{}class G extends j{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class q extends V{}class U extends q{}class W extends q{async _call(e){return new Vo(await super._call(e))}}class H extends q{async _call(e){return new Do(await super._call(e))}}class X extends q{async _call(e){return new No(await super._call(e))}}class K extends q{async _call(e){return new jo(await super._call(e))}}class Q extends V{}class Y extends Q{}class J extends V{}class Z extends J{}class ee extends J{async _call(e){return new Vo(await super._call(e))}}class te extends J{async _call(e){return new Do(await super._call(e))}}class ne extends J{async _call(e){return new No(await super._call(e))}}class re extends J{async _call(e){return new jo(await super._call(e))}}class ie extends V{}class ae extends ie{}class se extends ie{async _call(e){return new Vo(await super._call(e))}}class oe extends ie{async _call(e){return new Do(await super._call(e))}}class le extends ie{async _call(e){return new No(await super._call(e))}}class ue extends ie{async _call(e){return new jo(await super._call(e))}}class de extends V{}class ce extends de{}class pe extends de{async _call(e){return new Vo(await super._call(e))}}class he extends de{async _call(e){return new Do(await super._call(e))}}class me extends de{async _call(e){return new No(await super._call(e))}}class fe extends de{async _call(e){return new jo(await super._call(e))}}class ge extends V{}class _e extends ge{}class we extends ge{async _call(e){return new Vo(await super._call(e))}}class ye extends ge{async _call(e){return new Do(await super._call(e))}}class be extends ge{async _call(e){return new No(await super._call(e))}}class ve extends ge{async _call(e){return new jo(await super._call(e))}}class xe extends V{}class Me extends xe{}class Te extends xe{async _call(e){return new Vo(await super._call(e))}}class ke extends xe{async _call(e){return new Do(await super._call(e))}}class $e extends xe{async _call(e){return new No(await super._call(e))}}class Se extends xe{async _call(e){return new jo(await super._call(e))}}class Ce extends V{}class Ee extends Ce{}class Pe extends Ce{async _call(e){return new Vo(await super._call(e))}}class Ae extends Ce{async _call(e){return new Do(await super._call(e))}}class Fe extends Ce{async _call(e){return new No(await super._call(e))}}class ze extends Ce{async _call(e){return new 
jo(await super._call(e))}}class Ie extends V{}class Oe extends Ie{}class Be extends Ie{async _call(e){return new Do(await super._call(e))}}class Le extends Ie{async _call(e){return new No(await super._call(e))}}class De extends Ie{async _call(e){return new jo(await super._call(e))}}class Re extends Ie{async _call(e){return new Vo(await super._call(e))}}class Ne extends V{}class Ve extends Ne{}class je extends Ne{async _call(e){return new Vo(await super._call(e))}}class Ge extends Ne{async _call(e){return new Do(await super._call(e))}}class qe extends Ne{async _call(e){return new No(await super._call(e))}}class Ue extends V{}class We extends Ue{}class He extends Ue{async _call(e){return new Vo(await super._call(e))}}class Xe extends Ue{async _call(e){return new Do(await super._call(e))}}class Ke extends Ue{async _call(e){return new jo(await super._call(e))}}class Qe extends V{}class Ye extends Qe{}class Je extends Qe{async _call(e){return new Vo(await super._call(e))}}class Ze extends Qe{async _call(e){return new Do(await super._call(e))}}class et extends Qe{async _call(e){return new No(await super._call(e))}}class tt extends Qe{async _call(e){return new jo(await super._call(e))}}class nt extends V{}class rt extends nt{}class it extends nt{async _call(e){return new Vo(await super._call(e))}}class at extends nt{async _call(e){return new Do(await super._call(e))}}class st extends nt{async _call(e){return new jo(await super._call(e))}}class ot extends V{}class lt extends ot{}class ut extends ot{async _call(e){return new Do(await super._call(e))}}class dt extends ot{async _call(e){return new jo(await super._call(e))}}class ct extends ot{async _call(e){return new Vo(await super._call(e))}}class pt extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class ft extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class gt extends ft{}class _t extends ft{}class wt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class yt extends wt{}class bt extends wt{}class vt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends vt{}class Mt extends vt{}class Tt extends vt{async _call(e){return new Do(await super._call(e))}}class kt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class $t extends kt{}class St extends kt{}class Ct extends kt{async _call(e){return new Do(await super._call(e))}}class Et extends kt{}class Pt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class At extends Pt{}class Ft extends Pt{}class zt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Ot extends zt{}class Bt extends V{}class Lt extends Bt{}class Dt extends Bt{async _call(e){return new Vo(await super._call(e))}}class Rt extends Bt{async _call(e){return new Do(await super._call(e))}}class Nt extends Bt{async _call(e){return new No(await super._call(e))}}class Vt extends Bt{async _call(e){return new jo(await super._call(e))}}class jt extends V{}class Gt extends jt{}class qt extends jt{async _call(e){return new Vo(await super._call(e))}}class Ut extends jt{async _call(e){return new Do(await super._call(e))}}class Wt extends jt{async _call(e){return new No(await super._call(e))}}class Ht extends jt{async _call(e){return new jo(await super._call(e))}}class Xt extends V{}class Kt extends Xt{}class Qt extends Xt{async 
_call(e){return new Vo(await super._call(e))}}class Yt extends Xt{async _call(e){return new Do(await super._call(e))}}class Jt extends Xt{async _call(e){return new No(await super._call(e))}}class Zt extends Xt{async _call(e){return new jo(await super._call(e))}}class en extends V{}class tn extends en{}class nn extends en{}class rn extends V{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class an extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,g.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const i=`<|${(0,_.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[i]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...i}){t=this._prepare_generation_config(t,i);const a=i.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,a))),t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,a.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const s=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:a,...i});return t.return_token_timestamps&&(s.token_timestamps=this._extract_token_timestamps(s,t.alignment_heads,t.num_frames)),s}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let i=this.config.median_filter_width;void 0===i&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),i=7);const a=e.cross_attentions,s=Array.from({length:this.config.decoder_layers},((e,t)=>(0,c.cat)(a.map((e=>e[t])),2))),l=(0,c.stack)(t.map((([e,t])=>{if(e>=s.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${s.length}).`);return n?s[e].slice(null,t,null,[0,n]):s[e].slice(null,t)}))).transpose(1,0,2,3),[u,d]=(0,c.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],a=u[e][n][0].data,s=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-s[e])/a[e];t.set((0,p.medianFilter)(t,i))}}}const m=[(0,c.mean)(h,1)],f=e.sequences.dims,g=new c.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=m[e].neg().squeeze_(0),[n,i]=(0,p.dynamic_time_warping)(t.tolist()),a=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),s=(0,o.mergeArrays)([1],a).map((e=>!!e)),l=[];for(let e=0;e<s.length;++e)s[e]&&l.push(i[e]*r);g[e].data.set(l,1)}return g}}class on extends V{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends V{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class un extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const i=this.config.image_token_index,a=n.tolist().map((e=>e.findIndex((e=>e==i)))),s=a.every((e=>-1===e)),o=a.every((e=>-1!==e));if(!s&&!o)throw new Error("Every input should contain either 0 or 1 image token.");if(s)return{inputs_embeds:e,attention_mask:r};const l=[],u=[];for(let n=0;n<a.length;++n){const i=a[n],s=e[n],o=t[n],d=r[n];l.push((0,c.cat)([s.slice([0,i]),o,s.slice([i+1,s.dims[0]])],0)),u.push((0,c.cat)([d.slice([0,i]),(0,c.ones)([o.dims[0]]),d.slice([i+1,d.dims[0]])],0))}return{inputs_embeds:(0,c.stack)(l,0),attention_mask:(0,c.stack)(u,0)}}}class dn extends un{}class cn extends V{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,c.cat)([t,e],1),attention_mask:(0,c.cat)([(0,c.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let i,a;return e&&(i=await this.encode_text({input_ids:e})),t&&(a=await this.encode_image({pixel_values:t})),i&&a?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:i,image_features:a,input_ids:e,attention_mask:r})):n=i||a,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:i,encoder_outputs:a,past_key_values:s,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await 
this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!a){let{last_hidden_state:e}=await O(this,{inputs_embeds:o,attention_mask:n});a=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:i,encoder_attention_mask:n,encoder_hidden_states:a,past_key_values:s};return await B(this,u,!0)}}class hn extends V{}class mn extends hn{}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class _n extends V{}class wn extends _n{}class yn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class vn extends V{}class xn extends vn{}class Mn extends V{}class Tn extends Mn{}class kn extends Mn{}class $n extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Sn extends $n{}class Cn extends $n{}class En extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends En{}class An extends En{}class Fn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Fn{}class In extends Fn{}class On extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Bn extends On{}class Ln extends On{}class Dn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Rn extends Dn{}class Nn extends Dn{}class Vn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class jn extends Vn{}class Gn extends Vn{}class qn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Un extends qn{}class Wn extends qn{}class Hn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Xn extends Hn{}class Kn extends Hn{}class Qn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Yn extends Qn{}class Jn extends Qn{}class Zn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class ir extends nr{}class ar extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class sr extends ar{}class or extends ar{}class lr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ur extends lr{}class dr extends lr{}class cr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends cr{}class hr extends cr{}class mr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class fr extends mr{}class gr extends mr{}class _r extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class wr extends _r{}class yr extends _r{}class br extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class vr extends br{}class xr extends br{}class Mr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends Mr{}class kr extends Mr{}class $r extends V{}class Sr extends $r{}class Cr extends $r{async _call(e){return new Do(await super._call(e))}}class Er extends V{}class Pr extends Er{}class Ar extends Er{async _call(e){return new Do(await super._call(e))}}class Fr extends V{}class zr extends Fr{async _call(e){return new Uo(await 
super._call(e))}}class Ir extends V{}class Or extends Ir{}class Br extends Ir{async _call(e){return new Do(await super._call(e))}}class Lr extends V{}class Dr extends Lr{}class Rr extends Lr{async _call(e){return new Do(await super._call(e))}}class Nr extends V{}class Vr extends Nr{}class jr extends Nr{}class Gr extends V{}class qr extends Gr{}class Ur extends Gr{}class Wr extends V{}class Hr extends Wr{}class Xr extends Wr{async _call(e){return new Do(await super._call(e))}}class Kr extends V{}class Qr extends Kr{}class Yr extends Kr{async _call(e){return new Zr(await super._call(e))}}class Jr extends Kr{async _call(e){return new ei(await super._call(e))}}class Zr extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ei extends j{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class ti extends V{}class ni extends ti{}class ri extends ti{async _call(e){return new ii(await super._call(e))}}class ii extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ai extends V{}class si extends ai{}class oi extends ai{async _call(e){return new li(await super._call(e))}}class li extends Zr{}class ui extends V{}class di extends ui{}class ci extends ui{async _call(e){return new Do(await super._call(e))}}class pi extends V{}class hi extends pi{}class mi extends pi{async _call(e){return new Do(await super._call(e))}}class fi extends V{}class gi extends fi{}class _i extends fi{async _call(e){return new Do(await super._call(e))}}class wi extends V{}class yi extends wi{}class bi extends wi{}class vi extends V{}class xi extends vi{}class Mi extends vi{}class Ti extends V{}class ki extends Ti{}class $i extends V{}class Si extends $i{}class Ci extends $i{}class Ei extends $i{}class Pi extends V{}class Ai extends Pi{}class Fi extends Pi{}class zi extends V{}class Ii extends zi{}class Oi extends V{}class Bi extends Oi{}class Li extends Oi{async _call(e){return new Do(await super._call(e))}}class Di extends V{}class Ri extends Di{}class Ni extends Di{async _call(e){return new Do(await super._call(e))}}class Vi extends V{}class ji extends Vi{}class Gi extends Vi{async _call(e){return new Do(await super._call(e))}}class qi extends V{}class Ui extends qi{}class Wi extends qi{async _call(e){return new Hi(await super._call(e))}}class Hi extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Xi extends V{}class Ki extends Xi{async get_image_embeddings({pixel_values:e}){return await O(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new c.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await P(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new Qi(await super._call(e))}}class Qi extends j{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Yi extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ji extends Yi{}class Zi extends Yi{}class ea extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ta 
extends ea{}class na extends ea{}class ra extends V{}class ia extends ra{}class aa extends ra{async _call(e){return new Go(await super._call(e))}}class sa extends ra{async _call(e){return new Do(await super._call(e))}}class oa extends ra{async _call(e){return new No(await super._call(e))}}class la extends V{}class ua extends la{}class da extends la{async _call(e){return new No(await super._call(e))}}class ca extends V{}class pa extends ca{}class ha extends V{}class ma extends ha{}class fa extends ha{async _call(e){return new Go(await super._call(e))}}class ga extends ha{async _call(e){return new Do(await super._call(e))}}class _a extends V{}class wa extends _a{}class ya extends _a{async _call(e){return new Go(await super._call(e))}}class ba extends _a{async _call(e){return new Do(await super._call(e))}}class va extends _a{async _call(e){return new No(await super._call(e))}}class xa extends V{}class Ma extends xa{}class Ta extends xa{async _call(e){return new Go(await super._call(e))}}class ka extends xa{async _call(e){return new Do(await super._call(e))}}class $a extends V{}class Sa extends ra{}class Ca extends ra{async _call(e){return new Go(await super._call(e))}}class Ea extends ra{async _call(e){return new Do(await super._call(e))}}class Pa extends V{}class Aa extends Pa{}class Fa extends Pa{async _call(e){return new Go(await super._call(e))}}class za extends Pa{async _call(e){return new Do(await super._call(e))}}class Ia extends Pa{async _call(e){return new Ro(await super._call(e))}}class Oa extends Pa{async _call(e){return new No(await super._call(e))}}class Ba extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class La extends Ba{}class Da extends Ba{}class Ra extends Ba{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:i=20,vocoder:a=null}={}){const s={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await O(this,s),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*i),p=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=z(!!g);let r;r=g?g.output_sequence_out:new c.Tensor("float32",new Float32Array(h),[1,1,h]);let i={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(i,f),g=await P(this.sessions.decoder_model_merged,i),f=this.getPastKeyValues(g,f);const{prob:a,spectrum:s}=g;if(m.push(s),_>=p&&(Array.from(a.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,c.cat)(m),{waveform:y}=await P(a.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class Na extends V{main_input_name="spectrogram"}class Va extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ja extends Va{}class Ga extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class qa extends Ga{}class Ua extends Ga{}class Wa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ha extends Wa{}class Xa extends Wa{}class Ka extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qa extends Ka{}class Ya extends Ka{}class Ja extends V{}class Za extends Ja{}class es extends Ja{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class ts extends Ja{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class ns extends V{}class rs extends ns{async _call(e){return new Wo(await super._call(e))}}class is extends V{}class as extends is{}class ss extends is{}class os extends is{}class ls 
extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class us extends ls{}class ds extends ls{}class cs extends V{}class ps extends cs{}class hs extends cs{async _call(e){return new Do(await super._call(e))}}class ms extends V{}class fs extends ms{}class gs extends ms{}class _s extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,i=n-r;let a=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const s=t%n-Math.floor(t/n)%r;s>0&&s<=i&&(e.data[a++]=e.data[t])}const s=Math.floor(t/r),o=a/(s*r);return new c.Tensor(e.type,e.data.slice(0,a),[s,r,o])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await P(this.sessions.encodec_decode,{audio_codes:n});return r}}class ws extends V{}class ys extends ws{}class bs extends ws{async _call(e){return new Do(await super._call(e))}}class vs extends V{}class xs extends vs{}class Ms extends vs{async _call(e){return new Do(await super._call(e))}}class Ts extends V{}class ks extends Ts{}class $s extends Ts{async _call(e){return new Do(await super._call(e))}}class Ss extends V{}class Cs extends Ss{}class Es extends Ss{async _call(e){return new Do(await super._call(e))}}class Ps{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await V.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const As=new 
Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",Y]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ce]],["esm",["EsmModel",Ve]],["convbert",["ConvBertModel",ae]],["camembert",["CamembertModel",_e]],["deberta",["DebertaModel",Me]],["deberta-v2",["DebertaV2Model",Ee]],["mpnet",["MPNetModel",Ye]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Oe]],["roberta",["RobertaModel",Lt]],["xlm",["XLMModel",Gt]],["xlm-roberta",["XLMRobertaModel",Kt]],["clap",["ClapModel",Za]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",Tn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",wn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",ia]],["wav2vec2-bert",["Wav2Vec2BertModel",Ma]],["unispeech",["UniSpeechModel",ma]],["unispeech-sat",["UniSpeechSatModel",wa]],["hubert",["HubertModel",Sa]],["wavlm",["WavLMModel",Aa]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",rs]],["pyannote",["PyAnnoteModel",ua]],["wespeaker-resnet",["WeSpeakerResNetModel",pa]],["detr",["DetrModel",Qr]],["rt_detr",["RTDetrModel",ni]],["table-transformer",["TableTransformerModel",si]],["vit",["ViTModel",Sr]],["fastvit",["FastViTModel",Pr]],["mobilevit",["MobileViTModel",Or]],["mobilevitv2",["MobileViTV2Model",Dr]],["owlvit",["OwlViTModel",Vr]],["owlv2",["Owlv2Model",qr]],["beit",["BeitModel",Hr]],["deit",["DeiTModel",di]],["convnext",["ConvNextModel",Bi]],["convnextv2",["ConvNextV2Model",Ri]],["dinov2",["Dinov2Model",ji]],["resnet",["ResNetModel",hi]],["swin",["SwinModel",gi]],["swin2sr",["Swin2SRModel",yi]],["donut-swin",["DonutSwinModel",Ii]],["yolos",["YolosModel",Ui]],["dpt",["DPTModel",xi]],["glpn",["GLPNModel",Ai]],["hifigan",["SpeechT5HifiGan",Na]],["efficientnet",["EfficientNetModel",ps]],["mobilenet_v1",["MobileNetV1Model",ys]],["mobilenet_v2",["MobileNetV2Model",xs]],["mobilenet_v3",["MobileNetV3Model",ks]],["mobilenet_v4",["MobileNetV4Model",Cs]]]),Fs=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",yt]],["bart",["BartModel",xt]],["mbart",["MBartModel",$t]],["marian",["MarianModel",Ji]],["whisper",["WhisperModel",an]],["m2m_100",["M2M100Model",ta]],["blenderbot",["BlenderbotModel",At]],["blenderbot-small",["BlenderbotSmallModel",It]]]),zs=new Map([["bloom",["BloomModel",wr]],["jais",["JAISModel",Pn]],["gpt2",["GPT2Model",Sn]],["gptj",["GPTJModel",Rn]],["gpt_bigcode",["GPTBigCodeModel",jn]],["gpt_neo",["GPTNeoModel",zn]],["gpt_neox",["GPTNeoXModel",Bn]],["codegen",["CodeGenModel",Un]],["llama",["LlamaModel",Xn]],["cohere",["CohereModel",Yn]],["gemma",["GemmaModel",er]],["gemma2",["Gemma2Model",rr]],["openelm",["OpenELMModel",sr]],["qwen2",["Qwen2Model",ur]],["phi",["PhiModel",pr]],["phi3",["Phi3Model",fr]],["mpt",["MptModel",vr]],["opt",["OPTModel",Tr]],["mistral",["MistralModel",qa]],["starcoder2",["Starcoder2Model",Ha]],["falcon",["FalconModel",Qa]],["stablelm",["StableLmModel",us]]]),Is=new Map([["speecht5",["SpeechT5ForSpeechToText",Da]],["whisper",["WhisperForConditionalGeneration",sn]]]),Os=new Map([["speecht5",["SpeechT5ForTextToSpeech",Ra]]]),Bs=new Map([["vits",["VitsModel",rs]],["musicgen",["MusicgenForConditionalGeneration",_s]]]),Ls=new 
Map([["bert",["BertForSequenceClassification",H]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",Ge]],["convbert",["ConvBertForSequenceClassification",oe]],["camembert",["CamembertForSequenceClassification",ye]],["deberta",["DebertaForSequenceClassification",ke]],["deberta-v2",["DebertaV2ForSequenceClassification",Ae]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ut]],["distilbert",["DistilBertForSequenceClassification",Be]],["roberta",["RobertaForSequenceClassification",Rt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Yt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Ct]],["mobilebert",["MobileBertForSequenceClassification",Xe]],["squeezebert",["SqueezeBertForSequenceClassification",at]]]),Ds=new Map([["bert",["BertForTokenClassification",X]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",$e]],["deberta-v2",["DebertaV2ForTokenClassification",Fe]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Le]],["roberta",["RobertaForTokenClassification",Nt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),Rs=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",_t]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",Mt]],["mbart",["MBartForConditionalGeneration",St]],["marian",["MarianMTModel",Zi]],["m2m_100",["M2M100ForConditionalGeneration",na]],["blenderbot",["BlenderbotForConditionalGeneration",Ft]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Ot]]]),Ns=new Map([["bloom",["BloomForCausalLM",yr]],["gpt2",["GPT2LMHeadModel",Cn]],["jais",["JAISLMHeadModel",An]],["gptj",["GPTJForCausalLM",Nn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Gn]],["gpt_neo",["GPTNeoForCausalLM",In]],["gpt_neox",["GPTNeoXForCausalLM",Ln]],["codegen",["CodeGenForCausalLM",Wn]],["llama",["LlamaForCausalLM",Kn]],["cohere",["CohereForCausalLM",Jn]],["gemma",["GemmaForCausalLM",tr]],["gemma2",["Gemma2ForCausalLM",ir]],["openelm",["OpenELMForCausalLM",or]],["qwen2",["Qwen2ForCausalLM",dr]],["phi",["PhiForCausalLM",hr]],["phi3",["Phi3ForCausalLM",gr]],["mpt",["MptForCausalLM",xr]],["opt",["OPTForCausalLM",kr]],["mbart",["MBartForCausalLM",Et]],["mistral",["MistralForCausalLM",Ua]],["starcoder2",["Starcoder2ForCausalLM",Xa]],["falcon",["FalconForCausalLM",Ya]],["trocr",["TrOCRForCausalLM",ja]],["stablelm",["StableLmForCausalLM",ds]]]),Vs=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",je]],["convbert",["ConvBertForMaskedLM",se]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Pe]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ct]],["distilbert",["DistilBertForMaskedLM",Re]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Qt]],["mobilebert",["MobileBertForMaskedLM",He]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),js=new 
Map([["bert",["BertForQuestionAnswering",K]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",ue]],["camembert",["CamembertForQuestionAnswering",ve]],["deberta",["DebertaForQuestionAnswering",Se]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Vt]],["xlm",["XLMForQuestionAnswering",Ht]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",Ke]],["squeezebert",["SqueezeBertForQuestionAnswering",st]]]),Gs=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),qs=new Map([["llava",["LlavaForConditionalGeneration",un]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Us=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),Ws=new Map([["vit",["ViTForImageClassification",Cr]],["fastvit",["FastViTForImageClassification",Ar]],["mobilevit",["MobileViTForImageClassification",Br]],["mobilevitv2",["MobileViTV2ForImageClassification",Rr]],["beit",["BeitForImageClassification",Xr]],["deit",["DeiTForImageClassification",ci]],["convnext",["ConvNextForImageClassification",Li]],["convnextv2",["ConvNextV2ForImageClassification",Ni]],["dinov2",["Dinov2ForImageClassification",Gi]],["resnet",["ResNetForImageClassification",mi]],["swin",["SwinForImageClassification",_i]],["segformer",["SegformerForImageClassification",ss]],["efficientnet",["EfficientNetForImageClassification",hs]],["mobilenet_v1",["MobileNetV1ForImageClassification",bs]],["mobilenet_v2",["MobileNetV2ForImageClassification",Ms]],["mobilenet_v3",["MobileNetV3ForImageClassification",$s]],["mobilenet_v4",["MobileNetV4ForImageClassification",Es]]]),Hs=new Map([["detr",["DetrForObjectDetection",Yr]],["rt_detr",["RTDetrForObjectDetection",ri]],["table-transformer",["TableTransformerForObjectDetection",oi]],["yolos",["YolosForObjectDetection",Wi]]]),Xs=new Map([["owlvit",["OwlViTForObjectDetection",jr]],["owlv2",["Owlv2ForObjectDetection",Ur]]]),Ks=new Map([["detr",["DetrForSegmentation",Jr]],["clipseg",["CLIPSegForImageSegmentation",kn]]]),Qs=new Map([["segformer",["SegformerForSemanticSegmentation",os]],["sapiens",["SapiensForSemanticSegmentation",Si]]]),Ys=new Map([["sam",["SamModel",Ki]]]),Js=new Map([["wav2vec2",["Wav2Vec2ForCTC",aa]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Ta]],["unispeech",["UniSpeechForCTC",fa]],["unispeech-sat",["UniSpeechSatForCTC",ya]],["wavlm",["WavLMForCTC",Fa]],["hubert",["HubertForCTC",Ca]]]),Zs=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",sa]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",ka]],["unispeech",["UniSpeechForSequenceClassification",ga]],["unispeech-sat",["UniSpeechSatForSequenceClassification",ba]],["wavlm",["WavLMForSequenceClassification",za]],["hubert",["HubertForSequenceClassification",Ea]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),eo=new Map([["wavlm",["WavLMForXVector",Ia]]]),to=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",va]],["wavlm",["WavLMForAudioFrameClassification",Oa]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",oa]],["pyannote",["PyAnnoteForAudioFrameClassification",da]]]),no=new Map([["vitmatte",["VitMatteForImageMatting",zr]]]),ro=new 
Map([["swin2sr",["Swin2SRForImageSuperResolution",bi]]]),io=new Map([["dpt",["DPTForDepthEstimation",Mi]],["depth_anything",["DepthAnythingForDepthEstimation",ki]],["glpn",["GLPNForDepthEstimation",Fi]],["sapiens",["SapiensForDepthEstimation",Ci]]]),ao=new Map([["sapiens",["SapiensForNormalEstimation",Ei]]]),so=new Map([["clip",["CLIPVisionModelWithProjection",gn]],["siglip",["SiglipVisionModel",bn]]]),oo=[[As,w],[Fs,y],[zs,x],[Ls,w],[Ds,w],[Rs,b],[Is,b],[Ns,x],[Vs,w],[js,w],[Gs,v],[qs,T],[Ws,w],[Ks,w],[Qs,w],[no,w],[ro,w],[io,w],[ao,w],[Hs,w],[Xs,w],[Ys,M],[Js,w],[Zs,w],[Os,b],[Bs,w],[eo,w],[to,w],[so,w]];for(const[e,t]of oo)for(const[n,r]of e.values())$.set(n,t),C.set(r,n),S.set(n,r);const lo=[["MusicgenForConditionalGeneration",_s,k],["CLIPTextModelWithProjection",fn,w],["SiglipTextModel",yn,w],["ClapTextModelWithProjection",es,w],["ClapAudioModelWithProjection",ts,w]];for(const[e,t,n]of lo)$.set(e,n),C.set(t,e),S.set(e,t);class uo extends Ps{static MODEL_CLASS_MAPPINGS=oo.map((e=>e[0]));static BASE_IF_FAIL=!0}class co extends Ps{static MODEL_CLASS_MAPPINGS=[Ls]}class po extends Ps{static MODEL_CLASS_MAPPINGS=[Ds]}class ho extends Ps{static MODEL_CLASS_MAPPINGS=[Rs]}class mo extends Ps{static MODEL_CLASS_MAPPINGS=[Is]}class fo extends Ps{static MODEL_CLASS_MAPPINGS=[Os]}class go extends Ps{static MODEL_CLASS_MAPPINGS=[Bs]}class _o extends Ps{static MODEL_CLASS_MAPPINGS=[Ns]}class wo extends Ps{static MODEL_CLASS_MAPPINGS=[Vs]}class yo extends Ps{static MODEL_CLASS_MAPPINGS=[js]}class bo extends Ps{static MODEL_CLASS_MAPPINGS=[Gs]}class vo extends Ps{static MODEL_CLASS_MAPPINGS=[Ws]}class xo extends Ps{static MODEL_CLASS_MAPPINGS=[Ks]}class Mo extends Ps{static MODEL_CLASS_MAPPINGS=[Qs]}class To extends Ps{static MODEL_CLASS_MAPPINGS=[Hs]}class ko extends Ps{static MODEL_CLASS_MAPPINGS=[Xs]}class $o extends Ps{static MODEL_CLASS_MAPPINGS=[Ys]}class So extends Ps{static MODEL_CLASS_MAPPINGS=[Js]}class Co extends Ps{static MODEL_CLASS_MAPPINGS=[Zs]}class Eo extends Ps{static MODEL_CLASS_MAPPINGS=[eo]}class Po extends Ps{static MODEL_CLASS_MAPPINGS=[to]}class Ao extends Ps{static MODEL_CLASS_MAPPINGS=[Us]}class Fo extends Ps{static MODEL_CLASS_MAPPINGS=[no]}class zo extends Ps{static MODEL_CLASS_MAPPINGS=[ro]}class Io extends Ps{static MODEL_CLASS_MAPPINGS=[io]}class Oo extends Ps{static MODEL_CLASS_MAPPINGS=[ao]}class Bo extends Ps{static MODEL_CLASS_MAPPINGS=[so]}class Lo extends j{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:i=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=i}}class Do extends j{constructor({logits:e}){super(),this.logits=e}}class Ro extends j{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class No extends j{constructor({logits:e}){super(),this.logits=e}}class Vo extends j{constructor({logits:e}){super(),this.logits=e}}class jo extends j{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Go extends j{constructor({logits:e}){super(),this.logits=e}}class qo extends j{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Uo extends j{constructor({alphas:e}){super(),this.alphas=e}}class Wo extends j{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
175
+
\***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ct,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ut,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>ot,AutoModel:()=>ho,AutoModelForAudioClassification:()=>Ao,AutoModelForAudioFrameClassification:()=>zo,AutoModelForCTC:()=>Po,AutoModelForCausalLM:()=>bo,AutoModelForDepthEstimation:()=>Lo,AutoModelForDocumentQuestionAnswering:()=>Io,AutoModelForImageClassification:()=>To,AutoModelForImageFeatureExtraction:()=>Ro,AutoModelForImageMatting:()=>Oo,AutoModelForImageSegmentation:()=>ko,AutoModelForImageToImage:()=>Bo,AutoModelForMaskGeneration:()=>Eo,AutoModelForMaskedLM:()=>vo,AutoModelForNormalEstimation:()=>Do,AutoModelForObjectDetection:()=>So,AutoModelForQuestionAnswering:()=>xo,AutoModelForSemanticSegmentation:()=>$o,AutoModelForSeq2SeqLM:()=>go,AutoModelForSequenceClassification:()=>mo,AutoModelForSpeechSeq2Seq:()=>_o,AutoModelForTextToSpectrogram:()=>wo,AutoModelForTextToWaveform:()=>yo,AutoModelForTokenClassification:()=>fo,AutoModelForVision2Seq:()=>Mo,AutoModelForXVector:()=>Fo,AutoModelForZeroShotObjectDetection:()=>Co,BartForConditionalGeneration:()=>Mt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>vt,BaseModelOutput:()=>G,BeitForImageClassification:()=>Xr,BeitModel:()=>Hr,BeitPreTrainedModel:()=>Wr,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>K,BertForSequenceClassification:()=>H,BertForTokenClassification:()=>X,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>Ft,BlenderbotModel:()=>At,BlenderbotPreTrainedModel:()=>Pt,BlenderbotSmallForConditionalGeneration:()=>Ot,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>yr,BloomModel:()=>wr,BloomPreTrainedModel:()=>_r,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>kn,CLIPSegModel:()=>Tn,CLIPSegPreTrainedModel:()=>Mn,CLIPTextModelWithProjection:()=>fn,CLIPVisionModelWithProjection:()=>gn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ve,CamembertForSequenceClassification:()=>ye,CamembertForTokenClassification:()=>be,CamembertModel:()=>_e,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>Wo,CausalLMOutputWithPast:()=>Ho,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>vn,ClapAudioModelWithProjection:()=>is,ClapModel:()=>ns,ClapPreTrainedModel:()=>ts,ClapTextModelWithProjection:()=>rs,CodeGenForCausalLM:()=>Wn,CodeGenModel:()=>Un,CodeGenPreTrainedModel:()=>qn,CohereForCausalLM:()=>Jn,CohereModel:()=>Yn,CoherePreTrainedModel:()=>Qn,ConvBertForMaskedLM:()=>se,ConvBertForQuestionAnswering:()=>ue,ConvBertForSequenceClassification:()=>oe,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>ae,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>Ni,ConvNextModel:()=>Ri,ConvNextPreTrainedModel:()=>Di,ConvNextV2ForImageClassification:()=>Gi,ConvNextV2Model:()=>ji,ConvNextV2PreTrainedModel:()=>Vi,DPTForDepthEstimation:()=>$i,DPTModel:()=>ki,DPTPreTrainedModel:()=>Ti,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Se,DebertaForSequenceClassification:()=>ke,DebertaForTokenClassification:()=>$e,DebertaModel:()=>Me,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Pe,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ae,DebertaV2ForTokenClassification:()=>Fe,DebertaV2Model:()=>Ee,DebertaV2PreTrainedModel:()=>Ce,DeiTForImageClassification:()=>ci,DeiTModel:()=>di,DeiTPreTrainedModel:(
)=>ui,DepthAnythingForDepthEstimation:()=>Ci,DepthAnythingPreTrainedModel:()=>Si,DetrForObjectDetection:()=>Yr,DetrForSegmentation:()=>Jr,DetrModel:()=>Qr,DetrObjectDetectionOutput:()=>Zr,DetrPreTrainedModel:()=>Kr,DetrSegmentationOutput:()=>ei,Dinov2ForImageClassification:()=>Wi,Dinov2Model:()=>Ui,Dinov2PreTrainedModel:()=>qi,DistilBertForMaskedLM:()=>Re,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Be,DistilBertForTokenClassification:()=>Le,DistilBertModel:()=>Oe,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Li,DonutSwinPreTrainedModel:()=>Bi,EfficientNetForImageClassification:()=>gs,EfficientNetModel:()=>fs,EfficientNetPreTrainedModel:()=>ms,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ce,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>je,EsmForSequenceClassification:()=>Ge,EsmForTokenClassification:()=>qe,EsmModel:()=>Ve,EsmPreTrainedModel:()=>Ne,FalconForCausalLM:()=>es,FalconModel:()=>Za,FalconPreTrainedModel:()=>Ja,FastViTForImageClassification:()=>Ar,FastViTModel:()=>Pr,FastViTPreTrainedModel:()=>Er,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>cn,GLPNForDepthEstimation:()=>Oi,GLPNModel:()=>Ii,GLPNPreTrainedModel:()=>zi,GPT2LMHeadModel:()=>Cn,GPT2Model:()=>Sn,GPT2PreTrainedModel:()=>$n,GPTBigCodeForCausalLM:()=>Gn,GPTBigCodeModel:()=>jn,GPTBigCodePreTrainedModel:()=>Vn,GPTJForCausalLM:()=>Nn,GPTJModel:()=>Rn,GPTJPreTrainedModel:()=>Dn,GPTNeoForCausalLM:()=>In,GPTNeoModel:()=>zn,GPTNeoPreTrainedModel:()=>Fn,GPTNeoXForCausalLM:()=>Ln,GPTNeoXModel:()=>Bn,GPTNeoXPreTrainedModel:()=>On,Gemma2ForCausalLM:()=>ir,Gemma2Model:()=>rr,Gemma2PreTrainedModel:()=>nr,GemmaForCausalLM:()=>tr,GemmaModel:()=>er,GemmaPreTrainedModel:()=>Zn,HieraForImageClassification:()=>mi,HieraModel:()=>hi,HieraPreTrainedModel:()=>pi,HubertForCTC:()=>Aa,HubertForSequenceClassification:()=>Fa,HubertModel:()=>Pa,HubertPreTrainedModel:()=>Ea,ImageMattingOutput:()=>Xo,JAISLMHeadModel:()=>An,JAISModel:()=>Pn,JAISPreTrainedModel:()=>En,LlamaForCausalLM:()=>Kn,LlamaModel:()=>Xn,LlamaPreTrainedModel:()=>Hn,LlavaForConditionalGeneration:()=>un,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>_t,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>aa,M2M100Model:()=>ia,M2M100PreTrainedModel:()=>ra,MBartForCausalLM:()=>Et,MBartForConditionalGeneration:()=>St,MBartForSequenceClassification:()=>Ct,MBartModel:()=>$t,MBartPreTrainedModel:()=>kt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ye,MPNetPreTrainedModel:()=>Qe,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>yt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>na,MarianModel:()=>ta,MarianPreTrainedModel:()=>ea,MaskedLMOutput:()=>qo,MistralForCausalLM:()=>Xa,MistralModel:()=>Ha,MistralPreTrainedModel:()=>Wa,MobileBertForMaskedLM:()=>He,MobileBertForQuestionAnswering:()=>Ke,MobileBertForSequenceClassification:()=>Xe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>Ms,MobileNetV1Model:()=>xs,MobileNetV1PreTrainedModel:()=>vs,MobileNetV2ForImageClassification:()=>$s,MobileNetV2Model:()=>ks,MobileNetV2PreTrainedModel:()=>Ts,MobileNetV3ForImageClassification:()=>Es,MobileNetV3Model:()=>Cs,MobileNetV3PreTrainedModel:()=>Ss,MobileNetV4ForImageClassification:()=>Fs,MobileNetV4Model:()=>As,MobileNetV4PreTrainedModel:()=>Ps,MobileViTF
orImageClassification:()=>Br,MobileViTModel:()=>Or,MobileViTPreTrainedModel:()=>Ir,MobileViTV2ForImageClassification:()=>Rr,MobileViTV2Model:()=>Dr,MobileViTV2PreTrainedModel:()=>Lr,ModelOutput:()=>j,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>xr,MptModel:()=>vr,MptPreTrainedModel:()=>br,MusicgenForCausalLM:()=>ys,MusicgenForConditionalGeneration:()=>bs,MusicgenModel:()=>ws,MusicgenPreTrainedModel:()=>_s,NomicBertModel:()=>Y,NomicBertPreTrainedModel:()=>Q,OPTForCausalLM:()=>kr,OPTModel:()=>Tr,OPTPreTrainedModel:()=>Mr,OpenELMForCausalLM:()=>or,OpenELMModel:()=>sr,OpenELMPreTrainedModel:()=>ar,OwlViTForObjectDetection:()=>jr,OwlViTModel:()=>Vr,OwlViTPreTrainedModel:()=>Nr,Owlv2ForObjectDetection:()=>Ur,Owlv2Model:()=>qr,Owlv2PreTrainedModel:()=>Gr,Phi3ForCausalLM:()=>gr,Phi3Model:()=>fr,Phi3PreTrainedModel:()=>mr,PhiForCausalLM:()=>hr,PhiModel:()=>pr,PhiPreTrainedModel:()=>cr,PreTrainedModel:()=>V,PretrainedMixin:()=>zs,PyAnnoteForAudioFrameClassification:()=>ha,PyAnnoteModel:()=>pa,PyAnnotePreTrainedModel:()=>ca,QuestionAnsweringModelOutput:()=>Uo,Qwen2ForCausalLM:()=>dr,Qwen2Model:()=>ur,Qwen2PreTrainedModel:()=>lr,RTDetrForObjectDetection:()=>ri,RTDetrModel:()=>ni,RTDetrObjectDetectionOutput:()=>ii,RTDetrPreTrainedModel:()=>ti,ResNetForImageClassification:()=>_i,ResNetModel:()=>gi,ResNetPreTrainedModel:()=>fi,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Vt,RobertaForSequenceClassification:()=>Rt,RobertaForTokenClassification:()=>Nt,RobertaModel:()=>Lt,RobertaPreTrainedModel:()=>Bt,SamImageSegmentationOutput:()=>Zi,SamModel:()=>Ji,SamPreTrainedModel:()=>Yi,SapiensForDepthEstimation:()=>Ai,SapiensForNormalEstimation:()=>Fi,SapiensForSemanticSegmentation:()=>Pi,SapiensPreTrainedModel:()=>Ei,SegformerForImageClassification:()=>us,SegformerForSemanticSegmentation:()=>ds,SegformerModel:()=>ls,SegformerPreTrainedModel:()=>os,Seq2SeqLMOutput:()=>No,SequenceClassifierOutput:()=>Vo,SiglipModel:()=>wn,SiglipPreTrainedModel:()=>_n,SiglipTextModel:()=>yn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>Va,SpeechT5ForTextToSpeech:()=>ja,SpeechT5HifiGan:()=>Ga,SpeechT5Model:()=>Na,SpeechT5PreTrainedModel:()=>Ra,SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>st,SqueezeBertForSequenceClassification:()=>at,SqueezeBertModel:()=>rt,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>hs,StableLmModel:()=>ps,StableLmPreTrainedModel:()=>cs,Starcoder2ForCausalLM:()=>Ya,Starcoder2Model:()=>Qa,Starcoder2PreTrainedModel:()=>Ka,Swin2SRForImageSuperResolution:()=>Mi,Swin2SRModel:()=>xi,Swin2SRPreTrainedModel:()=>vi,SwinForImageClassification:()=>bi,SwinModel:()=>yi,SwinPreTrainedModel:()=>wi,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>oi,TableTransformerModel:()=>si,TableTransformerObjectDetectionOutput:()=>li,TableTransformerPreTrainedModel:()=>ai,TokenClassifierOutput:()=>Go,TrOCRForCausalLM:()=>Ua,TrOCRPreTrainedModel:()=>qa,UniSpeechForCTC:()=>wa,UniSpeechForSequenceClassification:()=>ya,UniSpeechModel:()=>_a,UniSpeechPreTrainedModel:()=>ga,UniSpeechSatForAudioFrameClassification:()=>Ta,UniSpeechSatForCTC:()=>xa,UniSpeechSatForSequenceClassification:()=>Ma,UniSpeechSatModel:()=>va,UniSpeechSatPreTrainedModel:()=>ba,ViTForImageClassification:()=>Cr,ViTModel:()=>Sr,ViTPreTrainedModel:()=>$r,VisionEnco
derDecoderModel:()=>on,VitMatteForImageMatting:()=>zr,VitMattePreTrainedModel:()=>Fr,VitsModel:()=>ss,VitsModelOutput:()=>Ko,VitsPreTrainedModel:()=>as,Wav2Vec2BertForCTC:()=>Sa,Wav2Vec2BertForSequenceClassification:()=>Ca,Wav2Vec2BertModel:()=>$a,Wav2Vec2BertPreTrainedModel:()=>ka,Wav2Vec2ForAudioFrameClassification:()=>da,Wav2Vec2ForCTC:()=>la,Wav2Vec2ForSequenceClassification:()=>ua,Wav2Vec2Model:()=>oa,Wav2Vec2PreTrainedModel:()=>sa,WavLMForAudioFrameClassification:()=>Da,WavLMForCTC:()=>Oa,WavLMForSequenceClassification:()=>Ba,WavLMForXVector:()=>La,WavLMModel:()=>Ia,WavLMPreTrainedModel:()=>za,WeSpeakerResNetModel:()=>fa,WeSpeakerResNetPreTrainedModel:()=>ma,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>an,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Ht,XLMForSequenceClassification:()=>Ut,XLMForTokenClassification:()=>Wt,XLMModel:()=>Gt,XLMPreTrainedModel:()=>jt,XLMRobertaForMaskedLM:()=>Qt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Yt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Kt,XLMRobertaPreTrainedModel:()=>Xt,XLMWithLMHeadModel:()=>qt,XVectorOutput:()=>jo,YolosForObjectDetection:()=>Ki,YolosModel:()=>Xi,YolosObjectDetectionOutput:()=>Qi,YolosPreTrainedModel:()=>Hi});var r=n(/*! ./configs.js */"./src/configs.js"),i=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),a=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),s=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),u=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),c=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),f=n(/*! ./env.js */"./src/env.js"),g=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),_=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,y=1,b=2,v=3,x=4,M=5,T=6,k=7,$=new Map,S=new Map,C=new Map;async function E(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async s=>{const{buffer:o,session_options:u}=await async function(e,t,n){let s=n.device;s&&"string"!=typeof s&&(s.hasOwnProperty(t)?s=s[t]:(console.warn(`device not specified for "${t}". Using the default device.`),s=null));const o=s??(f.apis.IS_NODE_ENV?"cpu":"wasm"),u=(0,i.deviceToExecutionProviders)(o);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=a.DEFAULT_DEVICE_DTYPE_MAPPING[o]??a.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${o}).`)));const c=d;if(!a.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(c))throw new Error(`Invalid dtype: ${c}. 
Should be one of: ${Object.keys(a.DATA_TYPES).join(", ")}`);if(c===a.DATA_TYPES.fp16&&"webgpu"===o&&!await(0,a.isWebGpuFp16Supported)())throw new Error(`The device (${o}) does not support fp16.`);const p=a.DEFAULT_DTYPE_SUFFIX_MAPPING[c],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=u;const g=(0,l.getModelFile)(e,h,!0,n);let _=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(f.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,i=`${n.subfolder??""}/${r}`;_.push(new Promise((async(t,a)=>{const s=await(0,l.getModelFile)(e,i,!0,n);t({path:r,data:s})})))}else void 0!==m.externalData&&(_=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(_.length>0&&(m.externalData=await Promise.all(_)),"webgpu"===o){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,i.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await g,session_options:m}}(e,t[s],n);return[s,await(0,i.createInferenceSession)(o,u)]}))))}async function P(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const a of e.inputNames){const e=t[a];e instanceof c.Tensor?n[a]=(0,i.isONNXProxy)()?e.clone():e:r.push(a)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const a=Object.keys(t).length,s=e.inputNames.length;if(a>s){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${a} > ${s}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=A(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function A(e){for(let t in e)(0,i.isONNXTensor)(e[t])?e[t]=new c.Tensor(e[t]):"object"==typeof e[t]&&A(e[t]);return e}function F(e){if(e instanceof c.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new c.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new c.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new c.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:i,...a}=t;if(!n){const r=(0,o.pick)(t,e.sessions.model.inputNames);n=(await O(e,r)).last_hidden_state}a.input_ids=i,a.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(a.encoder_attention_mask=t.attention_mask);return await B(e,a,!0)}async function O(e,t){const n=e.sessions.model,r=(0,o.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new c.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await P(n,r)}async function B(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:i,...a}=t;r.inputNames.includes("use_cache_branch")&&(a.use_cache_branch=z(!!i)),r.inputNames.includes("position_ids")&&a.attention_mask&&!a.position_ids&&(a.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:i}=e,[a,s]=i.dims,o=new BigInt64Array(i.data.length);for(let e=0;e<a;++e){const t=e*s;let n=BigInt(0);for(let e=0;e<s;++e){const r=t+e;0n===i.data[r]?o[r]=BigInt(1):(o[r]=n,n+=i.data[r])}}let l=new c.Tensor("int64",o,i.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(a,i)),e.addPastKeyValues(a,i);const s=(0,o.pick)(a,r.inputNames);return await P(r,s)}async function L(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:i=null,inputs_embeds:a=null,past_key_values:s=null,generation_config:o=null,logits_processor:l=null,...u}){if(!a)if(a=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const i=await e.encode_image({pixel_values:r});({inputs_embeds:a,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:i,inputs_embeds:a,input_ids:t,attention_mask:n}))}else if(s&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(s)[0].dims.at(-2);n=(0,c.cat)([(0,c.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await B(e,{inputs_embeds:a,past_key_values:s,attention_mask:n,position_ids:i,generation_config:o,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:i}=n;if(i&&i.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const i=e.config.num_image_tokens;if(!i)throw new Error("`num_image_tokens` is missing in the model configuration.");const a=r.dims[1]-(t-i);n.input_ids=r.slice(null,[-a,null]),n.attention_mask=(0,c.ones)([1,t+a])}}return n}function R(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:F(t)}}function N(e,...t){return e.config.is_encoder_decoder?R(e,...t):D(e,...t)}class V extends s.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=C.get(this.constructor),r=$.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=D;break;case b:case v:case k:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=R;break;case y:this._forward=I;break;case T:this.can_generate=!0,this._forward=L,this._prepare_inputs_for_generation=N;break;default:this._forward=O}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:d=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:d,dtype:c,use_external_data_format:p,session_options:h};const f=C.get(this),g=$.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===x)_=await Promise.all([E(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===b||g===v)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===M)_=await Promise.all([E(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===y)_=await Promise.all([E(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([E(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else g===k?_=await Promise.all([E(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(g!==w&&console.warn(`Model type for '${f??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),_=await Promise.all([E(e,{model:m.model_file_name??"model"},m)]));return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new u.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new u.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new u.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new u.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new u.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new u.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new u.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new u.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new u.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new u.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new u.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new u.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new u.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new u.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const i=new n(r);return"generation_config"in this&&Object.assign(i,this.generation_config),e&&Object.assign(i,e),t&&Object.assign(i,(0,o.pick)(t,Object.getOwnPropertyNames(i))),i}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Gs,Ws,js,Ls],t=C.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let i=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(i+=` Please use the following class instead: ${[...n].join(", ")}`),Error(i)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new c.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,c.cat)([n.attention_mask,(0,c.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,o.pick)(n,this.forward_params),i=this.main_input_name;if(i in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[i]=e;return{inputs_tensor:r[i],model_inputs:r,model_input_name:i}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...i}=t,a=await this._prepare_inputs_embeds(t);t={...i,...(0,o.pick)(a,["inputs_embeds","attention_mask"])}}let{last_hidden_state:i}=await O(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)i=(0,c.cat)([i,(0,c.full_like)(i,0)],0),"attention_mask"in t&&(t.attention_mask=(0,c.cat)([t.attention_mask,(0,c.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=F(t.decoder_input_ids).dims[0];if(e!==i.dims[0]){if(1!==i.dims[0])throw new Error(`The encoder outputs have a different batch size (${i.dims[0]}) than the decoder inputs (${e}).`);i=(0,c.cat)(Array.from({length:e},(()=>i)),0)}}return t.encoder_outputs=i,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:i,generation_config:a}){let{decoder_input_ids:s,...o}=n;if(s)Array.isArray(s[0])||(s=Array.from({length:e},(()=>s)));else if(r??=i,"musicgen"===this.config.model_type)s=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);s=r}else s=Array.from({length:e},(()=>[r]));return s=F(s),n.decoder_attention_mask=(0,c.ones_like)(s),{input_ids:s,model_inputs:o}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:i=null,...a}){this._validate_model_class(),t=this._prepare_generation_config(t,a);let{inputs_tensor:s,model_inputs:o,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:a});const u=this.config.is_encoder_decoder;let d;u&&("encoder_outputs"in o||(o=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:s,model_inputs:o,model_input_name:l,generation_config:t}))),u?({input_ids:d,model_inputs:o}=this._prepare_decoder_input_ids_for_generation({batch_size:o[l].dims.at(0),model_input_name:l,model_kwargs:o,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=o[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),f=this._get_stopping_criteria(t,r),g=o[l].dims.at(0),_=m.LogitsSampler.getSampler(t),w=new Array(g).fill(0),y=d.tolist();i&&i.put(y);let b=null,v={};for(;;){o=this.prepare_inputs_for_generation(y,o,t);const e=await this.forward(o);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in v||(v[e]=[]),v[e].push(t[e])}const n=h(y,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],i=await _(t);for(const[t,n]of i){const i=BigInt(t);w[e]+=n,y[e].push(i),r.push([i]);break}}i&&i.put(r);if(f(y).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,o.past_key_values,!1));break}o=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:o,is_encoder_decoder:u})}i&&i.end();const x=new c.Tensor("int64",y.flat(),[y.length,y[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...v}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const i 
in e)if(i.startsWith("present")){const a=i.replace("present","past_key_values");if(t&&i.includes("encoder"))r[a]=t[a];else{if(n&&t){const e=t[a];"gpu-buffer"===e.location&&e.dispose()}r[a]=e[i]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],i=(0,r.getKeyValueShapes)(this.config);for(const r in i)e[r]=new c.Tensor(t,n,i[r])}}async encode_image({pixel_values:e}){const t=(await P(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await P(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class j{}class G extends j{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class q extends V{}class U extends q{}class W extends q{async _call(e){return new qo(await super._call(e))}}class H extends q{async _call(e){return new Vo(await super._call(e))}}class X extends q{async _call(e){return new Go(await super._call(e))}}class K extends q{async _call(e){return new Uo(await super._call(e))}}class Q extends V{}class Y extends Q{}class J extends V{}class Z extends J{}class ee extends J{async _call(e){return new qo(await super._call(e))}}class te extends J{async _call(e){return new Vo(await super._call(e))}}class ne extends J{async _call(e){return new Go(await super._call(e))}}class re extends J{async _call(e){return new Uo(await super._call(e))}}class ie extends V{}class ae extends ie{}class se extends ie{async _call(e){return new qo(await super._call(e))}}class oe extends ie{async _call(e){return new Vo(await super._call(e))}}class le extends ie{async _call(e){return new Go(await super._call(e))}}class ue extends ie{async _call(e){return new Uo(await super._call(e))}}class de extends V{}class ce extends de{}class pe extends de{async _call(e){return new qo(await super._call(e))}}class he extends de{async _call(e){return new Vo(await super._call(e))}}class me extends de{async _call(e){return new Go(await super._call(e))}}class fe extends de{async _call(e){return new Uo(await super._call(e))}}class ge extends V{}class _e extends ge{}class we extends ge{async _call(e){return new qo(await super._call(e))}}class ye extends ge{async _call(e){return new Vo(await super._call(e))}}class be extends ge{async _call(e){return new Go(await super._call(e))}}class ve extends ge{async _call(e){return new Uo(await super._call(e))}}class xe extends V{}class Me extends xe{}class Te extends xe{async _call(e){return new qo(await super._call(e))}}class ke extends xe{async _call(e){return new Vo(await super._call(e))}}class $e extends xe{async _call(e){return new Go(await super._call(e))}}class Se extends xe{async _call(e){return new Uo(await super._call(e))}}class Ce extends V{}class Ee extends Ce{}class Pe extends Ce{async _call(e){return new qo(await super._call(e))}}class Ae extends Ce{async _call(e){return new Vo(await super._call(e))}}class Fe extends Ce{async _call(e){return new Go(await super._call(e))}}class ze extends Ce{async _call(e){return new 
Uo(await super._call(e))}}class Ie extends V{}class Oe extends Ie{}class Be extends Ie{async _call(e){return new Vo(await super._call(e))}}class Le extends Ie{async _call(e){return new Go(await super._call(e))}}class De extends Ie{async _call(e){return new Uo(await super._call(e))}}class Re extends Ie{async _call(e){return new qo(await super._call(e))}}class Ne extends V{}class Ve extends Ne{}class je extends Ne{async _call(e){return new qo(await super._call(e))}}class Ge extends Ne{async _call(e){return new Vo(await super._call(e))}}class qe extends Ne{async _call(e){return new Go(await super._call(e))}}class Ue extends V{}class We extends Ue{}class He extends Ue{async _call(e){return new qo(await super._call(e))}}class Xe extends Ue{async _call(e){return new Vo(await super._call(e))}}class Ke extends Ue{async _call(e){return new Uo(await super._call(e))}}class Qe extends V{}class Ye extends Qe{}class Je extends Qe{async _call(e){return new qo(await super._call(e))}}class Ze extends Qe{async _call(e){return new Vo(await super._call(e))}}class et extends Qe{async _call(e){return new Go(await super._call(e))}}class tt extends Qe{async _call(e){return new Uo(await super._call(e))}}class nt extends V{}class rt extends nt{}class it extends nt{async _call(e){return new qo(await super._call(e))}}class at extends nt{async _call(e){return new Vo(await super._call(e))}}class st extends nt{async _call(e){return new Uo(await super._call(e))}}class ot extends V{}class lt extends ot{}class ut extends ot{async _call(e){return new Vo(await super._call(e))}}class dt extends ot{async _call(e){return new Uo(await super._call(e))}}class ct extends ot{async _call(e){return new qo(await super._call(e))}}class pt extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class ft extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class gt extends ft{}class _t extends ft{}class wt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class yt extends wt{}class bt extends wt{}class vt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends vt{}class Mt extends vt{}class Tt extends vt{async _call(e){return new Vo(await super._call(e))}}class kt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class $t extends kt{}class St extends kt{}class Ct extends kt{async _call(e){return new Vo(await super._call(e))}}class Et extends kt{}class Pt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class At extends Pt{}class Ft extends Pt{}class zt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends zt{}class Ot extends zt{}class Bt extends V{}class Lt extends Bt{}class Dt extends Bt{async _call(e){return new qo(await super._call(e))}}class Rt extends Bt{async _call(e){return new Vo(await super._call(e))}}class Nt extends Bt{async _call(e){return new Go(await super._call(e))}}class Vt extends Bt{async _call(e){return new Uo(await super._call(e))}}class jt extends V{}class Gt extends jt{}class qt extends jt{async _call(e){return new qo(await super._call(e))}}class Ut extends jt{async _call(e){return new Vo(await super._call(e))}}class Wt extends jt{async _call(e){return new Go(await super._call(e))}}class Ht extends jt{async _call(e){return new Uo(await super._call(e))}}class Xt extends V{}class Kt extends Xt{}class Qt extends Xt{async 
_call(e){return new qo(await super._call(e))}}class Yt extends Xt{async _call(e){return new Vo(await super._call(e))}}class Jt extends Xt{async _call(e){return new Go(await super._call(e))}}class Zt extends Xt{async _call(e){return new Uo(await super._call(e))}}class en extends V{}class tn extends en{}class nn extends en{}class rn extends V{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class an extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,g.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const i=`<|${(0,_.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[i]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...i}){t=this._prepare_generation_config(t,i);const a=i.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,a))),t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,a.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const s=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:a,...i});return t.return_token_timestamps&&(s.token_timestamps=this._extract_token_timestamps(s,t.alignment_heads,t.num_frames)),s}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let i=this.config.median_filter_width;void 0===i&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),i=7);const a=e.cross_attentions,s=Array.from({length:this.config.decoder_layers},((e,t)=>(0,c.cat)(a.map((e=>e[t])),2))),l=(0,c.stack)(t.map((([e,t])=>{if(e>=s.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${s.length}).`);return n?s[e].slice(null,t,null,[0,n]):s[e].slice(null,t)}))).transpose(1,0,2,3),[u,d]=(0,c.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],a=u[e][n][0].data,s=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-s[e])/a[e];t.set((0,p.medianFilter)(t,i))}}}const m=[(0,c.mean)(h,1)],f=e.sequences.dims,g=new c.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=m[e].neg().squeeze_(0),[n,i]=(0,p.dynamic_time_warping)(t.tolist()),a=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),s=(0,o.mergeArrays)([1],a).map((e=>!!e)),l=[];for(let e=0;e<s.length;++e)s[e]&&l.push(i[e]*r);g[e].data.set(l,1)}return g}}class on extends V{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends V{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class un extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const i=this.config.image_token_index,a=n.tolist().map((e=>e.findIndex((e=>e==i)))),s=a.every((e=>-1===e)),o=a.every((e=>-1!==e));if(!s&&!o)throw new Error("Every input should contain either 0 or 1 image token.");if(s)return{inputs_embeds:e,attention_mask:r};const l=[],u=[];for(let n=0;n<a.length;++n){const i=a[n],s=e[n],o=t[n],d=r[n];l.push((0,c.cat)([s.slice([0,i]),o,s.slice([i+1,s.dims[0]])],0)),u.push((0,c.cat)([d.slice([0,i]),(0,c.ones)([o.dims[0]]),d.slice([i+1,d.dims[0]])],0))}return{inputs_embeds:(0,c.stack)(l,0),attention_mask:(0,c.stack)(u,0)}}}class dn extends un{}class cn extends V{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,c.cat)([t,e],1),attention_mask:(0,c.cat)([(0,c.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let i,a;return e&&(i=await this.encode_text({input_ids:e})),t&&(a=await this.encode_image({pixel_values:t})),i&&a?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:i,image_features:a,input_ids:e,attention_mask:r})):n=i||a,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:i,encoder_outputs:a,past_key_values:s,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await 
this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!a){let{last_hidden_state:e}=await O(this,{inputs_embeds:o,attention_mask:n});a=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:i,encoder_attention_mask:n,encoder_hidden_states:a,past_key_values:s};return await B(this,u,!0)}}class hn extends V{}class mn extends hn{}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class _n extends V{}class wn extends _n{}class yn extends _n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class vn extends V{}class xn extends vn{}class Mn extends V{}class Tn extends Mn{}class kn extends Mn{}class $n extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Sn extends $n{}class Cn extends $n{}class En extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends En{}class An extends En{}class Fn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class zn extends Fn{}class In extends Fn{}class On extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Bn extends On{}class Ln extends On{}class Dn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Rn extends Dn{}class Nn extends Dn{}class Vn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class jn extends Vn{}class Gn extends Vn{}class qn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Un extends qn{}class Wn extends qn{}class Hn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Xn extends Hn{}class Kn extends Hn{}class Qn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Yn extends Qn{}class Jn extends Qn{}class Zn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class er extends Zn{}class tr extends Zn{}class nr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class rr extends nr{}class ir extends nr{}class ar extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class sr extends ar{}class or extends ar{}class lr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ur extends lr{}class dr extends lr{}class cr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class pr extends cr{}class hr extends cr{}class mr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class fr extends mr{}class gr extends mr{}class _r extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class wr extends _r{}class yr extends _r{}class br extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class vr extends br{}class xr extends br{}class Mr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Tr extends Mr{}class kr extends Mr{}class $r extends V{}class Sr extends $r{}class Cr extends $r{async _call(e){return new Vo(await super._call(e))}}class Er extends V{}class Pr extends Er{}class Ar extends Er{async _call(e){return new Vo(await super._call(e))}}class Fr extends V{}class zr extends Fr{async _call(e){return new Xo(await 
super._call(e))}}class Ir extends V{}class Or extends Ir{}class Br extends Ir{async _call(e){return new Vo(await super._call(e))}}class Lr extends V{}class Dr extends Lr{}class Rr extends Lr{async _call(e){return new Vo(await super._call(e))}}class Nr extends V{}class Vr extends Nr{}class jr extends Nr{}class Gr extends V{}class qr extends Gr{}class Ur extends Gr{}class Wr extends V{}class Hr extends Wr{}class Xr extends Wr{async _call(e){return new Vo(await super._call(e))}}class Kr extends V{}class Qr extends Kr{}class Yr extends Kr{async _call(e){return new Zr(await super._call(e))}}class Jr extends Kr{async _call(e){return new ei(await super._call(e))}}class Zr extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ei extends j{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class ti extends V{}class ni extends ti{}class ri extends ti{async _call(e){return new ii(await super._call(e))}}class ii extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ai extends V{}class si extends ai{}class oi extends ai{async _call(e){return new li(await super._call(e))}}class li extends Zr{}class ui extends V{}class di extends ui{}class ci extends ui{async _call(e){return new Vo(await super._call(e))}}class pi extends V{}class hi extends pi{}class mi extends pi{async _call(e){return new Vo(await super._call(e))}}class fi extends V{}class gi extends fi{}class _i extends fi{async _call(e){return new Vo(await super._call(e))}}class wi extends V{}class yi extends wi{}class bi extends wi{async _call(e){return new Vo(await super._call(e))}}class vi extends V{}class xi extends vi{}class Mi extends vi{}class Ti extends V{}class ki extends Ti{}class $i extends Ti{}class Si extends V{}class Ci extends Si{}class Ei extends V{}class Pi extends Ei{}class Ai extends Ei{}class Fi extends Ei{}class zi extends V{}class Ii extends zi{}class Oi extends zi{}class Bi extends V{}class Li extends Bi{}class Di extends V{}class Ri extends Di{}class Ni extends Di{async _call(e){return new Vo(await super._call(e))}}class Vi extends V{}class ji extends Vi{}class Gi extends Vi{async _call(e){return new Vo(await super._call(e))}}class qi extends V{}class Ui extends qi{}class Wi extends qi{async _call(e){return new Vo(await super._call(e))}}class Hi extends V{}class Xi extends Hi{}class Ki extends Hi{async _call(e){return new Qi(await super._call(e))}}class Qi extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Yi extends V{}class Ji extends Yi{async get_image_embeddings({pixel_values:e}){return await O(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new c.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await P(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new Zi(await super._call(e))}}class Zi extends j{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class ea extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ta extends 
ea{}class na extends ea{}class ra extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ia extends ra{}class aa extends ra{}class sa extends V{}class oa extends sa{}class la extends sa{async _call(e){return new Wo(await super._call(e))}}class ua extends sa{async _call(e){return new Vo(await super._call(e))}}class da extends sa{async _call(e){return new Go(await super._call(e))}}class ca extends V{}class pa extends ca{}class ha extends ca{async _call(e){return new Go(await super._call(e))}}class ma extends V{}class fa extends ma{}class ga extends V{}class _a extends ga{}class wa extends ga{async _call(e){return new Wo(await super._call(e))}}class ya extends ga{async _call(e){return new Vo(await super._call(e))}}class ba extends V{}class va extends ba{}class xa extends ba{async _call(e){return new Wo(await super._call(e))}}class Ma extends ba{async _call(e){return new Vo(await super._call(e))}}class Ta extends ba{async _call(e){return new Go(await super._call(e))}}class ka extends V{}class $a extends ka{}class Sa extends ka{async _call(e){return new Wo(await super._call(e))}}class Ca extends ka{async _call(e){return new Vo(await super._call(e))}}class Ea extends V{}class Pa extends sa{}class Aa extends sa{async _call(e){return new Wo(await super._call(e))}}class Fa extends sa{async _call(e){return new Vo(await super._call(e))}}class za extends V{}class Ia extends za{}class Oa extends za{async _call(e){return new Wo(await super._call(e))}}class Ba extends za{async _call(e){return new Vo(await super._call(e))}}class La extends za{async _call(e){return new jo(await super._call(e))}}class Da extends za{async _call(e){return new Go(await super._call(e))}}class Ra extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Na extends Ra{}class Va extends Ra{}class ja extends Ra{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:i=20,vocoder:a=null}={}){const s={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await O(this,s),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*i),p=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=z(!!g);let r;r=g?g.output_sequence_out:new c.Tensor("float32",new Float32Array(h),[1,1,h]);let i={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(i,f),g=await P(this.sessions.decoder_model_merged,i),f=this.getPastKeyValues(g,f);const{prob:a,spectrum:s}=g;if(m.push(s),_>=p&&(Array.from(a.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,c.cat)(m),{waveform:y}=await P(a.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class Ga extends V{main_input_name="spectrogram"}class qa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ua extends qa{}class Wa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ha extends Wa{}class Xa extends Wa{}class Ka extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qa extends Ka{}class Ya extends Ka{}class Ja extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Za extends Ja{}class es extends Ja{}class ts extends V{}class ns extends ts{}class rs extends ts{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class is extends ts{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class as extends V{}class ss extends as{async _call(e){return new Ko(await 
super._call(e))}}class os extends V{}class ls extends os{}class us extends os{}class ds extends os{}class cs extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ps extends cs{}class hs extends cs{}class ms extends V{}class fs extends ms{}class gs extends ms{async _call(e){return new Vo(await super._call(e))}}class _s extends V{}class ws extends _s{}class ys extends _s{}class bs extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,i=n-r;let a=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const s=t%n-Math.floor(t/n)%r;s>0&&s<=i&&(e.data[a++]=e.data[t])}const s=Math.floor(t/r),o=a/(s*r);return new c.Tensor(e.type,e.data.slice(0,a),[s,r,o])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await P(this.sessions.encodec_decode,{audio_codes:n});return r}}class vs extends V{}class xs extends vs{}class Ms extends vs{async _call(e){return new Vo(await super._call(e))}}class Ts extends V{}class ks extends Ts{}class $s extends Ts{async _call(e){return new Vo(await super._call(e))}}class Ss extends V{}class Cs extends Ss{}class Es extends Ss{async _call(e){return new Vo(await super._call(e))}}class Ps extends V{}class As extends Ps{}class Fs extends Ps{async _call(e){return new Vo(await super._call(e))}}class zs{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){let h={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await V.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Is=new 
Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",Y]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ce]],["esm",["EsmModel",Ve]],["convbert",["ConvBertModel",ae]],["camembert",["CamembertModel",_e]],["deberta",["DebertaModel",Me]],["deberta-v2",["DebertaV2Model",Ee]],["mpnet",["MPNetModel",Ye]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Oe]],["roberta",["RobertaModel",Lt]],["xlm",["XLMModel",Gt]],["xlm-roberta",["XLMRobertaModel",Kt]],["clap",["ClapModel",ns]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",Tn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",wn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",oa]],["wav2vec2-bert",["Wav2Vec2BertModel",$a]],["unispeech",["UniSpeechModel",_a]],["unispeech-sat",["UniSpeechSatModel",va]],["hubert",["HubertModel",Pa]],["wavlm",["WavLMModel",Ia]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",ss]],["pyannote",["PyAnnoteModel",pa]],["wespeaker-resnet",["WeSpeakerResNetModel",fa]],["detr",["DetrModel",Qr]],["rt_detr",["RTDetrModel",ni]],["table-transformer",["TableTransformerModel",si]],["vit",["ViTModel",Sr]],["fastvit",["FastViTModel",Pr]],["mobilevit",["MobileViTModel",Or]],["mobilevitv2",["MobileViTV2Model",Dr]],["owlvit",["OwlViTModel",Vr]],["owlv2",["Owlv2Model",qr]],["beit",["BeitModel",Hr]],["deit",["DeiTModel",di]],["hiera",["HieraModel",hi]],["convnext",["ConvNextModel",Ri]],["convnextv2",["ConvNextV2Model",ji]],["dinov2",["Dinov2Model",Ui]],["resnet",["ResNetModel",gi]],["swin",["SwinModel",yi]],["swin2sr",["Swin2SRModel",xi]],["donut-swin",["DonutSwinModel",Li]],["yolos",["YolosModel",Xi]],["dpt",["DPTModel",ki]],["glpn",["GLPNModel",Ii]],["hifigan",["SpeechT5HifiGan",Ga]],["efficientnet",["EfficientNetModel",fs]],["mobilenet_v1",["MobileNetV1Model",xs]],["mobilenet_v2",["MobileNetV2Model",ks]],["mobilenet_v3",["MobileNetV3Model",Cs]],["mobilenet_v4",["MobileNetV4Model",As]]]),Os=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",yt]],["bart",["BartModel",xt]],["mbart",["MBartModel",$t]],["marian",["MarianModel",ta]],["whisper",["WhisperModel",an]],["m2m_100",["M2M100Model",ia]],["blenderbot",["BlenderbotModel",At]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Bs=new Map([["bloom",["BloomModel",wr]],["jais",["JAISModel",Pn]],["gpt2",["GPT2Model",Sn]],["gptj",["GPTJModel",Rn]],["gpt_bigcode",["GPTBigCodeModel",jn]],["gpt_neo",["GPTNeoModel",zn]],["gpt_neox",["GPTNeoXModel",Bn]],["codegen",["CodeGenModel",Un]],["llama",["LlamaModel",Xn]],["cohere",["CohereModel",Yn]],["gemma",["GemmaModel",er]],["gemma2",["Gemma2Model",rr]],["openelm",["OpenELMModel",sr]],["qwen2",["Qwen2Model",ur]],["phi",["PhiModel",pr]],["phi3",["Phi3Model",fr]],["mpt",["MptModel",vr]],["opt",["OPTModel",Tr]],["mistral",["MistralModel",Ha]],["starcoder2",["Starcoder2Model",Qa]],["falcon",["FalconModel",Za]],["stablelm",["StableLmModel",ps]]]),Ls=new Map([["speecht5",["SpeechT5ForSpeechToText",Va]],["whisper",["WhisperForConditionalGeneration",sn]]]),Ds=new Map([["speecht5",["SpeechT5ForTextToSpeech",ja]]]),Rs=new Map([["vits",["VitsModel",ss]],["musicgen",["MusicgenForConditionalGeneration",bs]]]),Ns=new 
Map([["bert",["BertForSequenceClassification",H]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",Ge]],["convbert",["ConvBertForSequenceClassification",oe]],["camembert",["CamembertForSequenceClassification",ye]],["deberta",["DebertaForSequenceClassification",ke]],["deberta-v2",["DebertaV2ForSequenceClassification",Ae]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ut]],["distilbert",["DistilBertForSequenceClassification",Be]],["roberta",["RobertaForSequenceClassification",Rt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Yt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Ct]],["mobilebert",["MobileBertForSequenceClassification",Xe]],["squeezebert",["SqueezeBertForSequenceClassification",at]]]),Vs=new Map([["bert",["BertForTokenClassification",X]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",$e]],["deberta-v2",["DebertaV2ForTokenClassification",Fe]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Le]],["roberta",["RobertaForTokenClassification",Nt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),js=new Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",_t]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",Mt]],["mbart",["MBartForConditionalGeneration",St]],["marian",["MarianMTModel",na]],["m2m_100",["M2M100ForConditionalGeneration",aa]],["blenderbot",["BlenderbotForConditionalGeneration",Ft]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Ot]]]),Gs=new Map([["bloom",["BloomForCausalLM",yr]],["gpt2",["GPT2LMHeadModel",Cn]],["jais",["JAISLMHeadModel",An]],["gptj",["GPTJForCausalLM",Nn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Gn]],["gpt_neo",["GPTNeoForCausalLM",In]],["gpt_neox",["GPTNeoXForCausalLM",Ln]],["codegen",["CodeGenForCausalLM",Wn]],["llama",["LlamaForCausalLM",Kn]],["cohere",["CohereForCausalLM",Jn]],["gemma",["GemmaForCausalLM",tr]],["gemma2",["Gemma2ForCausalLM",ir]],["openelm",["OpenELMForCausalLM",or]],["qwen2",["Qwen2ForCausalLM",dr]],["phi",["PhiForCausalLM",hr]],["phi3",["Phi3ForCausalLM",gr]],["mpt",["MptForCausalLM",xr]],["opt",["OPTForCausalLM",kr]],["mbart",["MBartForCausalLM",Et]],["mistral",["MistralForCausalLM",Xa]],["starcoder2",["Starcoder2ForCausalLM",Ya]],["falcon",["FalconForCausalLM",es]],["trocr",["TrOCRForCausalLM",Ua]],["stablelm",["StableLmForCausalLM",hs]]]),qs=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",je]],["convbert",["ConvBertForMaskedLM",se]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Pe]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ct]],["distilbert",["DistilBertForMaskedLM",Re]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Qt]],["mobilebert",["MobileBertForMaskedLM",He]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),Us=new 
Map([["bert",["BertForQuestionAnswering",K]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",ue]],["camembert",["CamembertForQuestionAnswering",ve]],["deberta",["DebertaForQuestionAnswering",Se]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Vt]],["xlm",["XLMForQuestionAnswering",Ht]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",Ke]],["squeezebert",["SqueezeBertForQuestionAnswering",st]]]),Ws=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),Hs=new Map([["llava",["LlavaForConditionalGeneration",un]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),Xs=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),Ks=new Map([["vit",["ViTForImageClassification",Cr]],["fastvit",["FastViTForImageClassification",Ar]],["mobilevit",["MobileViTForImageClassification",Br]],["mobilevitv2",["MobileViTV2ForImageClassification",Rr]],["beit",["BeitForImageClassification",Xr]],["deit",["DeiTForImageClassification",ci]],["hiera",["HieraForImageClassification",mi]],["convnext",["ConvNextForImageClassification",Ni]],["convnextv2",["ConvNextV2ForImageClassification",Gi]],["dinov2",["Dinov2ForImageClassification",Wi]],["resnet",["ResNetForImageClassification",_i]],["swin",["SwinForImageClassification",bi]],["segformer",["SegformerForImageClassification",us]],["efficientnet",["EfficientNetForImageClassification",gs]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ms]],["mobilenet_v2",["MobileNetV2ForImageClassification",$s]],["mobilenet_v3",["MobileNetV3ForImageClassification",Es]],["mobilenet_v4",["MobileNetV4ForImageClassification",Fs]]]),Qs=new Map([["detr",["DetrForObjectDetection",Yr]],["rt_detr",["RTDetrForObjectDetection",ri]],["table-transformer",["TableTransformerForObjectDetection",oi]],["yolos",["YolosForObjectDetection",Ki]]]),Ys=new Map([["owlvit",["OwlViTForObjectDetection",jr]],["owlv2",["Owlv2ForObjectDetection",Ur]]]),Js=new Map([["detr",["DetrForSegmentation",Jr]],["clipseg",["CLIPSegForImageSegmentation",kn]]]),Zs=new Map([["segformer",["SegformerForSemanticSegmentation",ds]],["sapiens",["SapiensForSemanticSegmentation",Pi]]]),eo=new Map([["sam",["SamModel",Ji]]]),to=new Map([["wav2vec2",["Wav2Vec2ForCTC",la]],["wav2vec2-bert",["Wav2Vec2BertForCTC",Sa]],["unispeech",["UniSpeechForCTC",wa]],["unispeech-sat",["UniSpeechSatForCTC",xa]],["wavlm",["WavLMForCTC",Oa]],["hubert",["HubertForCTC",Aa]]]),no=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",ua]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Ca]],["unispeech",["UniSpeechForSequenceClassification",ya]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Ma]],["wavlm",["WavLMForSequenceClassification",Ba]],["hubert",["HubertForSequenceClassification",Fa]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),ro=new Map([["wavlm",["WavLMForXVector",La]]]),io=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Ta]],["wavlm",["WavLMForAudioFrameClassification",Da]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",da]],["pyannote",["PyAnnoteForAudioFrameClassification",ha]]]),ao=new Map([["vitmatte",["VitMatteForImageMatting",zr]]]),so=new 
Map([["swin2sr",["Swin2SRForImageSuperResolution",Mi]]]),oo=new Map([["dpt",["DPTForDepthEstimation",$i]],["depth_anything",["DepthAnythingForDepthEstimation",Ci]],["glpn",["GLPNForDepthEstimation",Oi]],["sapiens",["SapiensForDepthEstimation",Ai]]]),lo=new Map([["sapiens",["SapiensForNormalEstimation",Fi]]]),uo=new Map([["clip",["CLIPVisionModelWithProjection",gn]],["siglip",["SiglipVisionModel",bn]]]),co=[[Is,w],[Os,y],[Bs,x],[Ns,w],[Vs,w],[js,b],[Ls,b],[Gs,x],[qs,w],[Us,w],[Ws,v],[Hs,T],[Ks,w],[Js,w],[Zs,w],[ao,w],[so,w],[oo,w],[lo,w],[Qs,w],[Ys,w],[eo,M],[to,w],[no,w],[Ds,b],[Rs,w],[ro,w],[io,w],[uo,w]];for(const[e,t]of co)for(const[n,r]of e.values())$.set(n,t),C.set(r,n),S.set(n,r);const po=[["MusicgenForConditionalGeneration",bs,k],["CLIPTextModelWithProjection",fn,w],["SiglipTextModel",yn,w],["ClapTextModelWithProjection",rs,w],["ClapAudioModelWithProjection",is,w]];for(const[e,t,n]of po)$.set(e,n),C.set(t,e),S.set(e,t);class ho extends zs{static MODEL_CLASS_MAPPINGS=co.map((e=>e[0]));static BASE_IF_FAIL=!0}class mo extends zs{static MODEL_CLASS_MAPPINGS=[Ns]}class fo extends zs{static MODEL_CLASS_MAPPINGS=[Vs]}class go extends zs{static MODEL_CLASS_MAPPINGS=[js]}class _o extends zs{static MODEL_CLASS_MAPPINGS=[Ls]}class wo extends zs{static MODEL_CLASS_MAPPINGS=[Ds]}class yo extends zs{static MODEL_CLASS_MAPPINGS=[Rs]}class bo extends zs{static MODEL_CLASS_MAPPINGS=[Gs]}class vo extends zs{static MODEL_CLASS_MAPPINGS=[qs]}class xo extends zs{static MODEL_CLASS_MAPPINGS=[Us]}class Mo extends zs{static MODEL_CLASS_MAPPINGS=[Ws]}class To extends zs{static MODEL_CLASS_MAPPINGS=[Ks]}class ko extends zs{static MODEL_CLASS_MAPPINGS=[Js]}class $o extends zs{static MODEL_CLASS_MAPPINGS=[Zs]}class So extends zs{static MODEL_CLASS_MAPPINGS=[Qs]}class Co extends zs{static MODEL_CLASS_MAPPINGS=[Ys]}class Eo extends zs{static MODEL_CLASS_MAPPINGS=[eo]}class Po extends zs{static MODEL_CLASS_MAPPINGS=[to]}class Ao extends zs{static MODEL_CLASS_MAPPINGS=[no]}class Fo extends zs{static MODEL_CLASS_MAPPINGS=[ro]}class zo extends zs{static MODEL_CLASS_MAPPINGS=[io]}class Io extends zs{static MODEL_CLASS_MAPPINGS=[Xs]}class Oo extends zs{static MODEL_CLASS_MAPPINGS=[ao]}class Bo extends zs{static MODEL_CLASS_MAPPINGS=[so]}class Lo extends zs{static MODEL_CLASS_MAPPINGS=[oo]}class Do extends zs{static MODEL_CLASS_MAPPINGS=[lo]}class Ro extends zs{static MODEL_CLASS_MAPPINGS=[uo]}class No extends j{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:i=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=i}}class Vo extends j{constructor({logits:e}){super(),this.logits=e}}class jo extends j{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Go extends j{constructor({logits:e}){super(),this.logits=e}}class qo extends j{constructor({logits:e}){super(),this.logits=e}}class Uo extends j{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Wo extends j{constructor({logits:e}){super(),this.logits=e}}class Ho extends j{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Xo extends j{constructor({alphas:e}){super(),this.alphas=e}}class Ko extends j{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
/*!**********************************************!*\
!*** ./src/models/whisper/common_whisper.js ***!
\**********************************************/(e,t,n)=>{n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>i,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>a,whisper_language_to_code:()=>s});const r=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],i=new Map(r),a=new Map([...r.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function s(e){e=e.toLowerCase();let t=a.get(e);if(void 0===t){if(!i.has(e)){const t=2===e.length?i.keys():i.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -187,7 +187,7 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,E,P,A,F,z,I,O,B=Object.d
\**************************/(e,t,n)=>{n.r(t),n.d(t,{AudioClassificationPipeline:()=>C,AutomaticSpeechRecognitionPipeline:()=>P,DepthEstimationPipeline:()=>N,DocumentQuestionAnsweringPipeline:()=>L,FeatureExtractionPipeline:()=>$,FillMaskPipeline:()=>y,ImageClassificationPipeline:()=>F,ImageFeatureExtractionPipeline:()=>S,ImageSegmentationPipeline:()=>z,ImageToImagePipeline:()=>R,ImageToTextPipeline:()=>A,ObjectDetectionPipeline:()=>O,Pipeline:()=>f,QuestionAnsweringPipeline:()=>w,SummarizationPipeline:()=>v,Text2TextGenerationPipeline:()=>b,TextClassificationPipeline:()=>g,TextGenerationPipeline:()=>T,TextToAudioPipeline:()=>D,TokenClassificationPipeline:()=>_,TranslationPipeline:()=>x,ZeroShotAudioClassificationPipeline:()=>E,ZeroShotClassificationPipeline:()=>k,ZeroShotImageClassificationPipeline:()=>I,ZeroShotObjectDetectionPipeline:()=>B,pipeline:()=>G});var r=n(/*! ./tokenizers.js */"./src/tokenizers.js"),i=n(/*! ./models.js */"./src/models.js"),a=n(/*! ./processors.js */"./src/processors.js"),s=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/maths.js */"./src/utils/maths.js"),u=n(/*! ./utils/audio.js */"./src/utils/audio.js"),d=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),c=n(/*! ./utils/image.js */"./src/utils/image.js");async function p(e){return Array.isArray(e)||(e=[e]),await Promise.all(e.map((e=>c.RawImage.read(e))))}async function h(e,t){return Array.isArray(e)||(e=[e]),await Promise.all(e.map((e=>"string"==typeof e||e instanceof URL?(0,u.read_audio)(e,t):e instanceof Float64Array?new Float32Array(e):e)))}function m(e,t){t&&(e=e.map((e=>0|e)));const[n,r,i,a]=e;return{xmin:n,ymin:r,xmax:i,ymax:a}}class f extends s.Callable{constructor({task:e,model:t,tokenizer:n=null,processor:r=null}){super(),this.task=e,this.model=t,this.tokenizer=n,this.processor=r}async dispose(){await this.model.dispose()}}class g extends f{constructor(e){super(e)}async _call(e,{top_k:t=1}={}){const n=this.tokenizer(e,{padding:!0,truncation:!0}),r=await this.model(n),i="multi_label_classification"===this.model.config.problem_type?e=>e.sigmoid():e=>new d.Tensor("float32",(0,l.softmax)(e.data),e.dims),a=this.model.config.id2label,s=[];for(const e of r.logits){const n=i(e),r=await(0,d.topk)(n,t),o=r[0].tolist(),l=r[1].tolist().map(((e,t)=>({label:a?a[e]:`LABEL_${e}`,score:o[t]})));1===t?s.push(...l):s.push(l)}return Array.isArray(e)||1===t?s:s[0]}}class _ extends f{constructor(e){super(e)}async _call(e,{ignore_labels:t=["O"]}={}){const n=Array.isArray(e),r=this.tokenizer(n?e:[e],{padding:!0,truncation:!0}),i=(await this.model(r)).logits,a=this.model.config.id2label,s=[];for(let e=0;e<i.dims[0];++e){const n=r.input_ids[e],o=i[e],u=[];for(let e=0;e<o.dims[0];++e){const r=o[e],i=(0,l.max)(r.data)[1],s=a?a[i]:`LABEL_${i}`;if(t.includes(s))continue;const d=this.tokenizer.decode([n[e].item()],{skip_special_tokens:!0});if(""===d)continue;const c=(0,l.softmax)(r.data);u.push({entity:s,score:c[i],index:e,word:d})}s.push(u)}return n?s:s[0]}}class w extends f{constructor(e){super(e)}async _call(e,t,{top_k:n=1}={}){const r=this.tokenizer(e,{text_pair:t,padding:!0,truncation:!0}),{start_logits:i,end_logits:a}=await this.model(r),s=r.input_ids.tolist(),u=r.attention_mask.tolist(),d=this.tokenizer.all_special_ids,c=[];for(let e=0;e<i.dims[0];++e){const t=s[e],r=t.findIndex((e=>e==this.tokenizer.sep_token_id)),p=(u[e].map(((e,n)=>1==e&&(0===n||n>r&&-1===d.findIndex((e=>e==t[n]))))),i[e].tolist()),h=a[e].tolist();for(let 
n=1;n<p.length;++n)(0==u[e]||n<=r||-1!==d.findIndex((e=>e==t[n])))&&(p[n]=-1/0,h[n]=-1/0);const m=(0,l.softmax)(p).map(((e,t)=>[e,t])),f=(0,l.softmax)(h).map(((e,t)=>[e,t]));m[0][0]=0,f[0][0]=0;const g=(0,o.product)(m,f).filter((e=>e[0][1]<=e[1][1])).map((e=>[e[0][1],e[1][1],e[0][0]*e[1][0]])).sort(((e,t)=>t[2]-e[2]));for(let e=0;e<Math.min(g.length,n);++e){const[n,r,i]=g[e],a=t.slice(n,r+1),s=this.tokenizer.decode(a,{skip_special_tokens:!0});c.push({answer:s,score:i})}}return 1===n?c[0]:c}}class y extends f{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=this.tokenizer(e,{padding:!0,truncation:!0}),{logits:r}=await this.model(n),i=[],a=n.input_ids.tolist();for(let e=0;e<a.length;++e){const n=a[e],s=n.findIndex((e=>e==this.tokenizer.mask_token_id));if(-1===s)throw Error(`Mask token (${this.tokenizer.mask_token}) not found in text.`);const o=r[e][s],u=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(o.data),o.dims),t),c=u[0].tolist(),p=u[1].tolist();i.push(p.map(((e,t)=>{const r=n.slice();return r[s]=e,{score:c[t],token:Number(e),token_str:this.tokenizer.model.vocab[e],sequence:this.tokenizer.decode(r,{skip_special_tokens:!0})}})))}return Array.isArray(e)?i:i[0]}}class b extends f{_key="generated_text";constructor(e){super(e)}async _call(e,t={}){Array.isArray(e)||(e=[e]),this.model.config.prefix&&(e=e.map((e=>this.model.config.prefix+e)));const n=this.model.config.task_specific_params;n&&n[this.task]&&n[this.task].prefix&&(e=e.map((e=>n[this.task].prefix+e)));const r=this.tokenizer,i={padding:!0,truncation:!0};let a;a=this instanceof x&&"_build_translation_inputs"in r?r._build_translation_inputs(e,i,t):r(e,i);const s=await this.model.generate({...a,...t});return r.batch_decode(s,{skip_special_tokens:!0}).map((e=>({[this._key]:e})))}}class v extends b{_key="summary_text";constructor(e){super(e)}}class x extends b{_key="translation_text";constructor(e){super(e)}}function M(e){return Array.isArray(e)&&e.every((e=>"role"in e&&"content"in e))}class T extends f{constructor(e){super(e)}async _call(e,t={}){let n,r=!1,i=!1;if("string"==typeof e)n=e=[e];else if(Array.isArray(e)&&e.every((e=>"string"==typeof e)))r=!0,n=e;else{if(M(e))e=[e];else{if(!Array.isArray(e)||!e.every(M))throw new Error("Input must be a string, an array of strings, a Chat, or an array of Chats");r=!0}i=!0,n=e.map((e=>this.tokenizer.apply_chat_template(e,{tokenize:!1,add_generation_prompt:!0})))}const a=t.add_special_tokens??!1,s=!i&&(t.return_full_text??!0);this.tokenizer.padding_side="left";const o=this.tokenizer(n,{add_special_tokens:a,padding:!0,truncation:!0}),l=await this.model.generate({...o,...t}),u=this.tokenizer.batch_decode(l,{skip_special_tokens:!0});let d;!s&&o.input_ids.dims.at(-1)>0&&(d=this.tokenizer.batch_decode(o.input_ids,{skip_special_tokens:!0}).map((e=>e.length)));const c=Array.from({length:e.length},(e=>[]));for(let t=0;t<u.length;++t){const n=Math.floor(t/l.dims[0]*e.length);d&&(u[t]=u[t].slice(d[n])),c[n].push({generated_text:i?[...e[n],{role:"assistant",content:u[t]}]:u[t]})}return r||1!==c.length?c:c[0]}}class k extends f{constructor(e){super(e),this.label2id=Object.fromEntries(Object.entries(this.model.config.label2id).map((([e,t])=>[e.toLowerCase(),t]))),this.entailment_id=this.label2id.entailment,void 0===this.entailment_id&&(console.warn("Could not find 'entailment' in label2id mapping. 
Using 2 as entailment_id."),this.entailment_id=2),this.contradiction_id=this.label2id.contradiction??this.label2id.not_entailment,void 0===this.contradiction_id&&(console.warn("Could not find 'contradiction' in label2id mapping. Using 0 as contradiction_id."),this.contradiction_id=0)}async _call(e,t,{hypothesis_template:n="This example is {}.",multi_label:r=!1}={}){const i=Array.isArray(e);i||(e=[e]),Array.isArray(t)||(t=[t]);const a=t.map((e=>n.replace("{}",e))),s=r||1===t.length,o=[];for(const n of e){const e=[];for(const t of a){const r=this.tokenizer(n,{text_pair:t,padding:!0,truncation:!0}),i=await this.model(r);s?e.push([i.logits.data[this.contradiction_id],i.logits.data[this.entailment_id]]):e.push(i.logits.data[this.entailment_id])}const r=(s?e.map((e=>(0,l.softmax)(e)[1])):(0,l.softmax)(e)).map(((e,t)=>[e,t])).sort(((e,t)=>t[0]-e[0]));o.push({sequence:n,labels:r.map((e=>t[e[1]])),scores:r.map((e=>e[0]))})}return i?o:o[0]}}class $ extends f{constructor(e){super(e)}async _call(e,{pooling:t="none",normalize:n=!1,quantize:r=!1,precision:i="binary"}={}){const a=this.tokenizer(e,{padding:!0,truncation:!0}),s=await this.model(a);let o=s.last_hidden_state??s.logits??s.token_embeddings;if("none"===t);else if("mean"===t)o=(0,d.mean_pooling)(o,a.attention_mask);else{if("cls"!==t)throw Error(`Pooling method '${t}' not supported.`);o=o.slice(null,0)}return n&&(o=o.normalize(2,-1)),r&&(o=(0,d.quantize_embeddings)(o,i)),o}}class S extends f{constructor(e){super(e)}async _call(e,{pool:t=null}={}){const n=await p(e),{pixel_values:r}=await this.processor(n),i=await this.model({pixel_values:r});let a;if(t){if(!("pooler_output"in i))throw Error("No pooled output was returned. Make sure the model has a 'pooler' layer when using the 'pool' option.");a=i.pooler_output}else a=i.last_hidden_state??i.logits??i.image_embeds;return a}}class C extends f{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=this.processor.feature_extractor.config.sampling_rate,r=await h(e,n),i=this.model.config.id2label,a=[];for(const e of r){const n=await this.processor(e),r=(await this.model(n)).logits[0],s=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(r.data),r.dims),t),o=s[0].tolist(),u=s[1].tolist().map(((e,t)=>({label:i?i[e]:`LABEL_${e}`,score:o[t]})));a.push(u)}return Array.isArray(e)?a:a[0]}}class E extends f{constructor(e){super(e)}async _call(e,t,{hypothesis_template:n="This is a sound of {}."}={}){const r=!Array.isArray(e);r&&(e=[e]);const i=t.map((e=>n.replace("{}",e))),a=this.tokenizer(i,{padding:!0,truncation:!0}),s=this.processor.feature_extractor.config.sampling_rate,o=await h(e,s),u=[];for(const e of o){const n=await this.processor(e),r=await this.model({...a,...n}),i=(0,l.softmax)(r.logits_per_audio.data);u.push([...i].map(((e,n)=>({score:e,label:t[n]}))))}return r?u[0]:u}}class P extends f{constructor(e){super(e)}async _call(e,t={}){switch(this.model.config.model_type){case"whisper":return this._call_whisper(e,t);case"wav2vec2":case"wav2vec2-bert":case"unispeech":case"unispeech-sat":case"hubert":return this._call_wav2vec2(e,t);default:throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)}}async _call_wav2vec2(e,t){t.language&&console.warn('`language` parameter is not yet supported for `wav2vec2` models, defaulting to "English".'),t.task&&console.warn('`task` parameter is not yet supported for `wav2vec2` models, defaulting to "transcribe".');const n=!Array.isArray(e);n&&(e=[e]);const 
r=this.processor.feature_extractor.config.sampling_rate,i=await h(e,r),a=[];for(const e of i){const t=await this.processor(e),n=(await this.model(t)).logits[0],r=[];for(const e of n)r.push((0,l.max)(e.data)[1]);const i=this.tokenizer.decode(r);a.push({text:i})}return n?a[0]:a}async _call_whisper(e,t){const n=t.return_timestamps??!1,r=t.chunk_length_s??0,i=t.force_full_sequences??!1;let a=t.stride_length_s??null;const s={...t};"word"===n&&(s.return_token_timestamps=!0,s.return_timestamps=!1);const o=!Array.isArray(e);o&&(e=[e]);const u=this.processor.feature_extractor.config.chunk_length/this.model.config.max_source_positions,d=this.processor.feature_extractor.config.hop_length,c=this.processor.feature_extractor.config.sampling_rate,p=await h(e,c),m=[];for(const e of p){let t=[];if(r>0){if(null===a)a=r/6;else if(r<=a)throw Error("`chunk_length_s` must be larger than `stride_length_s`.");const n=c*r,i=c*a,s=n-2*i;let o=0;for(;;){const r=o+n,a=e.subarray(o,r),l=await this.processor(a),u=0===o,d=r>=e.length;if(t.push({stride:[a.length,u?0:i,d?0:i],input_features:l.input_features,is_last:d}),d)break;o+=s}}else t=[{stride:[e.length,0,0],input_features:(await this.processor(e)).input_features,is_last:!0}];for(const e of t){s.num_frames=Math.floor(e.stride[0]/d);const t=await this.model.generate({inputs:e.input_features,...s});"word"===n?(e.tokens=t.sequences.tolist()[0],e.token_timestamps=t.token_timestamps.tolist()[0].map((e=>(0,l.round)(e,2)))):e.tokens=t[0].tolist(),e.stride=e.stride.map((e=>e/c))}const[o,p]=this.tokenizer._decode_asr(t,{time_precision:u,return_timestamps:n,force_full_sequences:i});m.push({text:o,...p})}return o?m[0]:m}}class A extends f{constructor(e){super(e)}async _call(e,t={}){const n=Array.isArray(e),r=await p(e),{pixel_values:i}=await this.processor(r),a=[];for(const e of i){e.dims=[1,...e.dims];const n=await this.model.generate({inputs:e,...t}),r=this.tokenizer.batch_decode(n,{skip_special_tokens:!0}).map((e=>({generated_text:e.trim()})));a.push(r)}return n?a:a[0]}}class F extends f{constructor(e){super(e)}async _call(e,{top_k:t=5}={}){const n=await p(e),{pixel_values:r}=await this.processor(n),i=await this.model({pixel_values:r}),a=this.model.config.id2label,s=[];for(const e of i.logits){const n=await(0,d.topk)(new d.Tensor("float32",(0,l.softmax)(e.data),e.dims),t),r=n[0].tolist(),i=n[1].tolist().map(((e,t)=>({label:a?a[e]:`LABEL_${e}`,score:r[t]})));s.push(i)}return Array.isArray(e)?s:s[0]}}class z extends f{constructor(e){super(e),this.subtasks_mapping={panoptic:"post_process_panoptic_segmentation",instance:"post_process_instance_segmentation",semantic:"post_process_semantic_segmentation"}}async _call(e,{threshold:t=.5,mask_threshold:n=.5,overlap_mask_area_threshold:r=.8,label_ids_to_fuse:i=null,target_sizes:a=null,subtask:s=null}={}){if(Array.isArray(e)&&1!==e.length)throw Error("Image segmentation pipeline currently only supports a batch size of 1.");const o=await p(e),l=o.map((e=>[e.height,e.width])),{pixel_values:u,pixel_mask:d}=await this.processor(o),h=await this.model({pixel_values:u,pixel_mask:d});let m=null;if(null!==s)m=this.subtasks_mapping[s];else for(let[e,t]of Object.entries(this.subtasks_mapping))if(t in this.processor.feature_extractor){m=this.processor.feature_extractor[t].bind(this.processor.feature_extractor),s=e;break}const f=this.model.config.id2label,g=[];if("panoptic"===s||"instance"===s){const e=m(h,t,n,r,i,a??l)[0],s=e.segmentation;for(const t of e.segments_info){const e=new Uint8ClampedArray(s.data.length);for(let 
n=0;n<s.data.length;++n)s.data[n]===t.id&&(e[n]=255);const n=new c.RawImage(e,s.dims[1],s.dims[0],1);g.push({score:t.score,label:f[t.label_id],mask:n})}}else{if("semantic"!==s)throw Error(`Subtask ${s} not supported.`);{const{segmentation:e,labels:t}=m(h,a??l)[0];for(const n of t){const t=new Uint8ClampedArray(e.data.length);for(let r=0;r<e.data.length;++r)e.data[r]===n&&(t[r]=255);const r=new c.RawImage(t,e.dims[1],e.dims[0],1);g.push({score:null,label:f[n],mask:r})}}}return g}}class I extends f{constructor(e){super(e)}async _call(e,t,{hypothesis_template:n="This is a photo of {}"}={}){const r=Array.isArray(e),i=await p(e),a=t.map((e=>n.replace("{}",e))),s=this.tokenizer(a,{padding:"siglip"!==this.model.config.model_type||"max_length",truncation:!0}),{pixel_values:o}=await this.processor(i),u=await this.model({...s,pixel_values:o}),d="siglip"===this.model.config.model_type?e=>e.sigmoid().data:e=>(0,l.softmax)(e.data),c=[];for(const e of u.logits_per_image){const n=[...d(e)].map(((e,n)=>({score:e,label:t[n]})));n.sort(((e,t)=>t.score-e.score)),c.push(n)}return r?c:c[0]}}class O extends f{constructor(e){super(e)}async _call(e,{threshold:t=.9,percentage:n=!1}={}){const r=Array.isArray(e);if(r&&1!==e.length)throw Error("Object detection pipeline currently only supports a batch size of 1.");const i=await p(e),a=n?null:i.map((e=>[e.height,e.width])),{pixel_values:s,pixel_mask:o}=await this.processor(i),l=await this.model({pixel_values:s,pixel_mask:o}),u=this.processor.feature_extractor.post_process_object_detection(l,t,a),d=this.model.config.id2label,c=u.map((e=>e.boxes.map(((t,r)=>({score:e.scores[r],label:d[e.classes[r]],box:m(t,!n)})))));return r?c:c[0]}}class B extends f{constructor(e){super(e)}async _call(e,t,{threshold:n=.1,top_k:r=null,percentage:i=!1}={}){const a=Array.isArray(e),s=await p(e),o=this.tokenizer(t,{padding:!0,truncation:!0}),l=await this.processor(s),u=[];for(let e=0;e<s.length;++e){const a=s[e],d=i?null:[[a.height,a.width]],c=l.pixel_values[e].unsqueeze_(0),p=await this.model({...o,pixel_values:c}),h=this.processor.feature_extractor.post_process_object_detection(p,n,d,!0)[0];let f=h.boxes.map(((e,n)=>({score:h.scores[n],label:t[h.classes[n]],box:m(e,!i)}))).sort(((e,t)=>t.score-e.score));null!==r&&(f=f.slice(0,r)),u.push(f)}return a?u:u[0]}}class L extends f{constructor(e){super(e)}async _call(e,t,n={}){throw new Error("This pipeline is not yet supported in Transformers.js v3.")}}class D extends f{DEFAULT_VOCODER_ID="Xenova/speecht5_hifigan";constructor(e){super(e),this.vocoder=e.vocoder??null}async _call(e,{speaker_embeddings:t=null}={}){return this.processor?this._call_text_to_spectrogram(e,{speaker_embeddings:t}):this._call_text_to_waveform(e)}async _call_text_to_waveform(e){const t=this.tokenizer(e,{padding:!0,truncation:!0}),{waveform:n}=await this.model(t),r=this.model.config.sampling_rate;return{audio:n.data,sampling_rate:r}}async _call_text_to_spectrogram(e,{speaker_embeddings:t}){if(this.vocoder||(console.log("No vocoder specified, using default HifiGan vocoder."),this.vocoder=await i.AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID,{dtype:"fp32"})),("string"==typeof t||t instanceof URL)&&(t=new Float32Array(await(await fetch(t)).arrayBuffer())),t instanceof Float32Array)t=new d.Tensor("float32",t,[1,t.length]);else if(!(t instanceof d.Tensor))throw new Error("Speaker embeddings must be a `Tensor`, `Float32Array`, `string`, or `URL`.");const{input_ids:n}=this.tokenizer(e,{padding:!0,truncation:!0}),{waveform:r}=await 
this.model.generate_speech(n,t,{vocoder:this.vocoder}),a=this.processor.feature_extractor.config.sampling_rate;return{audio:r.data,sampling_rate:a}}}class R extends f{constructor(e){super(e)}async _call(e){const t=await p(e),n=await this.processor(t),r=await this.model(n),i=[];for(const e of r.reconstruction){const t=e.squeeze().clamp_(0,1).mul_(255).round_().to("uint8");i.push(c.RawImage.fromTensor(t))}return i.length>1?i:i[0]}}class N extends f{constructor(e){super(e)}async _call(e){const t=await p(e),n=await this.processor(t),{predicted_depth:r}=await this.model(n),i=[];for(let e=0;e<t.length;++e){const n=(0,d.interpolate)(r[e],t[e].size.reverse(),"bilinear",!1),a=n.mul_(255/(0,l.max)(n.data)[0]).to("uint8");i.push({predicted_depth:r[e],depth:c.RawImage.fromTensor(a)})}return i.length>1?i:i[0]}}const V=Object.freeze({"text-classification":{tokenizer:r.AutoTokenizer,pipeline:g,model:i.AutoModelForSequenceClassification,default:{model:"Xenova/distilbert-base-uncased-finetuned-sst-2-english"},type:"text"},"token-classification":{tokenizer:r.AutoTokenizer,pipeline:_,model:i.AutoModelForTokenClassification,default:{model:"Xenova/bert-base-multilingual-cased-ner-hrl"},type:"text"},"question-answering":{tokenizer:r.AutoTokenizer,pipeline:w,model:i.AutoModelForQuestionAnswering,default:{model:"Xenova/distilbert-base-cased-distilled-squad"},type:"text"},"fill-mask":{tokenizer:r.AutoTokenizer,pipeline:y,model:i.AutoModelForMaskedLM,default:{model:"Xenova/bert-base-uncased"},type:"text"},summarization:{tokenizer:r.AutoTokenizer,pipeline:v,model:i.AutoModelForSeq2SeqLM,default:{model:"Xenova/distilbart-cnn-6-6"},type:"text"},translation:{tokenizer:r.AutoTokenizer,pipeline:x,model:i.AutoModelForSeq2SeqLM,default:{model:"Xenova/t5-small"},type:"text"},"text2text-generation":{tokenizer:r.AutoTokenizer,pipeline:b,model:i.AutoModelForSeq2SeqLM,default:{model:"Xenova/flan-t5-small"},type:"text"},"text-generation":{tokenizer:r.AutoTokenizer,pipeline:T,model:i.AutoModelForCausalLM,default:{model:"Xenova/gpt2"},type:"text"},"zero-shot-classification":{tokenizer:r.AutoTokenizer,pipeline:k,model:i.AutoModelForSequenceClassification,default:{model:"Xenova/distilbert-base-uncased-mnli"},type:"text"},"audio-classification":{pipeline:C,model:i.AutoModelForAudioClassification,processor:a.AutoProcessor,default:{model:"Xenova/wav2vec2-base-superb-ks"},type:"audio"},"zero-shot-audio-classification":{tokenizer:r.AutoTokenizer,pipeline:E,model:i.AutoModel,processor:a.AutoProcessor,default:{model:"Xenova/clap-htsat-unfused"},type:"multimodal"},"automatic-speech-recognition":{tokenizer:r.AutoTokenizer,pipeline:P,model:[i.AutoModelForSpeechSeq2Seq,i.AutoModelForCTC],processor:a.AutoProcessor,default:{model:"Xenova/whisper-tiny.en"},type:"multimodal"},"text-to-audio":{tokenizer:r.AutoTokenizer,pipeline:D,model:[i.AutoModelForTextToWaveform,i.AutoModelForTextToSpectrogram],processor:[a.AutoProcessor,null],default:{model:"Xenova/speecht5_tts"},type:"text"},"image-to-text":{tokenizer:r.AutoTokenizer,pipeline:A,model:i.AutoModelForVision2Seq,processor:a.AutoProcessor,default:{model:"Xenova/vit-gpt2-image-captioning"},type:"multimodal"},"image-classification":{pipeline:F,model:i.AutoModelForImageClassification,processor:a.AutoProcessor,default:{model:"Xenova/vit-base-patch16-224"},type:"multimodal"},"image-segmentation":{pipeline:z,model:[i.AutoModelForImageSegmentation,i.AutoModelForSemanticSegmentation],processor:a.AutoProcessor,default:{model:"Xenova/detr-resnet-50-panoptic"},type:"multimodal"},"zero-shot-image-classification
":{tokenizer:r.AutoTokenizer,pipeline:I,model:i.AutoModel,processor:a.AutoProcessor,default:{model:"Xenova/clip-vit-base-patch32"},type:"multimodal"},"object-detection":{pipeline:O,model:i.AutoModelForObjectDetection,processor:a.AutoProcessor,default:{model:"Xenova/detr-resnet-50"},type:"multimodal"},"zero-shot-object-detection":{tokenizer:r.AutoTokenizer,pipeline:B,model:i.AutoModelForZeroShotObjectDetection,processor:a.AutoProcessor,default:{model:"Xenova/owlvit-base-patch32"},type:"multimodal"},"document-question-answering":{tokenizer:r.AutoTokenizer,pipeline:L,model:i.AutoModelForDocumentQuestionAnswering,processor:a.AutoProcessor,default:{model:"Xenova/donut-base-finetuned-docvqa"},type:"multimodal"},"image-to-image":{pipeline:R,model:i.AutoModelForImageToImage,processor:a.AutoProcessor,default:{model:"Xenova/swin2SR-classical-sr-x2-64"},type:"image"},"depth-estimation":{pipeline:N,model:i.AutoModelForDepthEstimation,processor:a.AutoProcessor,default:{model:"Xenova/dpt-large"},type:"image"},"feature-extraction":{tokenizer:r.AutoTokenizer,pipeline:$,model:i.AutoModel,default:{model:"Xenova/all-MiniLM-L6-v2"},type:"text"},"image-feature-extraction":{processor:a.AutoProcessor,pipeline:S,model:[i.AutoModelForImageFeatureExtraction,i.AutoModel],default:{model:"Xenova/vit-base-patch16-224-in21k"},type:"image"}}),j=Object.freeze({"sentiment-analysis":"text-classification",ner:"token-classification",asr:"automatic-speech-recognition","text-to-speech":"text-to-audio",embeddings:"feature-extraction"});async function G(e,t=null,{progress_callback:n=null,config:r=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",device:l=null,dtype:u=null,model_file_name:d=null,session_options:c={}}={}){e=j[e]??e;const p=V[e.split("_",1)[0]];if(!p)throw Error(`Unsupported pipeline: ${e}. Must be one of [${Object.keys(V)}]`);t||(t=p.default.model,console.log(`No model specified. Using default model: "${t}".`));const h={progress_callback:n,config:r,cache_dir:i,local_files_only:a,revision:s,device:l,dtype:u,model_file_name:d,session_options:c},m=new Map([["tokenizer",p.tokenizer],["model",p.model],["processor",p.processor]]),f=await async function(e,t,n){const r=Object.create(null),i=[];for(let[a,s]of e.entries()){if(!s)continue;let e;e=Array.isArray(s)?new Promise((async(e,r)=>{let i;for(let a of s){if(null===a)return void e(null);try{return void e(await a.from_pretrained(t,n))}catch(e){if(e.message?.includes("Unsupported model type"))i=e;else{if(!e.message?.includes("Could not locate file"))return void r(e);i=e}}}r(i)})):s.from_pretrained(t,n),r[a]=e,i.push(e)}await Promise.all(i);for(let[e,t]of Object.entries(r))r[e]=await t;return r}(m,t,h);f.task=e,(0,o.dispatchCallback)(n,{status:"ready",task:e,model:t});return new(0,p.pipeline)(f)}},"./src/processors.js":
|
|
188
188
|
/*!***************************!*\
|
|
189
189
|
!*** ./src/processors.js ***!
|
|
190
|
-
\***************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>Z,AutoProcessor:()=>pe,BeitFeatureExtractor:()=>j,BitImageProcessor:()=>v,CLIPFeatureExtractor:()=>M,CLIPImageProcessor:()=>T,ChineseCLIPFeatureExtractor:()=>k,ClapFeatureExtractor:()=>ee,ConvNextFeatureExtractor:()=>S,ConvNextImageProcessor:()=>C,DPTFeatureExtractor:()=>y,DPTImageProcessor:()=>b,DeiTFeatureExtractor:()=>V,DetrFeatureExtractor:()=>U,DonutFeatureExtractor:()=>G,EfficientNetImageProcessor:()=>A,FeatureExtractor:()=>f,Florence2Processor:()=>ce,GLPNFeatureExtractor:()=>x,ImageFeatureExtractor:()=>g,MobileNetV1FeatureExtractor:()=>F,MobileNetV2FeatureExtractor:()=>z,MobileNetV3FeatureExtractor:()=>I,MobileNetV4FeatureExtractor:()=>O,MobileViTFeatureExtractor:()=>B,MobileViTImageProcessor:()=>L,NougatImageProcessor:()=>q,OwlViTFeatureExtractor:()=>D,OwlViTProcessor:()=>de,Owlv2ImageProcessor:()=>R,Processor:()=>ie,PyAnnoteFeatureExtractor:()=>te,PyAnnoteProcessor:()=>le,RTDetrImageProcessor:()=>N,SamImageProcessor:()=>H,SamProcessor:()=>ae,SapiensFeatureExtractor:()=>_,SeamlessM4TFeatureExtractor:()=>J,SegformerFeatureExtractor:()=>w,SiglipImageProcessor:()=>$,SpeechT5FeatureExtractor:()=>re,SpeechT5Processor:()=>ue,Swin2SRImageProcessor:()=>X,ViTFeatureExtractor:()=>E,ViTImageProcessor:()=>P,VitMatteImageProcessor:()=>K,Wav2Vec2FeatureExtractor:()=>Y,Wav2Vec2ProcessorWithLM:()=>oe,WeSpeakerFeatureExtractor:()=>ne,WhisperFeatureExtractor:()=>Q,WhisperProcessor:()=>se,YolosFeatureExtractor:()=>W});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(n(/*! ./utils/image.js */"./src/utils/image.js"),n(/*! ./utils/audio.js */"./src/utils/audio.js"));function u([e,t,n,r]){return[e-n/2,t-r/2,e+n/2,t+r/2]}function d(e,t=.5,n=null,r=!1){const i=e.logits,a=e.pred_boxes,[o,l,d]=i.dims;if(null!==n&&n.length!==o)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let c=[];for(let e=0;e<o;++e){let o=null!==n?n[e]:null,p={boxes:[],classes:[],scores:[]},h=i[e],m=a[e];for(let e=0;e<l;++e){let n,i=h[e],a=[];if(r){n=i.sigmoid().data;for(let e=0;e<n.length;++e)n[e]>t&&a.push(e)}else{let e=(0,s.max)(i.data)[1];if(e===d-1)continue;if(n=(0,s.softmax)(i.data),n[e]<t)continue;a.push(e)}for(const t of a){let r=m[e].data;r=u(r),null!==o&&(r=r.map(((e,t)=>e*o[(t+1)%2]))),p.boxes.push(r),p.classes.push(t),p.scores.push(n[t])}}c.push(p)}return c}function c(e,t=null){const n=e.logits,r=n.dims[0];if(null!==t&&t.length!==r)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const i=[];for(let e=0;e<r;++e){const r=null!==t?t[e]:null;let a=n[e];null!==r&&(a=(0,o.interpolate)(a,r,"bilinear",!1));const[s,l]=r??a.dims.slice(-2),u=new o.Tensor("int32",new Int32Array(s*l),[s,l]),d=a[0].data,c=u.data;for(let e=1;e<a.dims[0];++e){const t=a[e].data;for(let n=0;n<t.length;++n)t[n]>d[n]&&(d[n]=t[n],c[n]=e)}const p=new Array(a.dims[0]),h=u.data;for(let e=0;e<h.length;++e){const t=h[e];p[t]=t}const m=p.filter((e=>void 0!==e));i.push({segmentation:u,labels:m})}return i}function p(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function h(e,t,n=0,r=null){const i=e/t;let a=(0,s.bankers_round)(i)*t;return null!==r&&a>r&&(a=Math.floor(i)*t),a<n&&(a=Math.ceil(i)*t),a}function m([e,t],n){return[Math.max(Math.floor(e/n),1)*n,Math.max(Math.floor(t/n),1)*n]}class f extends r.Callable{constructor(e){super(),this.config=e}}class g extends f{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,n=2){const r=e.height,i=e.width,a=t.height,s=t.width;let o=Math.min(r,a),l=Math.min(i,s);return o===r&&l===i?e:(r>i?l=Math.floor(i*o/r):i>r&&(o=Math.floor(r*l/i)),await e.resize(l,o,{resample:n}))}async crop_margin(e,t=200){const n=e.clone().grayscale(),r=(0,s.min)(n.data)[0],i=(0,s.max)(n.data)[0]-r;if(0===i)return e;const a=t/255;let o=n.width,l=n.height,u=0,d=0;const c=n.data;for(let e=0;e<n.height;++e){const t=e*n.width;for(let s=0;s<n.width;++s)(c[t+s]-r)/i<a&&(o=Math.min(o,s),l=Math.min(l,e),u=Math.max(u,s),d=Math.max(d,e))}return e=await e.crop([o,l,u,d])}pad_image(e,t,n,{mode:r="constant",center:a=!1,constant_values:s=0}={}){const[o,l,u]=t;let d,c;if("number"==typeof n?(d=n,c=n):(d=n.width,c=n.height),d!==l||c!==o){const n=new Float32Array(d*c*u);if(Array.isArray(s))for(let e=0;e<n.length;++e)n[e]=s[e%u];else 0!==s&&n.fill(s);const[p,h]=a?[Math.floor((d-l)/2),Math.floor((c-o)/2)]:[0,0];for(let t=0;t<o;++t){const r=(t+h)*d,i=t*l;for(let t=0;t<l;++t){const a=(r+t+p)*u,s=(i+t)*u;for(let t=0;t<u;++t)n[a+t]=e[s+t]}}if("symmetric"===r){if(a)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=o-1,r=l-1;for(let a=0;a<c;++a){const s=a*d,c=(0,i.calculateReflectOffset)(a,t)*l;for(let t=0;t<d;++t){if(a<o&&t<l)continue;const d=(s+t)*u,p=(c+(0,i.calculateReflectOffset)(t,r))*u;for(let t=0;t<u;++t)n[d+t]=e[p+t]}}}e=n,t=[c,d,u]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[n,r]=e.size;let i,a;if(this.do_thumbnail){const{height:e,width:n}=t;i=Math.min(e,n)}else Number.isInteger(t)?(i=t,a=this.config.max_size??i):void 0!==t&&(i=t.shortest_edge,a=t.longest_edge);if(void 0!==i||void 0!==a){const e=void 0===i?1:Math.max(i/n,i/r),t=n*e,s=r*e,o=void 0===a?1:Math.min(a/t,a/s);let l=Math.floor(Number((t*o).toFixed(2))),u=Math.floor(Number((s*o).toFixed(2)));return void 0!==this.size_divisibility&&([l,u]=m([l,u],this.size_divisibility)),[l,u]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,i=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let 
t=i/r,a=e/n;Math.abs(1-a)<Math.abs(1-t)?t=a:a=t,i=h(t*r,this.config.ensure_multiple_of),e=h(a*n,this.config.ensure_multiple_of)}return[e,i]}if(void 0!==this.size_divisibility)return m([n,r],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,n]=this.get_resize_output_image_size(e,this.size);return await e.resize(t,n,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:n=null,do_convert_rgb:r=null,do_convert_grayscale:i=null,do_flip_channel_order:a=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[s,l]=e.size;if(r??this.do_convert_rgb?e=e.rgb():i&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,n;Number.isInteger(this.crop_size)?(t=this.crop_size,n=this.crop_size):(t=this.crop_size.width,n=this.crop_size.height),e=await e.center_crop(t,n)}const u=[e.height,e.width];let d=Float32Array.from(e.data),c=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let n=this.image_std;if(Array.isArray(this.image_std)||(n=new Array(e.channels).fill(t)),t.length!==e.channels||n.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${n.length}) must match the number of channels in the image (${e.channels}).`);for(let r=0;r<d.length;r+=e.channels)for(let i=0;i<e.channels;++i)d[r+i]=(d[r+i]-t[i])/n[i]}if(n??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,c]=t}else if(this.size_divisibility){const[e,t]=m([c[1],c[0]],this.size_divisibility);[d,c]=this.pad_image(d,c,{width:e,height:t})}if(a??this.do_flip_channel_order){if(3!==c[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,s],reshaped_input_size:u,pixel_values:new o.Tensor("float32",d,c).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const n=await Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,o.stack)(n.map((e=>e.pixel_values)),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class _ extends g{post_process_semantic_segmentation(...e){return c(...e)}}class w extends g{post_process_semantic_segmentation(...e){return c(...e)}}class y extends g{}class b extends y{}class v extends g{}class x extends g{}class M extends g{}class T extends M{}class k extends g{}class $ extends g{}class S extends g{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const n=Math.floor(t/this.crop_pct),[r,i]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(r,i,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class C extends S{}class E extends g{}class P extends g{}class A extends g{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class F extends g{}class z extends g{}class I extends g{}class O extends g{}class B extends g{}class L extends B{}class D extends 
g{post_process_object_detection(...e){return d(...e)}}class R extends D{}class N extends g{post_process_object_detection(...e){return d(...e)}}class V extends g{}class j extends g{}class G extends g{pad_image(e,t,n,r={}){const[i,a,s]=t;let o=this.image_mean;Array.isArray(this.image_mean)||(o=new Array(s).fill(o));let l=this.image_std;Array.isArray(l)||(l=new Array(s).fill(o));const u=o.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,n,{center:!0,constant_values:u,...r})}}class q extends G{}class U extends g{async _call(e){const t=await super._call(e),n=[t.pixel_values.dims[0],64,64],r=new o.Tensor("int64",new BigInt64Array(n.reduce(((e,t)=>e*t))).fill(1n),n);return{...t,pixel_mask:r}}post_process_object_detection(...e){return d(...e)}remove_low_and_no_objects(e,t,n,r){let i=[],a=[],o=[];for(let l=0;l<e.dims[0];++l){let u=e[l],d=t[l],c=(0,s.max)(u.data)[1];if(c===r)continue;let p=(0,s.softmax)(u.data)[c];p>n&&(i.push(d),a.push(p),o.push(c))}return[i,a,o]}check_segment_validity(e,t,n,r=.5,i=.8){let a=[],s=0,o=0;const l=t[n].data;for(let t=0;t<e.length;++t)e[t]===n&&(a.push(t),++s),l[t]>=r&&++o;let u=s>0&&o>0;if(u){u=s/o>i}return[u,a]}compute_segments(e,t,n,r,i,a=null,s=null){let[l,u]=s??e[0].dims,d=new o.Tensor("int32",new Int32Array(l*u),[l,u]),c=[];if(null!==s)for(let t=0;t<e.length;++t)e[t]=(0,o.interpolate)(e[t],s,"bilinear",!1);let p=new Int32Array(e[0].data.length),h=new Float32Array(e[0].data.length);for(let n=0;n<e.length;++n){let r=t[n];const i=e[n].data;for(let e=0;e<i.length;++e)i[e]*=r,i[e]>h[e]&&(p[e]=n,h[e]=i[e])}let m=0;const f=d.data;for(let a=0;a<n.length;++a){let s=n[a],[o,l]=this.check_segment_validity(p,e,a,r,i);if(o){++m;for(let e of l)f[e]=m;c.push({id:m,label_id:s,score:t[a]})}}return[d,c]}post_process_panoptic_segmentation(e,t=.5,n=.5,r=.8,i=null,a=null){null===i&&(console.warn("`label_ids_to_fuse` unset. 
No instance will be fused."),i=new Set);const s=e.logits,l=e.pred_masks.sigmoid();let[u,d,c]=s.dims;if(c-=1,null!==a&&a.length!==u)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let p=[];for(let e=0;e<u;++e){let u=null!==a?a[e]:null,d=s[e],h=l[e],[m,f,g]=this.remove_low_and_no_objects(d,h,t,c);if(0===g.length){let[e,t]=u??h.dims.slice(-2),n=new o.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);p.push({segmentation:n,segments_info:[]});continue}let[_,w]=this.compute_segments(m,f,g,n,r,i,u);p.push({segmentation:_,segments_info:w})}return p}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class W extends g{post_process_object_detection(...e){return d(...e)}}class H extends g{reshape_input_points(e,t,n,r=!1){e=structuredClone(e);let a=(0,i.calculateDimensions)(e);if(3===a.length)r||(a=[1,...a]),e=[e];else if(4!==a.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let r=0;r<e.length;++r){let i=t[r],a=n[r],s=[a[0]/i[0],a[1]/i[1]];for(let t=0;t<e[r].length;++t)for(let n=0;n<e[r][t].length;++n)for(let i=0;i<e[r][t][n].length;++i)e[r][t][n][i]*=s[i%2]}return new o.Tensor("float32",Float32Array.from(e.flat(1/0)),a)}add_input_labels(e,t){let n=(0,i.calculateDimensions)(e);if(2===n.length)n=[1,...n],e=[e];else if(3!==n.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(n.some(((e,n)=>e!==t.dims[n])))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new o.Tensor("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:t=null,input_labels:n=null,input_boxes:r=null}={}){const i=await super._call(e);if(t&&(i.input_points=this.reshape_input_points(t,i.original_sizes,i.reshaped_input_sizes)),n){if(!i.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");i.input_labels=this.add_input_labels(n,i.input_points)}return r&&(i.input_boxes=this.reshape_input_points(r,i.original_sizes,i.reshaped_input_sizes,!0)),i}async post_process_masks(e,t,n,{mask_threshold:r=0,binarize:i=!0,pad_size:a=null}={}){const s=[],l=[(a=a??this.pad_size).height,a.width];for(let a=0;a<t.length;++a){const u=t[a],d=n[a];let c=await(0,o.interpolate_4d)(e[a],{mode:"bilinear",size:l});if(c=c.slice(null,null,[0,d[0]],[0,d[1]]),c=await(0,o.interpolate_4d)(c,{mode:"bilinear",size:u}),i){const e=c.data,t=new Uint8Array(e.length);for(let n=0;n<e.length;++n)e[n]>r&&(t[n]=1);c=new o.Tensor("bool",t,c.dims)}s.push(c)}return s}generate_crop_boxes(e,t,{crop_n_layers:n=0,overlap_ratio:r=512/1500,points_per_crop:i=32,crop_n_points_downscale_factor:a=1}={}){}}class X extends g{pad_image(e,t,n,r={}){const[i,a,s]=t;return super.pad_image(e,t,{width:a+(n-a%n)%n,height:i+(n-i%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...r})}}class K extends g{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const n=await Promise.all(e.map((e=>this.preprocess(e)))),r=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,o.stack)(n.map(((e,t)=>(0,o.cat)([e.pixel_values,r[t].pixel_values],0))),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class Q extends 
f{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=t.data,r=(0,s.max)(n)[0];for(let e=0;e<n.length;++e)n[e]=(Math.max(n[e],r-8)+4)/4;return t}async _call(e){let t;p(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class Y extends f{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,n=e.reduce(((e,n)=>e+(n-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(n+1e-7)))}async _call(e){p(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const n=[1,t.length];return{input_values:new o.Tensor("float32",t,n),attention_mask:new o.Tensor("int64",new BigInt64Array(t.length).fill(1n),n)}}}class J extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:r=!0,return_attention_mask:i=!0}={}){p(e,"SeamlessM4TFeatureExtractor");let a,s=await this._extract_fbank_features(e,this.config.max_length);if(r){const[e,t]=s.dims,n=s.data;for(let r=0;r<t;++r){let i=0;for(let a=0;a<e;++a)i+=n[a*t+r];const a=i/e;let s=0;for(let i=0;i<e;++i)s+=(n[i*t+r]-a)**2;s/=e-1;const o=Math.sqrt(s+1e-7);for(let i=0;i<e;++i){const e=i*t+r;n[e]=(n[e]-a)/o}}}if(t){const[e,t]=s.dims,r=s.data,l=e%n;if(l>0){const n=new Float32Array(t*(e+l));n.set(r),n.fill(this.config.padding_value,r.length);const u=e+l;s=new o.Tensor(s.type,n,[u,t]),i&&(a=new o.Tensor("int64",new BigInt64Array(u),[1,u]),a.data.fill(1n,0,e))}}const[l,u]=s.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const c=s.view(1,Math.floor(l/d),u*d),h={input_features:c};if(i){const e=c.dims[1],t=new BigInt64Array(e);if(a){const e=a.data;for(let n=1,r=0;n<l;n+=d,++r)t[r]=e[n]}else t.fill(1n);h.attention_mask=new o.Tensor("int64",t,[1,e])}return h}}class Z extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async 
_extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){p(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class ee extends f{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,n,r){let i,a=!1;const s=e.length-t;if(s>0){if("rand_trunc"!==n)throw new Error(`Truncation strategy "${n}" not implemented`);{a=!0;const n=Math.floor(Math.random()*(s+1));e=e.subarray(n,n+t),i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(s<0){let n=new Float64Array(t);if(n.set(e),"repeat"===r)for(let r=e.length;r<t;r+=e.length)n.set(e.subarray(0,Math.min(e.length,t-r)),r);else if("repeatpad"===r)for(let t=e.length;t<-s;t+=e.length)n.set(e,t);e=n}if("fusion"===n)throw new Error(`Truncation strategy "${n}" not implemented`);i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return i.unsqueeze_(0)}async _extract_fbank_features(e,t,n=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:n,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){p(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class te extends f{async _call(e){p(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new o.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const n=t/this.samples_to_frames(t)/this.config.sampling_rate,r=[];for(const t of e.tolist()){const e=[];let i=-1;for(let n=0;n<t.length;++n){const r=(0,s.softmax)(t[n]),[a,o]=(0,s.max)(r),[l,u]=[n,n+1];o!==i?(i=o,e.push({id:o,start:l,end:u,score:a})):(e.at(-1).end=u,e.at(-1).score+=a)}r.push(e.map((({id:e,start:t,end:r,score:i})=>({id:e,start:t*n,end:r*n,confidence:i/(r-t)}))))}return r}}class ne extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){p(e,"WeSpeakerFeatureExtractor");const t=(await 
this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,n=t.data,[r,i,a]=t.dims;for(let t=0;t<r;++t){const r=t*i*a,s=t*a;for(let t=0;t<i;++t){const i=r+t*a;for(let t=0;t<a;++t)n[i+t]-=e[s+t]}}}return{input_features:t}}}class re extends f{}class ie extends r.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class ae extends ie{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class se extends ie{async _call(e){return await this.feature_extractor(e)}}class oe extends ie{async _call(e){return await this.feature_extractor(e)}}class le extends ie{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class ue extends ie{async _call(e){return await this.feature_extractor(e)}}class de extends ie{}class ce extends ie{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:n,task_prompts_with_input:r}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(n??{})),this.task_prompts_with_input=new Map(Object.entries(r??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let i;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":i=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const a="ocr"===r?"quad_boxes":"bboxes",s=e.matchAll(this.regexes[a]),o=[],l=[];for(const[e,t,...r]of s)o.push(t?t.trim():o.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));i={labels:o,[a]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:i}}}class pe{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:g,WhisperFeatureExtractor:Q,ViTFeatureExtractor:E,MobileViTFeatureExtractor:B,MobileViTImageProcessor:L,MobileNetV1FeatureExtractor:F,MobileNetV2FeatureExtractor:z,MobileNetV3FeatureExtractor:I,MobileNetV4FeatureExtractor:O,OwlViTFeatureExtractor:D,Owlv2ImageProcessor:R,CLIPFeatureExtractor:M,CLIPImageProcessor:T,Florence2Processor:ce,ChineseCLIPFeatureExtractor:k,SiglipImageProcessor:$,ConvNextFeatureExtractor:S,ConvNextImageProcessor:C,SegformerFeatureExtractor:w,SapiensFeatureExtractor:_,BitImageProcessor:v,DPTImageProcessor:b,DPTFeatureExtractor:y,GLPNFeatureExtractor:x,BeitFeatureExtractor:j,DeiTFeatureExtractor:V,DetrFeatureExtractor:U,RTDetrImageProcessor:N,YolosFeatureExtractor:W,DonutFeatureExtractor:G,NougatImageProcessor:q,EfficientNetImageProcessor:A,ViTImageProcessor:P,VitMatteImageProcessor:K,SamImageProcessor:H,Swin2SRImageProcessor:X,Wav2Vec2FeatureExtractor:Y,SeamlessM4TFeatureExtractor:J,SpeechT5FeatureExtractor:re,ASTFeatureExtractor:Z,ClapFeatureExtractor:ee,PyAnnoteFeatureExtractor:te,WeSpeakerFeatureExtractor:ne};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:se,Wav2Vec2ProcessorWithLM:oe,PyAnnoteProcessor:le,SamProcessor:ae,SpeechT5Processor:ue,OwlViTProcessor:de,Florence2Processor:ce};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:s="main"}={}){let o=n??await(0,a.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:s}),l=o.feature_extractor_type??o.image_processor_type,u=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!u){if(void 0===o.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),u=g}return new(this.PROCESSOR_CLASS_MAPPING[o.processor_class]??ie)(new u(o))}}},"./src/tokenizers.js":
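A change visible between the removed processors line above and the added one below sits inside the semantic-segmentation post-processing helper: the old build walked the per-pixel argmax buffer through an extra alias of the segmentation data, while the new build iterates that buffer directly, so behaviour at that point is unchanged. The following is a self-contained, unminified sketch of that argmax-and-collect-labels step, written against plain typed arrays rather than the library's Tensor class; it is an illustration, not code taken from the package.

// Sketch of the per-pixel argmax behind post_process_semantic_segmentation (illustrative).
function semanticSegmentationArgmax(logits, numClasses, height, width) {
  // logits: Float32Array of length numClasses * height * width, laid out class-major.
  const numPixels = height * width;
  const segmentation = new Int32Array(numPixels);   // winning class id per pixel (starts at class 0)
  const best = logits.slice(0, numPixels);          // class-0 scores seed the running maximum
  for (let c = 1; c < numClasses; ++c) {
    const offset = c * numPixels;
    for (let i = 0; i < numPixels; ++i) {
      if (logits[offset + i] > best[i]) { best[i] = logits[offset + i]; segmentation[i] = c; }
    }
  }
  // The returned `labels` field lists only the class ids that actually appear in the map.
  const labels = [...new Set(segmentation)];
  return { segmentation, labels };
}

// 2 classes over a 2x2 image; the last pixel is won by class 1.
const demoLogits = new Float32Array([0.9, 0.8, 0.7, 0.1,  0.2, 0.3, 0.4, 0.6]);
console.log(semanticSegmentationArgmax(demoLogits, 2, 2, 2));
// { segmentation: Int32Array [0, 0, 0, 1], labels: [0, 1] }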
|
|
190
|
+
\***************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>Z,AutoProcessor:()=>pe,BeitFeatureExtractor:()=>j,BitImageProcessor:()=>v,CLIPFeatureExtractor:()=>M,CLIPImageProcessor:()=>T,ChineseCLIPFeatureExtractor:()=>k,ClapFeatureExtractor:()=>ee,ConvNextFeatureExtractor:()=>S,ConvNextImageProcessor:()=>C,DPTFeatureExtractor:()=>y,DPTImageProcessor:()=>b,DeiTFeatureExtractor:()=>V,DetrFeatureExtractor:()=>U,DonutFeatureExtractor:()=>G,EfficientNetImageProcessor:()=>A,FeatureExtractor:()=>f,Florence2Processor:()=>ce,GLPNFeatureExtractor:()=>x,ImageFeatureExtractor:()=>g,MobileNetV1FeatureExtractor:()=>F,MobileNetV2FeatureExtractor:()=>z,MobileNetV3FeatureExtractor:()=>I,MobileNetV4FeatureExtractor:()=>O,MobileViTFeatureExtractor:()=>B,MobileViTImageProcessor:()=>L,NougatImageProcessor:()=>q,OwlViTFeatureExtractor:()=>D,OwlViTProcessor:()=>de,Owlv2ImageProcessor:()=>R,Processor:()=>ie,PyAnnoteFeatureExtractor:()=>te,PyAnnoteProcessor:()=>le,RTDetrImageProcessor:()=>N,SamImageProcessor:()=>H,SamProcessor:()=>ae,SapiensFeatureExtractor:()=>_,SeamlessM4TFeatureExtractor:()=>J,SegformerFeatureExtractor:()=>w,SiglipImageProcessor:()=>$,SpeechT5FeatureExtractor:()=>re,SpeechT5Processor:()=>ue,Swin2SRImageProcessor:()=>X,ViTFeatureExtractor:()=>E,ViTImageProcessor:()=>P,VitMatteImageProcessor:()=>K,Wav2Vec2FeatureExtractor:()=>Y,Wav2Vec2ProcessorWithLM:()=>oe,WeSpeakerFeatureExtractor:()=>ne,WhisperFeatureExtractor:()=>Q,WhisperProcessor:()=>se,YolosFeatureExtractor:()=>W});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(n(/*! ./utils/image.js */"./src/utils/image.js"),n(/*! ./utils/audio.js */"./src/utils/audio.js"));function u([e,t,n,r]){return[e-n/2,t-r/2,e+n/2,t+r/2]}function d(e,t=.5,n=null,r=!1){const i=e.logits,a=e.pred_boxes,[o,l,d]=i.dims;if(null!==n&&n.length!==o)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let c=[];for(let e=0;e<o;++e){let o=null!==n?n[e]:null,p={boxes:[],classes:[],scores:[]},h=i[e],m=a[e];for(let e=0;e<l;++e){let n,i=h[e],a=[];if(r){n=i.sigmoid().data;for(let e=0;e<n.length;++e)n[e]>t&&a.push(e)}else{let e=(0,s.max)(i.data)[1];if(e===d-1)continue;if(n=(0,s.softmax)(i.data),n[e]<t)continue;a.push(e)}for(const t of a){let r=m[e].data;r=u(r),null!==o&&(r=r.map(((e,t)=>e*o[(t+1)%2]))),p.boxes.push(r),p.classes.push(t),p.scores.push(n[t])}}c.push(p)}return c}function c(e,t=null){const n=e.logits,r=n.dims[0];if(null!==t&&t.length!==r)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const i=[];for(let e=0;e<r;++e){const r=null!==t?t[e]:null;let a=n[e];null!==r&&(a=(0,o.interpolate)(a,r,"bilinear",!1));const[s,l]=r??a.dims.slice(-2),u=new o.Tensor("int32",new Int32Array(s*l),[s,l]),d=a[0].data,c=u.data;for(let e=1;e<a.dims[0];++e){const t=a[e].data;for(let n=0;n<t.length;++n)t[n]>d[n]&&(d[n]=t[n],c[n]=e)}const p=new Array(a.dims[0]);for(let e=0;e<c.length;++e){const t=c[e];p[t]=t}const h=p.filter((e=>void 0!==e));i.push({segmentation:u,labels:h})}return i}function p(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function h(e,t,n=0,r=null){const i=e/t;let a=(0,s.bankers_round)(i)*t;return null!==r&&a>r&&(a=Math.floor(i)*t),a<n&&(a=Math.ceil(i)*t),a}function m([e,t],n){return[Math.max(Math.floor(e/n),1)*n,Math.max(Math.floor(t/n),1)*n]}class f extends r.Callable{constructor(e){super(),this.config=e}}class g extends f{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,n=2){const r=e.height,i=e.width,a=t.height,s=t.width;let o=Math.min(r,a),l=Math.min(i,s);return o===r&&l===i?e:(r>i?l=Math.floor(i*o/r):i>r&&(o=Math.floor(r*l/i)),await e.resize(l,o,{resample:n}))}async crop_margin(e,t=200){const n=e.clone().grayscale(),r=(0,s.min)(n.data)[0],i=(0,s.max)(n.data)[0]-r;if(0===i)return e;const a=t/255;let o=n.width,l=n.height,u=0,d=0;const c=n.data;for(let e=0;e<n.height;++e){const t=e*n.width;for(let s=0;s<n.width;++s)(c[t+s]-r)/i<a&&(o=Math.min(o,s),l=Math.min(l,e),u=Math.max(u,s),d=Math.max(d,e))}return e=await e.crop([o,l,u,d])}pad_image(e,t,n,{mode:r="constant",center:a=!1,constant_values:s=0}={}){const[o,l,u]=t;let d,c;if("number"==typeof n?(d=n,c=n):(d=n.width,c=n.height),d!==l||c!==o){const n=new Float32Array(d*c*u);if(Array.isArray(s))for(let e=0;e<n.length;++e)n[e]=s[e%u];else 0!==s&&n.fill(s);const[p,h]=a?[Math.floor((d-l)/2),Math.floor((c-o)/2)]:[0,0];for(let t=0;t<o;++t){const r=(t+h)*d,i=t*l;for(let t=0;t<l;++t){const a=(r+t+p)*u,s=(i+t)*u;for(let t=0;t<u;++t)n[a+t]=e[s+t]}}if("symmetric"===r){if(a)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=o-1,r=l-1;for(let a=0;a<c;++a){const s=a*d,c=(0,i.calculateReflectOffset)(a,t)*l;for(let t=0;t<d;++t){if(a<o&&t<l)continue;const d=(s+t)*u,p=(c+(0,i.calculateReflectOffset)(t,r))*u;for(let t=0;t<u;++t)n[d+t]=e[p+t]}}}e=n,t=[c,d,u]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[n,r]=e.size;let i,a;if(this.do_thumbnail){const{height:e,width:n}=t;i=Math.min(e,n)}else Number.isInteger(t)?(i=t,a=this.config.max_size??i):void 0!==t&&(i=t.shortest_edge,a=t.longest_edge);if(void 0!==i||void 0!==a){const e=void 0===i?1:Math.max(i/n,i/r),t=n*e,s=r*e,o=void 0===a?1:Math.min(a/t,a/s);let l=Math.floor(Number((t*o).toFixed(2))),u=Math.floor(Number((s*o).toFixed(2)));return void 0!==this.size_divisibility&&([l,u]=m([l,u],this.size_divisibility)),[l,u]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,i=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let 
t=i/r,a=e/n;Math.abs(1-a)<Math.abs(1-t)?t=a:a=t,i=h(t*r,this.config.ensure_multiple_of),e=h(a*n,this.config.ensure_multiple_of)}return[e,i]}if(void 0!==this.size_divisibility)return m([n,r],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,n]=this.get_resize_output_image_size(e,this.size);return await e.resize(t,n,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:n=null,do_convert_rgb:r=null,do_convert_grayscale:i=null,do_flip_channel_order:a=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[s,l]=e.size;if(r??this.do_convert_rgb?e=e.rgb():i&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,n;Number.isInteger(this.crop_size)?(t=this.crop_size,n=this.crop_size):(t=this.crop_size.width,n=this.crop_size.height),e=await e.center_crop(t,n)}const u=[e.height,e.width];let d=Float32Array.from(e.data),c=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let n=this.image_std;if(Array.isArray(this.image_std)||(n=new Array(e.channels).fill(t)),t.length!==e.channels||n.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${n.length}) must match the number of channels in the image (${e.channels}).`);for(let r=0;r<d.length;r+=e.channels)for(let i=0;i<e.channels;++i)d[r+i]=(d[r+i]-t[i])/n[i]}if(n??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,c]=t}else if(this.size_divisibility){const[e,t]=m([c[1],c[0]],this.size_divisibility);[d,c]=this.pad_image(d,c,{width:e,height:t})}if(a??this.do_flip_channel_order){if(3!==c[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,s],reshaped_input_size:u,pixel_values:new o.Tensor("float32",d,c).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const n=await Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,o.stack)(n.map((e=>e.pixel_values)),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class _ extends g{post_process_semantic_segmentation(...e){return c(...e)}}class w extends g{post_process_semantic_segmentation(...e){return c(...e)}}class y extends g{}class b extends y{}class v extends g{}class x extends g{}class M extends g{}class T extends M{}class k extends g{}class $ extends g{}class S extends g{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const n=Math.floor(t/this.crop_pct),[r,i]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(r,i,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class C extends S{}class E extends g{}class P extends g{}class A extends g{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class F extends g{}class z extends g{}class I extends g{}class O extends g{}class B extends g{}class L extends B{}class D extends 
g{post_process_object_detection(...e){return d(...e)}}class R extends D{}class N extends g{post_process_object_detection(...e){return d(...e)}}class V extends g{}class j extends g{}class G extends g{pad_image(e,t,n,r={}){const[i,a,s]=t;let o=this.image_mean;Array.isArray(this.image_mean)||(o=new Array(s).fill(o));let l=this.image_std;Array.isArray(l)||(l=new Array(s).fill(o));const u=o.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,n,{center:!0,constant_values:u,...r})}}class q extends G{}class U extends g{async _call(e){const t=await super._call(e),n=[t.pixel_values.dims[0],64,64],r=new o.Tensor("int64",new BigInt64Array(n.reduce(((e,t)=>e*t))).fill(1n),n);return{...t,pixel_mask:r}}post_process_object_detection(...e){return d(...e)}remove_low_and_no_objects(e,t,n,r){let i=[],a=[],o=[];for(let l=0;l<e.dims[0];++l){let u=e[l],d=t[l],c=(0,s.max)(u.data)[1];if(c===r)continue;let p=(0,s.softmax)(u.data)[c];p>n&&(i.push(d),a.push(p),o.push(c))}return[i,a,o]}check_segment_validity(e,t,n,r=.5,i=.8){let a=[],s=0,o=0;const l=t[n].data;for(let t=0;t<e.length;++t)e[t]===n&&(a.push(t),++s),l[t]>=r&&++o;let u=s>0&&o>0;if(u){u=s/o>i}return[u,a]}compute_segments(e,t,n,r,i,a=null,s=null){let[l,u]=s??e[0].dims,d=new o.Tensor("int32",new Int32Array(l*u),[l,u]),c=[];if(null!==s)for(let t=0;t<e.length;++t)e[t]=(0,o.interpolate)(e[t],s,"bilinear",!1);let p=new Int32Array(e[0].data.length),h=new Float32Array(e[0].data.length);for(let n=0;n<e.length;++n){let r=t[n];const i=e[n].data;for(let e=0;e<i.length;++e)i[e]*=r,i[e]>h[e]&&(p[e]=n,h[e]=i[e])}let m=0;const f=d.data;for(let a=0;a<n.length;++a){let s=n[a],[o,l]=this.check_segment_validity(p,e,a,r,i);if(o){++m;for(let e of l)f[e]=m;c.push({id:m,label_id:s,score:t[a]})}}return[d,c]}post_process_panoptic_segmentation(e,t=.5,n=.5,r=.8,i=null,a=null){null===i&&(console.warn("`label_ids_to_fuse` unset. 
No instance will be fused."),i=new Set);const s=e.logits,l=e.pred_masks.sigmoid();let[u,d,c]=s.dims;if(c-=1,null!==a&&a.length!==u)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let p=[];for(let e=0;e<u;++e){let u=null!==a?a[e]:null,d=s[e],h=l[e],[m,f,g]=this.remove_low_and_no_objects(d,h,t,c);if(0===g.length){let[e,t]=u??h.dims.slice(-2),n=new o.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);p.push({segmentation:n,segments_info:[]});continue}let[_,w]=this.compute_segments(m,f,g,n,r,i,u);p.push({segmentation:_,segments_info:w})}return p}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class W extends g{post_process_object_detection(...e){return d(...e)}}class H extends g{reshape_input_points(e,t,n,r=!1){e=structuredClone(e);let a=(0,i.calculateDimensions)(e);if(3===a.length)r||(a=[1,...a]),e=[e];else if(4!==a.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let r=0;r<e.length;++r){let i=t[r],a=n[r],s=[a[0]/i[0],a[1]/i[1]];for(let t=0;t<e[r].length;++t)for(let n=0;n<e[r][t].length;++n)for(let i=0;i<e[r][t][n].length;++i)e[r][t][n][i]*=s[i%2]}return new o.Tensor("float32",Float32Array.from(e.flat(1/0)),a)}add_input_labels(e,t){let n=(0,i.calculateDimensions)(e);if(2===n.length)n=[1,...n],e=[e];else if(3!==n.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(n.some(((e,n)=>e!==t.dims[n])))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new o.Tensor("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:t=null,input_labels:n=null,input_boxes:r=null}={}){const i=await super._call(e);if(t&&(i.input_points=this.reshape_input_points(t,i.original_sizes,i.reshaped_input_sizes)),n){if(!i.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");i.input_labels=this.add_input_labels(n,i.input_points)}return r&&(i.input_boxes=this.reshape_input_points(r,i.original_sizes,i.reshaped_input_sizes,!0)),i}async post_process_masks(e,t,n,{mask_threshold:r=0,binarize:i=!0,pad_size:a=null}={}){const s=[],l=[(a=a??this.pad_size).height,a.width];for(let a=0;a<t.length;++a){const u=t[a],d=n[a];let c=await(0,o.interpolate_4d)(e[a],{mode:"bilinear",size:l});if(c=c.slice(null,null,[0,d[0]],[0,d[1]]),c=await(0,o.interpolate_4d)(c,{mode:"bilinear",size:u}),i){const e=c.data,t=new Uint8Array(e.length);for(let n=0;n<e.length;++n)e[n]>r&&(t[n]=1);c=new o.Tensor("bool",t,c.dims)}s.push(c)}return s}generate_crop_boxes(e,t,{crop_n_layers:n=0,overlap_ratio:r=512/1500,points_per_crop:i=32,crop_n_points_downscale_factor:a=1}={}){}}class X extends g{pad_image(e,t,n,r={}){const[i,a,s]=t;return super.pad_image(e,t,{width:a+(n-a%n)%n,height:i+(n-i%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...r})}}class K extends g{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const n=await Promise.all(e.map((e=>this.preprocess(e)))),r=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,o.stack)(n.map(((e,t)=>(0,o.cat)([e.pixel_values,r[t].pixel_values],0))),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class Q extends 
f{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=t.data,r=(0,s.max)(n)[0];for(let e=0;e<n.length;++e)n[e]=(Math.max(n[e],r-8)+4)/4;return t}async _call(e){let t;p(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class Y extends f{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,n=e.reduce(((e,n)=>e+(n-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(n+1e-7)))}async _call(e){p(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const n=[1,t.length];return{input_values:new o.Tensor("float32",t,n),attention_mask:new o.Tensor("int64",new BigInt64Array(t.length).fill(1n),n)}}}class J extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:r=!0,return_attention_mask:i=!0}={}){p(e,"SeamlessM4TFeatureExtractor");let a,s=await this._extract_fbank_features(e,this.config.max_length);if(r){const[e,t]=s.dims,n=s.data;for(let r=0;r<t;++r){let i=0;for(let a=0;a<e;++a)i+=n[a*t+r];const a=i/e;let s=0;for(let i=0;i<e;++i)s+=(n[i*t+r]-a)**2;s/=e-1;const o=Math.sqrt(s+1e-7);for(let i=0;i<e;++i){const e=i*t+r;n[e]=(n[e]-a)/o}}}if(t){const[e,t]=s.dims,r=s.data,l=e%n;if(l>0){const n=new Float32Array(t*(e+l));n.set(r),n.fill(this.config.padding_value,r.length);const u=e+l;s=new o.Tensor(s.type,n,[u,t]),i&&(a=new o.Tensor("int64",new BigInt64Array(u),[1,u]),a.data.fill(1n,0,e))}}const[l,u]=s.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const c=s.view(1,Math.floor(l/d),u*d),h={input_features:c};if(i){const e=c.dims[1],t=new BigInt64Array(e);if(a){const e=a.data;for(let n=1,r=0;n<l;n+=d,++r)t[r]=e[n]}else t.fill(1n);h.attention_mask=new o.Tensor("int64",t,[1,e])}return h}}class Z extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async 
_extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){p(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class ee extends f{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,n,r){let i,a=!1;const s=e.length-t;if(s>0){if("rand_trunc"!==n)throw new Error(`Truncation strategy "${n}" not implemented`);{a=!0;const n=Math.floor(Math.random()*(s+1));e=e.subarray(n,n+t),i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(s<0){let n=new Float64Array(t);if(n.set(e),"repeat"===r)for(let r=e.length;r<t;r+=e.length)n.set(e.subarray(0,Math.min(e.length,t-r)),r);else if("repeatpad"===r)for(let t=e.length;t<-s;t+=e.length)n.set(e,t);e=n}if("fusion"===n)throw new Error(`Truncation strategy "${n}" not implemented`);i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return i.unsqueeze_(0)}async _extract_fbank_features(e,t,n=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:n,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){p(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class te extends f{async _call(e){p(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new o.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const n=t/this.samples_to_frames(t)/this.config.sampling_rate,r=[];for(const t of e.tolist()){const e=[];let i=-1;for(let n=0;n<t.length;++n){const r=(0,s.softmax)(t[n]),[a,o]=(0,s.max)(r),[l,u]=[n,n+1];o!==i?(i=o,e.push({id:o,start:l,end:u,score:a})):(e.at(-1).end=u,e.at(-1).score+=a)}r.push(e.map((({id:e,start:t,end:r,score:i})=>({id:e,start:t*n,end:r*n,confidence:i/(r-t)}))))}return r}}class ne extends f{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){p(e,"WeSpeakerFeatureExtractor");const t=(await 
this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,n=t.data,[r,i,a]=t.dims;for(let t=0;t<r;++t){const r=t*i*a,s=t*a;for(let t=0;t<i;++t){const i=r+t*a;for(let t=0;t<a;++t)n[i+t]-=e[s+t]}}}return{input_features:t}}}class re extends f{}class ie extends r.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class ae extends ie{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class se extends ie{async _call(e){return await this.feature_extractor(e)}}class oe extends ie{async _call(e){return await this.feature_extractor(e)}}class le extends ie{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class ue extends ie{async _call(e){return await this.feature_extractor(e)}}class de extends ie{}class ce extends ie{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:n,task_prompts_with_input:r}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(n??{})),this.task_prompts_with_input=new Map(Object.entries(r??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let i;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":i=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const a="ocr"===r?"quad_boxes":"bboxes",s=e.matchAll(this.regexes[a]),o=[],l=[];for(const[e,t,...r]of s)o.push(t?t.trim():o.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));i={labels:o,[a]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:i}}}class pe{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:g,WhisperFeatureExtractor:Q,ViTFeatureExtractor:E,MobileViTFeatureExtractor:B,MobileViTImageProcessor:L,MobileNetV1FeatureExtractor:F,MobileNetV2FeatureExtractor:z,MobileNetV3FeatureExtractor:I,MobileNetV4FeatureExtractor:O,OwlViTFeatureExtractor:D,Owlv2ImageProcessor:R,CLIPFeatureExtractor:M,CLIPImageProcessor:T,Florence2Processor:ce,ChineseCLIPFeatureExtractor:k,SiglipImageProcessor:$,ConvNextFeatureExtractor:S,ConvNextImageProcessor:C,SegformerFeatureExtractor:w,SapiensFeatureExtractor:_,BitImageProcessor:v,DPTImageProcessor:b,DPTFeatureExtractor:y,GLPNFeatureExtractor:x,BeitFeatureExtractor:j,DeiTFeatureExtractor:V,DetrFeatureExtractor:U,RTDetrImageProcessor:N,YolosFeatureExtractor:W,DonutFeatureExtractor:G,NougatImageProcessor:q,EfficientNetImageProcessor:A,ViTImageProcessor:P,VitMatteImageProcessor:K,SamImageProcessor:H,Swin2SRImageProcessor:X,Wav2Vec2FeatureExtractor:Y,SeamlessM4TFeatureExtractor:J,SpeechT5FeatureExtractor:re,ASTFeatureExtractor:Z,ClapFeatureExtractor:ee,PyAnnoteFeatureExtractor:te,WeSpeakerFeatureExtractor:ne};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:se,Wav2Vec2ProcessorWithLM:oe,PyAnnoteProcessor:le,SamProcessor:ae,SpeechT5Processor:ue,OwlViTProcessor:de,Florence2Processor:ce};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:s="main"}={}){let o=n??await(0,a.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:s}),l=o.feature_extractor_type??o.image_processor_type,u=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!u){if(void 0===o.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),u=g}return new(this.PROCESSOR_CLASS_MAPPING[o.processor_class]??ie)(new u(o))}}},"./src/tokenizers.js":
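The module above ends with the processor auto-loader: from_pretrained fetches preprocessor_config.json, instantiates the class named by feature_extractor_type (or image_processor_type), and falls back to the generic ImageFeatureExtractor when the type is unknown but a "size" field is present. A minimal usage sketch against the public AutoProcessor and RawImage exports (the model id and image URL are illustrative, not taken from this diff):

import { AutoProcessor, RawImage } from '@huggingface/transformers';

// Resolves the concrete processor/feature-extractor class from
// preprocessor_config.json, per the from_pretrained logic above.
const processor = await AutoProcessor.from_pretrained('Xenova/vit-base-patch16-224'); // illustrative model id
const image = await RawImage.fromURL('https://example.com/cat.jpg');                  // illustrative URL
const { pixel_values } = await processor(image); // float32 Tensor of shape [batch, channels, height, width]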
/*!***************************!*\
!*** ./src/tokenizers.js ***!
\***************************/(e,t,n)=>{n.r(t),n.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Be,BertTokenizer:()=>ve,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>ot,BloomTokenizer:()=>Ne,CLIPTokenizer:()=>rt,CamembertTokenizer:()=>Ae,CodeGenTokenizer:()=>nt,CodeLlamaTokenizer:()=>Ge,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>Ce,DebertaTokenizer:()=>ke,DebertaV2Tokenizer:()=>$e,DistilBertTokenizer:()=>Pe,ElectraTokenizer:()=>ze,EsmTokenizer:()=>Xe,FalconTokenizer:()=>We,GPT2Tokenizer:()=>Oe,GPTNeoXTokenizer:()=>He,GemmaTokenizer:()=>Qe,Grok1Tokenizer:()=>Ye,HerbertTokenizer:()=>Se,LlamaTokenizer:()=>je,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Le,MPNetTokenizer:()=>Ue,MarianTokenizer:()=>at,MobileBertTokenizer:()=>Me,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>be,Qwen2Tokenizer:()=>Ke,RoFormerTokenizer:()=>Ee,RobertaTokenizer:()=>Re,SiglipTokenizer:()=>it,SpeechT5Tokenizer:()=>ut,SqueezeBertTokenizer:()=>Te,T5Tokenizer:()=>Ie,TokenizerModel:()=>M,VitsTokenizer:()=>ct,Wav2Vec2CTCTokenizer:()=>st,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>qe,XLMTokenizer:()=>Fe,is_chinese_char:()=>w});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=n(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),u=n(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),c=n(/*! ./utils/constants.js */"./src/utils/constants.js");async function p(e,t){const n=await Promise.all([(0,a.getModelJSON)(e,"tokenizer.json",!0,t),(0,a.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(n[1].legacy=t.legacy),n}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,n]of v)t=t.replaceAll(e,n);return new RegExp(t,"gu")}if(void 0!==e.String){const n=(0,i.escapeRegExp)(e.String);return new RegExp(t?n:`(${n})`,"gu")}return console.warn("Unknown pattern type:",e),null}function m(e){return new Map(Object.entries(e))}function f(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. 
Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function g(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function _(e){return e.replace(/[\u0300-\u036f]/g,"")}function w(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const y="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",b=new RegExp(`^[${y}]+$`,"gu"),v=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class M extends r.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new T(e);case"Unigram":return new k(e,...t);case"BPE":return new C(e);default:if(e.vocab)return new E(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=function(e,t,n){const r=[];let i=0;for(;i<e.length;)if(r.push(e[i]),(n.get(e[i])??t)===t)for(;i<e.length&&(n.get(e[i])??t)===t;)++i;else++i;return r}(t,this.unk_token_id,this.tokens_to_ids)),t}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class T extends M{constructor(e){super(e),this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const n of e){const e=[...n];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,i=0;const a=[];for(;i<e.length;){let t=e.length,n=null;for(;i<t;){let r=e.slice(i,t).join("");if(i>0&&(r=this.config.continuing_subword_prefix+r),this.tokens_to_ids.has(r)){n=r;break}--t}if(null===n){r=!0;break}a.push(n),i=t}r?t.push(this.unk_token):t.push(...a)}return t}}class k extends M{constructor(e,t){super(e);const n=e.vocab.length;this.vocab=new Array(n),this.scores=new Array(n);for(let t=0;t<n;++t){const n=e.vocab[t];this.vocab[t]=n[0],this.scores[t]=n[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bosToken=" ",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,s.min)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.sentence,n=t.length;let r=0;for(;r<n;){const n=1;let i=!1;const a=[];for(let s of 
this.trie.commonPrefixSearch(t.slice(r))){a.push(s);const t=this.tokens_to_ids.get(s),o=this.scores[t],l=s.length;e.insert(r,l,o,t),i||l!==n||(i=!0)}i||e.insert(r,n,this.unkScore,this.unk_token_id),r+=n}}tokenize(e){const t=new l.TokenLattice(e,this.bosTokenId,this.eosTokenId);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const n of e){const e=this.tokenize(n);t.push(...e)}return t}}const $=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let n=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+n),n+=1);const r=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,r[t]])))})(),S=(0,i.reverseDictionary)($);class C extends M{constructor(e){super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;this.bpe_ranks=new Map(e.merges.map(((e,t)=>[e,t]))),this.merges=e.merges.map((e=>e.split(this.BPE_SPLIT_TOKEN))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const n=Array.from(e);this.end_of_word_suffix&&(n[n.length-1]+=this.end_of_word_suffix);let r=[];if(n.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:n[0],bias:0,prev:null,next:null},i=t;for(let t=1;t<n.length;++t){const r={bias:t/n.length,token:n[t],prev:i,next:null};i.next=r,this._add_node(e,i),i=r}for(;!e.isEmpty();){const n=e.pop();if(n.deleted||!n.next||n.next.deleted)continue;if(n.deleted=!0,n.next.deleted=!0,n.prev){const e={...n.prev};n.prev.deleted=!0,n.prev=e,e.prev?e.prev.next=e:t=e}const r={token:n.token+n.next.token,bias:n.bias,prev:n.prev,next:n.next.next};r.prev?(r.prev.next=r,this._add_node(e,r.prev)):t=r,r.next&&(r.next.prev=r,this._add_node(e,r))}for(let e=t;null!==e;e=e.next)r.push(e.token)}else r=n;if(this.continuing_subword_suffix)for(let e=0;e<r.length-1;++e)r[e]+=this.continuing_subword_suffix;return this.cache.set(e,r),r}_add_node(e,t){const n=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==n&&(t.score=n+t.bias,e.push(t))}encode(e){const t=[];for(const n of e){if(this.ignore_merges&&this.tokens_to_ids.has(n)){t.push(n);continue}const e=this.bpe(n);for(const n of e)this.tokens_to_ids.has(n)?t.push(n):this.byte_fallback?t.push(...Array.from(this.text_encoder.encode(n)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`))):t.push(this.unk_token)}return t}}class E extends M{constructor(e,t){super(e),this.tokens_to_ids=m(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of 
this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class P extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new pe(e);case"Sequence":return new R(e);case"Replace":return new A(e);case"NFC":return new F(e);case"NFKC":return new z(e);case"NFKD":return new I(e);case"Strip":return new O(e);case"StripAccents":return new B(e);case"Lowercase":return new L(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class A extends P{normalize(e){const t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class F extends P{normalize(e){return e=e.normalize("NFC")}}class z extends P{normalize(e){return e=e.normalize("NFKC")}}class I extends P{normalize(e){return e=e.normalize("NFKD")}}class O extends P{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class B extends P{normalize(e){return e=_(e)}}class L extends P{normalize(e){return e=e.toLowerCase()}}class D extends P{normalize(e){return e=this.config.prepend+e}}class R extends P{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>P.fromConfig(e)))}normalize(e){return this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class N extends P{_tokenize_chinese_chars(e){const t=[];for(let n=0;n<e.length;++n){const r=e[n];w(r.charCodeAt(0))?(t.push(" "),t.push(r),t.push(" ")):t.push(r)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const n of e){const e=n.charCodeAt(0);0===e||65533===e||this._is_control(n)||(/^\s$/.test(n)?t.push(" "):t.push(n))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class V extends r.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new j(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new fe(e);case"Metaspace":return new de(e);case"ByteLevel":return new G(e);case"Split":return new q(e);case"Punctuation":return new U(e);case"Digits":return new W(e);case"Replace":return new ge(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class j extends V{constructor(e){super(),this.pattern=new RegExp(`[^\\s${y}]+|[${y}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class G extends V{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=$,this.text_encoder=new 
TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class q extends V{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const n=[];let r=0;for(const i of e.matchAll(t)){const t=i[0];r<i.index&&n.push(e.slice(r,i.index)),t.length>0&&n.push(t),r=i.index+t.length}return r<e.length&&n.push(e.slice(r)),n}(e,this.pattern)}}class U extends V{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${y}]+|[${y}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class W extends V{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class H extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Q(e);case"ByteLevel":return new Y(e);case"RobertaProcessing":return new K(e);case"BertProcessing":return new X(e);case"Sequence":return new J(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class X extends H{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:n=!0}={}){n&&(e=(0,i.mergeArrays)([this.cls],e,[this.sep]));let r=new Array(e.length).fill(0);if(null!==t){const a=n&&this instanceof K?[this.sep]:[],s=n?[this.sep]:[];e=(0,i.mergeArrays)(e,a,t,s),r=(0,i.mergeArrays)(r,new Array(t.length+a.length+s.length).fill(1))}return{tokens:e,token_type_ids:r}}}class K extends X{}class Q extends H{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:n=!0}={}){const r=null===t?this.single:this.pair;let a=[],s=[];for(const o of r)"SpecialToken"in o?n&&(a.push(o.SpecialToken.id),s.push(o.SpecialToken.type_id)):"Sequence"in o&&("A"===o.Sequence.id?(a=(0,i.mergeArrays)(a,e),s=(0,i.mergeArrays)(s,new Array(e.length).fill(o.Sequence.type_id))):"B"===o.Sequence.id&&(a=(0,i.mergeArrays)(a,t),s=(0,i.mergeArrays)(s,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:a,token_type_ids:s}}}class Y extends H{post_process(e,t=null){return t&&(e=(0,i.mergeArrays)(e,t)),{tokens:e}}}class J extends H{constructor(e){super(e),this.processors=e.processors.map((e=>H.fromConfig(e)))}post_process(e,t=null,n={}){let r;for(const i of this.processors)if(i instanceof Y){if(e=i.post_process(e).tokens,t){t=i.post_process(t).tokens}}else{const a=i.post_process(e,t,n);e=a.tokens,r=a.token_type_ids}return{tokens:e,token_type_ids:r}}}class Z extends r.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new ie(e);case"Metaspace":return new ce(e);case"ByteLevel":return new ae(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new ne(e);case"Strip":return new re(e);case"Sequence":return new oe(e);case"CTC":return new se(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return 
this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends Z{decode_chain(e){const t=h(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let n=[];for(const r of e){let e=null;if(6===r.length&&r.startsWith("<0x")&&r.endsWith(">")){const t=parseInt(r.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)n.push(e);else{if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}t.push(r)}}if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}return t}}class ne extends Z{decode_chain(e){return[e.join("")]}}class re extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let n=0;n<this.start&&e[n]===this.content;++n)t=n+1;let n=e.length;for(let t=0;t<this.stop;++t){const r=e.length-t-1;if(e[r]!==this.content)break;n=r}return e.slice(t,n)}))}}class ie extends Z{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=g(e)),e)))}}class ae extends Z{constructor(e){super(e),this.byte_decoder=S,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),n=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(n)}decode_chain(e){const t=[];let n=[];for(const r of e)void 0!==this.added_tokens.find((e=>e.content===r))?(n.length>0&&(t.push(this.convert_tokens_to_string(n)),n=[]),t.push(r)):n.push(r);return n.length>0&&t.push(this.convert_tokens_to_string(n)),t}}class se extends Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let n=1;n<e.length;++n)e[n]!==t.at(-1)&&t.push(e[n]);let n=t.filter((e=>e!==this.pad_token)).join("");return this.cleanup&&(n=g(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class oe extends Z{constructor(e){super(e),this.decoders=e.decoders.map((e=>Z.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,n)=>t.replaceAll(this.suffix,n===e.length-1?"":" ")))}}class ue extends Z{decode_chain(e){let t="";for(let n=1;n<e.length;n+=2)t+=e[n];return[t]}}class de extends V{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let n=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!n.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(n=this.strRep+n),[n]}}class ce extends Z{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let n=0;n<e.length;++n){let r=e[n].replaceAll(this.replacement," ");this.addPrefixSpace&&0==n&&r.startsWith(" ")&&(r=r.substring(1)),t.push(r)}return t}}class pe 
extends P{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends V{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>V.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,n)=>n.pre_tokenize(e,t)),[e])}}class me extends V{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class fe extends V{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class ge extends V{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const _e=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function we(e,t,n,r){for(const a of Object.keys(e)){const s=t-e[a].length,o=n(a),l=new Array(s).fill(o);e[a]="right"===r?(0,i.mergeArrays)(e[a],l):(0,i.mergeArrays)(l,e[a])}}function ye(e,t){for(const n of Object.keys(e))e[n].length=t}class be extends r.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=P.fromConfig(e.normalizer),this.pre_tokenizer=V.fromConfig(e.pre_tokenizer),this.model=M.fromConfig(e.model,t),this.post_processor=H.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.toSorted(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,i.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:n}of this.chat_template){if("string"!=typeof t||"string"!=typeof n)throw new Error('Chat template must be a list of objects with "name" and "template" 
properties');e[t]=n}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){return new this(...await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}))}_call(e,{text_pair:t=null,add_special_tokens:n=!0,padding:r=!1,truncation:i=null,max_length:a=null,return_tensor:l=!0,return_token_type_ids:u=null}={}){const d=Array.isArray(e);let c;if(d){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");c=e.map(((e,r)=>this._encode_plus(e,{text_pair:t[r],add_special_tokens:n,return_token_type_ids:u})))}else c=e.map((e=>this._encode_plus(e,{add_special_tokens:n,return_token_type_ids:u})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");c=[this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:u})]}if(null===a?a="max_length"===r?this.model_max_length:(0,s.max)(c.map((e=>e.input_ids.length)))[0]:i||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),a=Math.min(a,this.model_max_length??1/0),r||i)for(let e=0;e<c.length;++e)c[e].input_ids.length!==a&&(c[e].input_ids.length>a?i&&ye(c[e],a):r&&we(c[e],a,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!r||!i)&&c.some((e=>{for(const t of Object.keys(e))if(e[t].length!==c[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[c.length,c[0].input_ids.length];for(const t of Object.keys(c[0]))p[t]=new o.Tensor("int64",BigInt64Array.from(c.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(c[0]))p[e]=c.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=function(e){return _(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const n=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(n)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){const{tokens:i,token_type_ids:a}=this._tokenize_helper(e,{pair:t,add_special_tokens:n}),s=this.model.convert_tokens_to_ids(i),o={input_ids:s,attention_mask:new Array(s.length).fill(1)};return(r??this.return_token_type_ids)&&a&&(o.token_type_ids=a),o}_tokenize_helper(e,{pair:t=null,add_special_tokens:n=!1}={}){const r=this._encode_text(e),a=this._encode_text(t);return 
this.post_processor?this.post_processor(r,a,{add_special_tokens:n}):{tokens:(0,i.mergeArrays)(r??[],a??[])}}tokenize(e,{pair:t=null,add_special_tokens:n=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:n}).tokens}encode(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:r}).input_ids}batch_decode(e,t={}){return e instanceof o.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof o.Tensor&&(e=f(e)),!Array.isArray(e)||0===e.length||!(0,i.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:n=null}){let r=this.model.convert_ids_to_tokens(e);t&&(r=r.filter((e=>!this.special_tokens.includes(e))));let i=this.decoder?this.decoder(r):r.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(n??this.clean_up_tokenization_spaces)&&(i=g(i)),i}apply_chat_template(e,{tools:t=null,documents:n=null,chat_template:r=null,add_generation_prompt:i=!1,tokenize:a=!0,padding:s=!1,truncation:o=!1,max_length:l=null,return_tensor:d=!0,return_dict:c=!1,tokenizer_kwargs:p={},...h}={}){if(this.chat_template&&"object"==typeof this.chat_template||null===this.chat_template){const e=this.chat_template;if(null!==r&&Object.hasOwn(e,r))r=e[r];else if(null===r&&"default"in e)r=e.default;else if(null===r)throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(e).sort()}.`)}else{if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");r=this.chat_template}if("string"!=typeof r)throw Error("chat_template must be a string, but got "+typeof r);let m=this._compiled_template_cache.get(r);void 0===m&&(m=new u.Template(r),this._compiled_template_cache.set(r,m));const f=Object.create(null);for(const e of _e){const t=this.getToken(e);t&&(f[e]=t)}const g=m.render({messages:e,add_generation_prompt:i,tools:t,documents:n,...f,...h});if(a){const e=this._call(g,{add_special_tokens:!1,padding:s,truncation:o,max_length:l,return_tensor:d,...p});return c?e:e.input_ids}return g}}class ve extends be{return_token_type_ids=!0}class xe extends be{return_token_type_ids=!0}class Me extends be{return_token_type_ids=!0}class Te extends be{return_token_type_ids=!0}class ke extends be{return_token_type_ids=!0}class $e extends be{return_token_type_ids=!0}class Se extends be{return_token_type_ids=!0}class Ce extends be{return_token_type_ids=!0}class Ee extends be{return_token_type_ids=!0}class Pe extends be{}class Ae extends be{}class Fe extends be{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. 
Therefore, you may experience slightly inaccurate results.')}}class ze extends be{return_token_type_ids=!0}class Ie extends be{}class Oe extends be{}class Be extends be{}class Le extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class De extends Le{}class Re extends be{}class Ne extends be{constructor(e,t){const n=".,!?…。,、।۔،",r=e.pre_tokenizer?.pretokenizers[0]?.pattern;r&&r.Regex===` ?[^(\\s|[${n}])]+`&&(r.Regex=` ?[^\\s${n}]+`),super(e,t)}}const Ve="▁";class je extends be{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Ve,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Ve+e.replaceAll(Ve," "));return t.length>1&&t[0]===Ve&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class Ge extends be{}class qe extends be{}class Ue extends be{}class We extends be{}class He extends be{}class Xe extends be{}class Ke extends be{}class Qe extends be{}class Ye extends be{}function Je(e,t,n,r){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");const i=r.src_lang,a=r.tgt_lang;if(!e.language_codes.includes(a))throw new Error(`Target language code "${a}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==i){if(!e.language_codes.includes(i))throw new Error(`Source language code "${i}" is not valid. 
Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(i);break}}return r.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(a)])[0],e._call(t,n)}class Ze extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class et extends be{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class tt extends be{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:n=!1,time_precision:r=null,force_full_sequences:i=!0}={}){if(null===r)throw Error("Must specify time_precision");let a=null;const o="word"===t;function l(){return{language:a,timestamp:[null,null],text:""}}const u=[];let c=l(),p=0;const h=this.timestamp_begin;let m=[],f=[],g=!1,_=null;const w=new Set(this.all_special_ids);for(const n of e){const e=n.tokens,i=o?n.token_timestamps:null;let y=null,v=h;if("stride"in n){const[t,i,a]=n.stride;if(p-=i,_=t-a,i&&(v=i/r+h),a)for(let t=e.length-1;t>=0;--t){const n=Number(e[t]);if(n>=h){if(null!==y&&(n-h)*r<_)break;y=n}}}let x=[],M=[];for(let n=0;n<e.length;++n){const _=Number(e[n]);if(w.has(_)){const e=this.decode([_]),n=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==n){if(null!==a&&n!==a&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);c.text=t,u.push(c),m=[],x=[],c=l()}a=c.language=n}}else if(_>=h){const e=(_-h)*r+p,t=(0,s.round)(e,2);if(null!==y&&_>=y)g=!0;else if(g||m.length>0&&_<v)g=!1;else if(null===c.timestamp[0])c.timestamp[0]=t;else if(t===c.timestamp[0]);else{c.timestamp[1]=t,m.push(x),o&&f.push(M);const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c),m=[],x=[],f=[],M=[],c=l()}}else if(x.push(_),o){let e,t=(0,s.round)(i[n]+p,2);if(n+1<i.length){e=(0,s.round)(i[n+1]+p,2);const a=this.decode([_]);b.test(a)&&(e=(0,s.round)(Math.min(t+r,e),2))}else e=null;M.push([t,e])}}if("stride"in n){const[e,t,r]=n.stride;p+=e-r}x.length>0?(m.push(x),o&&f.push(M)):m.every((e=>0===e.length))&&(c=l(),m=[],x=[],f=[],M=[])}if(m.length>0){if(i&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c)}let y=Object.create(null);const v=u.map((e=>e.text)).join("");if(t||n){for(let e=0;e<u.length;++e){const r=u[e];t||delete r.timestamp,n||delete r.language}if(o){const e=[];for(const t of u)for(const n of t.words)e.push(n);y={chunks:e}}else y={chunks:u}}return[v,y]}findLongestCommonSequence(e,t=null){let n=e[0],r=n.length,i=[];const a=Array.isArray(t)&&t.length>0;let s=a?[]:null,o=a?t[0]:null;for(let l=1;l<e.length;++l){const u=e[l];let d=0,c=[r,r,0,0];const p=u.length;for(let e=1;e<r+p;++e){const i=Math.max(0,r-e),s=Math.min(r,r+p-e),h=n.slice(i,s),m=Math.max(0,e-r),f=Math.min(p,e),g=u.slice(m,f);if(h.length!==g.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let _;_=a?h.filter(((e,n)=>e===g[n]&&o[i+n]<=t[l][m+n])).length:h.filter(((e,t)=>e===g[t])).length;const w=_/e+e/1e4;_>1&&w>d&&(d=w,c=[i,s,m,f])}const[h,m,f,g]=c,_=Math.floor((m+h)/2),w=Math.floor((g+f)/2);i.push(...n.slice(0,_)),n=u.slice(w),r=n.length,a&&(s.push(...o.slice(0,_)),o=t[l].slice(w))}return i.push(...n),a?(s.push(...o),[i,s]):[i,[]]}collateWordTimestamps(e,t,n){const[r,i,a]=this.combineTokensIntoWords(e,n),s=[];for(let e=0;e<r.length;++e){const n=a[e];s.push({text:r[e],timestamp:[t[n.at(0)][0],t[n.at(-1)][1]]})}return s}combineTokensIntoWords(e,t,n="\"'“¡¿([{-",r="\"'.。,,!!??::”)]}、"){let i,a,s;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[i,a,s]=this.splitTokensOnUnicode(e):[i,a,s]=this.splitTokensOnSpaces(e),this.mergePunctuations(i,a,s,n,r)}decode(e,t){let n;return t?.decode_with_timestamps?(e instanceof o.Tensor&&(e=f(e)),n=this.decodeWithTimestamps(e,t)):n=super.decode(e,t),n}decodeWithTimestamps(e,t){const n=t?.time_precision??.02,r=Array.from(this.all_special_ids).at(-1)+1;let i=[[]];for(let t of e)if(t=Number(t),t>=r){const e=((t-r)*n).toFixed(2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return i=i.map((e=>"string"==typeof e?e:super.decode(e,t))),i.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),n=[],r=[],i=[];let a=[],s=[],o=0;for(let l=0;l<e.length;++l){const u=e[l];a.push(u),s.push(l);const d=this.decode(a,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[o+d.indexOf("�")]||(n.push(d),r.push(a),i.push(s),a=[],s=[],o+=d.length)}return[n,r,i]}splitTokensOnSpaces(e){const[t,n,r]=this.splitTokensOnUnicode(e),i=[],a=[],s=[],o=new RegExp(`^[${y}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],u=n[e],d=r[e],c=u[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=o.test(h);if(c||p||m||0===i.length)i.push(l),a.push(u),s.push(d);else{const e=i.length-1;i[e]+=l,a[e].push(...u),s[e].push(...d)}}return[i,a,s]}mergePunctuations(e,t,n,r,a){const s=structuredClone(e),o=structuredClone(t),l=structuredClone(n);let u=s.length-2,d=s.length-1;for(;u>=0;)s[u].startsWith(" ")&&r.includes(s[u].trim())?(s[d]=s[u]+s[d],o[d]=(0,i.mergeArrays)(o[u],o[d]),l[d]=(0,i.mergeArrays)(l[u],l[d]),s[u]="",o[u]=[],l[u]=[]):d=u,--u;for(u=0,d=1;d<s.length;)!s[u].endsWith(" 
")&&a.includes(s[d])?(s[u]+=s[d],o[u]=(0,i.mergeArrays)(o[u],o[d]),l[u]=(0,i.mergeArrays)(l[u],l[d]),s[d]="",o[d]=[],l[d]=[]):u=d,++d;return[s.filter((e=>e)),o.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:n=!0}={}){const r=[];if(e){const t=(0,d.whisper_language_to_code)(e),n=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===n)throw new Error(`Unable to find language "${t}" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(n)}else r.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}else r.push(null);if(n){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}return r.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class nt extends be{}class rt extends be{}class it extends be{}class at extends be{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...n]=e.trim().split(this.languageRegex);if(0===n.length)return super._encode_text(t);if(2===n.length){const[e,t]=n;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,i.mergeArrays)([e],super._encode_text(t))}}}class st extends be{}class ot extends be{}class lt extends be{}class ut extends be{}class dt extends be{}class ct extends be{constructor(e,t){super(e,t),this.decoder=new ue({})}}class pt extends be{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:Ie,DistilBertTokenizer:Pe,CamembertTokenizer:Ae,DebertaTokenizer:ke,DebertaV2Tokenizer:$e,BertTokenizer:ve,HerbertTokenizer:Se,ConvBertTokenizer:Ce,RoFormerTokenizer:Ee,XLMTokenizer:Fe,ElectraTokenizer:ze,MobileBertTokenizer:Me,SqueezeBertTokenizer:Te,AlbertTokenizer:xe,GPT2Tokenizer:Oe,BartTokenizer:Be,MBartTokenizer:Le,MBart50Tokenizer:De,RobertaTokenizer:Re,WhisperTokenizer:tt,CodeGenTokenizer:nt,CLIPTokenizer:rt,SiglipTokenizer:it,MarianTokenizer:at,BloomTokenizer:Ne,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:je,CodeLlamaTokenizer:Ge,XLMRobertaTokenizer:qe,MPNetTokenizer:Ue,FalconTokenizer:We,GPTNeoXTokenizer:He,EsmTokenizer:Xe,Wav2Vec2CTCTokenizer:st,BlenderbotTokenizer:ot,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ut,NougatTokenizer:dt,VitsTokenizer:ct,Qwen2Tokenizer:Ke,GemmaTokenizer:Qe,Grok1Tokenizer:Ye,CohereTokenizer:pt,PreTrainedTokenizer:be};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){const[o,l]=await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}),u=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=be),new d(o,l)}}},"./src/utils/audio.js":
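The tokenizers module above wires up the same pipeline as the Python tokenizers library (normalizer, pre-tokenizer, model, post-processor, decoder), with AutoTokenizer.from_pretrained resolving the concrete class from tokenizer_config.json and falling back to PreTrainedTokenizer for unknown classes. A minimal usage sketch against the public API (the model id is illustrative):

import { AutoTokenizer } from '@huggingface/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/gpt2'); // illustrative model id
// return_tensor: false yields plain arrays instead of int64 Tensors.
const { input_ids } = tokenizer('Hello world', { return_tensor: false });
console.log(tokenizer.decode(input_ids, { skip_special_tokens: true }));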
@@ -226,5 +226,5 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,E,P,A,F,z,I,O,B=Object.d
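The hunk below is context from the tensor utilities (the Tensor class plus helpers such as cat, stack, topk, and interpolate_4d, the last of which dispatches through TensorOpRegistry to an ONNX subgraph). A minimal usage sketch, assuming these helpers are re-exported from the package root as in the documented v3 API:

import { Tensor, interpolate_4d } from '@huggingface/transformers';

// interpolate_4d only accepts 4D (e.g. NCHW) input; a 2-element size keeps the
// leading two dims and resizes the spatial ones, per the checks in the code below.
const x = new Tensor('float32', Float32Array.from({ length: 1 * 3 * 4 * 4 }, Math.random), [1, 3, 4, 4]);
const y = await interpolate_4d(x, { mode: 'bilinear', size: [8, 8] }); // dims -> [1, 3, 8, 8]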
\*****************************/(e,t,n)=>{n.r(t),n.d(t,{Tensor:()=>o,cat:()=>y,full:()=>T,full_like:()=>k,interpolate:()=>u,interpolate_4d:()=>d,layer_norm:()=>f,matmul:()=>c,mean:()=>x,mean_pooling:()=>m,ones:()=>$,ones_like:()=>S,permute:()=>l,quantize_embeddings:()=>P,rfft:()=>p,stack:()=>b,std_mean:()=>v,topk:()=>h,zeros:()=>C,zeros_like:()=>E});var r=n(/*! ./maths.js */"./src/utils/maths.js"),i=n(/*! ../backends/onnx.js */"./src/backends/onnx.js"),a=n(/*! ../ops/registry.js */"./src/ops/registry.js");const s=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});class o{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return(0,i.isONNXTensor)(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new i.Tensor(e[0],e[1],e[2]),new Proxy(this,{get:(e,t)=>{if("string"==typeof t){let n=Number(t);if(Number.isInteger(n))return e._getitem(n)}return e[t]},set:(e,t,n)=>e[t]=n})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...t]=this.dims;if(t.length>0){const n=t.reduce(((e,t)=>e*t));for(let r=0;r<e;++r)yield this._subarray(r,n,t)}else yield*this.data}_getitem(e){const[t,...n]=this.dims;if(e=w(e,t),n.length>0){const t=n.reduce(((e,t)=>e*t));return this._subarray(e,t,n)}return new o(this.type,[this.data[e]],n)}indexOf(e){const t=this.data;for(let n=0;n<t.length;++n)if(t[n]==e)return n;return-1}_subarray(e,t,n){const r=e*t,i=(e+1)*t,a="subarray"in this.data?this.data.subarray(r,i):this.data.slice(r,i);return new o(this.type,a,n)}item(){const e=this.data;if(1!==e.length)throw new Error(`a Tensor with ${e.length} elements cannot be converted to Scalar`);return e[0]}tolist(){return function(e,t){const n=e.length,r=t.reduce(((e,t)=>e*t));if(n!==r)throw Error(`cannot reshape array of size ${n} into shape (${t})`);let i=e;for(let e=t.length-1;e>=0;e--)i=i.reduce(((n,r)=>{let i=n[n.length-1];return i.length<t[e]?i.push(r):n.push([r]),n}),[[]]);return i[0]}(this.data,this.dims)}sigmoid(){return this.clone().sigmoid_()}sigmoid_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=1/(1+Math.exp(-e[t]));return this}mul(e){return this.clone().mul_(e)}mul_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]*=e;return this}div(e){return this.clone().div_(e)}div_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]/=e;return this}add(e){return this.clone().add_(e)}add_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]+=e;return this}sub(e){return this.clone().sub_(e)}sub_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]-=e;return this}clone(){return new o(this.type,this.data.slice(),this.dims.slice())}slice(...e){const t=[],n=[];for(let r=0;r<this.dims.length;++r){let i=e[r];if(null==i)n.push([0,this.dims[r]]),t.push(this.dims[r]);else if("number"==typeof i)i=w(i,this.dims[r],r),n.push([i,i+1]);else{if(!Array.isArray(i)||2!==i.length)throw new Error(`Invalid slice: ${i}`);{let[e,a]=i;if(e=null===e?0:w(e,this.dims[r],r,!1),a=null===a?this.dims[r]:w(a,this.dims[r],r,!1),e>a)throw new Error(`Invalid slice: ${i}`);const s=[Math.max(e,0),Math.min(a,this.dims[r])];n.push(s),t.push(s[1]-s[0])}}}const r=n.map((([e,t])=>t-e)),i=r.reduce(((e,t)=>e*t)),a=this.data,s=new 
a.constructor(i),l=this.stride();for(let e=0;e<i;++e){let t=0;for(let i=r.length-1,a=e;i>=0;--i){const e=r[i];t+=(a%e+n[i][0])*l[i],a=Math.floor(a/e)}s[e]=a[t]}return new o(this.type,s,t)}permute(...e){return l(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,t=!1){return this.norm(1,e,t)}norm(e="fro",t=null,n=!1){if("fro"===e)e=2;else if("string"==typeof e)throw Error(`Unsupported norm: ${e}`);const r=this.data;if(null===t){let t=r.reduce(((t,n)=>t+n**e),0)**(1/e);return new o(this.type,[t],[])}t=w(t,this.dims.length);const i=this.dims.slice();i[t]=1;const a=new r.constructor(r.length/this.dims[t]);for(let n=0;n<r.length;++n){let s=0;for(let e=this.dims.length-1,r=n,a=1;e>=0;--e){const n=this.dims[e];if(e!==t){s+=r%n*a,a*=i[e]}r=Math.floor(r/n)}a[s]+=r[n]**e}if(1!==e)for(let t=0;t<a.length;++t)a[t]=a[t]**(1/e);return n||i.splice(t,1),new o(this.type,a,i)}normalize_(e=2,t=1){t=w(t,this.dims.length);const n=this.norm(e,t,!0),r=this.data,i=n.data;for(let e=0;e<r.length;++e){let n=0;for(let r=this.dims.length-1,i=e,a=1;r>=0;--r){const e=this.dims[r];if(r!==t){n+=i%e*a,a*=this.dims[r]}i=Math.floor(i/e)}r[e]/=i[n]}return this}normalize(e=2,t=1){return this.clone().normalize_(e,t)}stride(){return function(e){const t=new Array(e.length);for(let n=e.length-1,r=1;n>=0;--n)t[n]=r,r*=e[n];return t}(this.dims)}squeeze(e=null){return new o(this.type,this.data,g(this.dims,e))}squeeze_(e=null){return this.dims=g(this.dims,e),this}unsqueeze(e=null){return new o(this.type,this.data,_(this.dims,e))}unsqueeze_(e=null){return this.dims=_(this.dims,e),this}flatten_(e=0,t=-1){t=(t+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),r=this.dims.slice(e,t+1),i=this.dims.slice(t+1);return this.dims=[...n,r.reduce(((e,t)=>e*t),1),...i],this}flatten(e=0,t=-1){return this.clone().flatten_(e,t)}view(...e){let t=-1;for(let n=0;n<e.length;++n)if(-1===e[n]){if(-1!==t)throw new Error("Only one dimension can be inferred");t=n}const n=this.data;if(-1!==t){const r=e.reduce(((e,n,r)=>r!==t?e*n:e),1);e[t]=n.length/r}return new o(this.type,n,e)}neg_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=-e[t];return this}neg(){return this.clone().neg_()}clamp_(e,t){const n=this.data;for(let r=0;r<n.length;++r)n[r]=Math.min(Math.max(n[r],e),t);return this}clamp(e,t){return this.clone().clamp_(e,t)}round_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=Math.round(e[t]);return this}round(){return this.clone().round_()}mean(e=null,t=!1){return x(this,e,t)}to(e){if(this.type===e)return this;if(!s.hasOwnProperty(e))throw new Error(`Unsupported type: ${e}`);return new o(e,s[e].from(this.data),this.dims)}}function l(e,t){const[n,i]=(0,r.permute_data)(e.data,e.dims,t);return new o(e.type,n,i)}function u(e,[t,n],i="bilinear",a=!1){const s=e.dims.at(-3)??1,l=e.dims.at(-2),u=e.dims.at(-1);let d=(0,r.interpolate_data)(e.data,[s,l,u],[t,n],i,a);return new o(e.type,d,[s,t,n])}async function d(e,{size:t=null,mode:n="bilinear"}={}){if(4!==e.dims.length)throw new Error("`interpolate_4d` currently only supports 4D input.");if(!t)throw new Error("`interpolate_4d` requires a `size` argument.");let r,i;if(2===t.length)r=[...e.dims.slice(0,2),...t];else if(3===t.length)r=[e.dims[0],...t];else{if(4!==t.length)throw new Error("`size` must be of length 2, 3, or 4.");r=t}if("bilinear"===n)i=await a.TensorOpRegistry.bilinear_interpolate_4d;else{if("bicubic"!==n)throw new Error(`Unsupported mode: ${n}`);i=await a.TensorOpRegistry.bicubic_interpolate_4d}const s=new o("int64",new BigInt64Array(r.map(BigInt)),[r.length]);return await 
i({x:e,s})}async function c(e,t){const n=await a.TensorOpRegistry.matmul;return await n({a:e,b:t})}async function p(e,t){const n=await a.TensorOpRegistry.rfft;return await n({x:e,a:t})}async function h(e,t){const n=await a.TensorOpRegistry.top_k;return t=null===t?e.dims.at(-1):Math.min(t,e.dims.at(-1)),await n({x:e,k:new o("int64",[BigInt(t)],[1])})}function m(e,t){const n=e.data,r=t.data,i=[e.dims[0],e.dims[2]],a=new n.constructor(i[0]*i[1]),[s,l,u]=e.dims;let d=0;for(let e=0;e<s;++e){const t=e*u*l;for(let i=0;i<u;++i){let s=0,o=0;const c=e*l,p=t+i;for(let e=0;e<l;++e){const t=Number(r[c+e]);o+=t,s+=n[p+e*u]*t}const h=s/o;a[d++]=h}}return new o(e.type,a,i)}function f(e,t,{eps:n=1e-5}={}){if(2!==e.dims.length)throw new Error("`layer_norm` currently only supports 2D input.");const[r,i]=e.dims;if(1!==t.length&&t[0]!==i)throw new Error("`normalized_shape` must be a 1D array with shape `[input.dims[1]]`.");const[a,s]=v(e,1,0,!0),l=a.data,u=s.data,d=e.data,c=new d.constructor(d.length);for(let e=0;e<r;++e){const t=e*i;for(let r=0;r<i;++r){const i=t+r;c[i]=(d[i]-u[e])/(l[e]+n)}}return new o(e.type,c,e.dims)}function g(e,t){return e=e.slice(),null===t?e=e.filter((e=>1!==e)):"number"==typeof t?1===e[t]&&e.splice(t,1):Array.isArray(t)&&(e=e.filter(((e,n)=>1!==e||!t.includes(n)))),e}function _(e,t){return t=w(t,e.length+1),(e=e.slice()).splice(t,0,1),e}function w(e,t,n=null,r=!0){if(r&&(e<-t||e>=t))throw new Error(`IndexError: index ${e} is out of bounds for dimension${null===n?"":" "+n} with size ${t}`);return e<0&&(e=(e%t+t)%t),e}function y(e,t=0){t=w(t,e[0].dims.length);const n=e[0].dims.slice();n[t]=e.reduce(((e,n)=>e+n.dims[t]),0);const r=n.reduce(((e,t)=>e*t),1),i=new e[0].data.constructor(r),a=e[0].type;if(0===t){let t=0;for(const n of e){const e=n.data;i.set(e,t),t+=e.length}}else{let r=0;for(let a=0;a<e.length;++a){const{data:s,dims:o}=e[a];for(let e=0;e<s.length;++e){let a=0;for(let i=o.length-1,s=e,l=1;i>=0;--i){const e=o[i];let u=s%e;i===t&&(u+=r),a+=u*l,l*=n[i],s=Math.floor(s/e)}i[a]=s[e]}r+=o[t]}}return new o(a,i,n)}function b(e,t=0){return y(e.map((e=>e.unsqueeze(t))),t)}function v(e,t=null,n=1,r=!1){const i=e.data,a=e.dims;if(null===t){const t=i.reduce(((e,t)=>e+t),0)/i.length,r=Math.sqrt(i.reduce(((e,n)=>e+(n-t)**2),0)/(i.length-n)),a=new o(e.type,[t],[]);return[new o(e.type,[r],[]),a]}const s=x(e,t=w(t,a.length),r),l=s.data,u=a.slice();u[t]=1;const d=new i.constructor(i.length/a[t]);for(let e=0;e<i.length;++e){let n=0;for(let r=a.length-1,i=e,s=1;r>=0;--r){const e=a[r];if(r!==t){n+=i%e*s,s*=u[r]}i=Math.floor(i/e)}d[n]+=(i[e]-l[n])**2}for(let e=0;e<d.length;++e)d[e]=Math.sqrt(d[e]/(a[t]-n));r||u.splice(t,1);return[new o(e.type,d,u),s]}function x(e,t=null,n=!1){const r=e.data;if(null===t){const t=r.reduce(((e,t)=>e+t),0);return new o(e.type,[t/r.length],[])}const i=e.dims;t=w(t,i.length);const a=i.slice();a[t]=1;const s=new r.constructor(r.length/i[t]);for(let e=0;e<r.length;++e){let n=0;for(let r=i.length-1,s=e,o=1;r>=0;--r){const e=i[r];if(r!==t){n+=s%e*o,o*=a[r]}s=Math.floor(s/e)}s[n]+=r[e]}if(1!==i[t])for(let e=0;e<s.length;++e)s[e]=s[e]/i[t];return n||a.splice(t,1),new o(e.type,s,a)}function M(e,t,n,r){const i=e.reduce(((e,t)=>e*t),1);return new o(n,new r(i).fill(t),e)}function T(e,t){let n,r;if("number"==typeof t)n="float32",r=Float32Array;else{if("bigint"!=typeof t)throw new Error("Unsupported data type: "+typeof t);n="int64",r=BigInt64Array}return M(e,t,n,r)}function k(e,t){return T(e.dims,t)}function $(e){return M(e,1n,"int64",BigInt64Array)}function S(e){return 
$(e.dims)}function C(e){return M(e,0n,"int64",BigInt64Array)}function E(e){return C(e.dims)}function P(e,t){if(2!==e.dims.length)throw new Error("The tensor must have 2 dimensions");if(e.dims.at(-1)%8!=0)throw new Error("The last dimension of the tensor must be a multiple of 8");if(!["binary","ubinary"].includes(t))throw new Error("The precision must be either 'binary' or 'ubinary'");const n="binary"===t,r=n?"int8":"uint8",i=n?Int8Array:Uint8Array,a=e.data,s=new i(a.length/8);for(let e=0;e<a.length;++e){const t=a[e]>0?1:0,r=Math.floor(e/8),i=e%8;s[r]|=t<<7-i,n&&0===i&&(s[r]-=128)}return new o(r,s,[e.dims[0],e.dims[1]/8])}}},r={};function i(e){var t=r[e];if(void 0!==t)return t.exports;var a=r[e]={exports:{}};return n[e](a,a.exports,i),a.exports}i.m=n,t=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,i.t=function(n,r){if(1&r&&(n=this(n)),8&r)return n;if("object"==typeof n&&n){if(4&r&&n.__esModule)return n;if(16&r&&"function"==typeof n.then)return n}var a=Object.create(null);i.r(a);var s={};e=e||[null,t({}),t([]),t(t)];for(var o=2&r&&n;"object"==typeof o&&!~e.indexOf(o);o=t(o))Object.getOwnPropertyNames(o).forEach((e=>s[e]=()=>n[e]));return s.default=()=>n,i.d(a,s),a},i.d=(e,t)=>{for(var n in t)i.o(t,n)&&!i.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;if("string"==typeof import.meta.url&&(e=import.meta.url),!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),i.p=e})(),i.b=new URL("./",import.meta.url);var a={};
/*!*****************************!*\
!*** ./src/transformers.js ***!
-
\*****************************/i.r(a),i.d(a,{ASTFeatureExtractor:()=>d.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>u.AlbertTokenizer,AudioClassificationPipeline:()=>o.AudioClassificationPipeline,AutoConfig:()=>c.AutoConfig,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>d.AutoProcessor,AutoTokenizer:()=>u.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>o.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>u.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>g.BaseStreamer,BeitFeatureExtractor:()=>d.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>u.BertTokenizer,BitImageProcessor:()=>d.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallMo
del,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>u.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>u.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,BloomModel:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>u.BloomTokenizer,CLIPFeatureExtractor:()=>d.CLIPFeatureExtractor,CLIPImageProcessor:()=>d.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>u.CLIPTokenizer,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>u.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>d.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>d.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>u.CodeGenTokenizer,CodeLlamaTokenizer:()=>u.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>u.CohereTokenizer,ConvBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTokenClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>u.ConvBertTokenizer,ConvNextFeatureExtractor:()=>d.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>d.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>d.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>d.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>u.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionA
nswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>u.DebertaV2Tokenizer,DeiTFeatureExtractor:()=>d.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTForImageClassification,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>o.DepthEstimationPipeline,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>u.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>o.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>d.DonutFeatureExtractor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>d.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>u.ElectraTokenizer,EosTokenCriteria:()=>_.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSequenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>u.EsmTokenizer,FFT:()=>f.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>u.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>o.FeatureExtractionPipeline,FeatureExtractor:()=>d.FeatureExtractor,FillMaskPipeline:()=>o.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>d.Florence2Processor,GLPNFeatureExtractor:()=>d.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:
()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>u.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>u.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>u.GemmaTokenizer,Grok1Tokenizer:()=>u.Grok1Tokenizer,HerbertTokenizer:()=>u.HerbertTokenizer,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,ImageClassificationPipeline:()=>o.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>o.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>d.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageSegmentationPipeline:()=>o.ImageSegmentationPipeline,ImageToImagePipeline:()=>o.ImageToImagePipeline,ImageToTextPipeline:()=>o.ImageToTextPipeline,InterruptableStoppingCriteria:()=>_.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>l.LlamaPreTrainedModel,LlamaTokenizer:()=>u.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>u.M2M100Tokenizer,MBart50Tokenizer:()=>u.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreTrainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>u.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>u.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>u.MarianTokenizer,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>_.MaxLengthCriteria,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAns
wering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>u.MobileBertTokenizer,MobileNetV1FeatureExtractor:()=>d.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>d.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2Model:()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>d.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>l.MobileNetV3ForImageClassification,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>d.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>d.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>d.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>u.NllbTokenizer,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>d.NougatImageProcessor,NougatTokenizer:()=>u.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>o.ObjectDetectionPipeline,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()=>l.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>d.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>d.OwlViTProcessor,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>d.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>o.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>u.PreTrainedTokenizer,PretrainedConfig:()=>c.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>d.Processor,PyAnnoteFeatureExtractor:()=>d.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()
=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>d.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>o.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>u.Qwen2Tokenizer,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>d.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>h.RawImage,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoFormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>u.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>u.RobertaTokenizer,SamImageProcessor:()=>d.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>d.SamProcessor,SapiensFeatureExtractor:()=>d.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>d.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>d.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.SegformerForSemanticSegmentation,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>d.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>u.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>d.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>d.SpeechT5Processor,SpeechT5Tokenizer:()=>u.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>u.SqueezeBertTokenizer,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,S
tarcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>_.StoppingCriteria,StoppingCriteriaList:()=>_.StoppingCriteriaList,SummarizationPipeline:()=>o.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>l.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>d.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,SwinModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>u.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,Tensor:()=>m.Tensor,Text2TextGenerationPipeline:()=>o.Text2TextGenerationPipeline,TextClassificationPipeline:()=>o.TextClassificationPipeline,TextGenerationPipeline:()=>o.TextGenerationPipeline,TextStreamer:()=>g.TextStreamer,TextToAudioPipeline:()=>o.TextToAudioPipeline,TokenClassificationPipeline:()=>o.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>u.TokenizerModel,TrOCRForCausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>o.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>d.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>d.ViTImageProcessor,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>d.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>u.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>u.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>d.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>d.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudioFrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>
l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>d.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>d.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,WhisperModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>d.WhisperProcessor,WhisperTextStreamer:()=>g.WhisperTextStreamer,WhisperTokenizer:()=>u.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>u.XLMRobertaTokenizer,XLMTokenizer:()=>u.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>d.YolosFeatureExtractor,YolosForObjectDetection:()=>l.YolosForObjectDetection,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>o.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>o.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>o.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>o.ZeroShotObjectDetectionPipeline,bankers_round:()=>f.bankers_round,cat:()=>m.cat,cos_sim:()=>f.cos_sim,dot:()=>f.dot,dynamic_time_warping:()=>f.dynamic_time_warping,env:()=>s.env,full:()=>m.full,full_like:()=>m.full_like,getKeyValueShapes:()=>c.getKeyValueShapes,hamming:()=>p.hamming,hanning:()=>p.hanning,interpolate:()=>m.interpolate,interpolate_4d:()=>m.interpolate_4d,interpolate_data:()=>f.interpolate_data,is_chinese_char:()=>u.is_chinese_char,layer_norm:()=>m.layer_norm,log_softmax:()=>f.log_softmax,magnitude:()=>f.magnitude,matmul:()=>m.matmul,max:()=>f.max,mean:()=>m.mean,mean_pooling:()=>m.mean_pooling,medianFilter:()=>f.medianFilter,mel_filter_bank:()=>p.mel_filter_bank,min:()=>f.min,ones:()=>m.ones,ones_like:()=>m.ones_like,permute:()=>m.permute,permute_data:()=>f.permute_data,pipeline:()=>o.pipeline,quantize_embeddings:()=>m.quantize_embeddings,read_audio:()=>p.read_audio,rfft:()=>m.rfft,round:()=>f.round,softmax:()=>f.softmax,spectrogram:()=>p.spectrogram,stack:()=>m.stack,std_mean:()=>m.std_mean,topk:()=>m.topk,window_function:()=>p.window_function,zeros:()=>m.zeros,zeros_like:()=>m.zeros_like});var s=i(/*! ./env.js */"./src/env.js"),o=i(/*! ./pipelines.js */"./src/pipelines.js"),l=i(/*! ./models.js */"./src/models.js"),u=i(/*! ./tokenizers.js */"./src/tokenizers.js"),d=i(/*! ./processors.js */"./src/processors.js"),c=i(/*! ./configs.js */"./src/configs.js"),p=i(/*! ./utils/audio.js */"./src/utils/audio.js"),h=i(/*! ./utils/image.js */"./src/utils/image.js"),m=i(/*! ./utils/tensor.js */"./src/utils/tensor.js"),f=i(/*! ./utils/maths.js */"./src/utils/maths.js"),g=i(/*! ./generation/streamers.js */"./src/generation/streamers.js"),_=i(/*! 
./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),w=a.ASTFeatureExtractor,y=a.ASTForAudioClassification,b=a.ASTModel,v=a.ASTPreTrainedModel,x=a.AlbertForMaskedLM,M=a.AlbertForQuestionAnswering,T=a.AlbertForSequenceClassification,k=a.AlbertModel,$=a.AlbertPreTrainedModel,S=a.AlbertTokenizer,C=a.AudioClassificationPipeline,E=a.AutoConfig,P=a.AutoModel,A=a.AutoModelForAudioClassification,F=a.AutoModelForAudioFrameClassification,z=a.AutoModelForCTC,I=a.AutoModelForCausalLM,O=a.AutoModelForDepthEstimation,B=a.AutoModelForDocumentQuestionAnswering,L=a.AutoModelForImageClassification,D=a.AutoModelForImageFeatureExtraction,R=a.AutoModelForImageMatting,N=a.AutoModelForImageSegmentation,V=a.AutoModelForImageToImage,j=a.AutoModelForMaskGeneration,G=a.AutoModelForMaskedLM,q=a.AutoModelForNormalEstimation,U=a.AutoModelForObjectDetection,W=a.AutoModelForQuestionAnswering,H=a.AutoModelForSemanticSegmentation,X=a.AutoModelForSeq2SeqLM,K=a.AutoModelForSequenceClassification,Q=a.AutoModelForSpeechSeq2Seq,Y=a.AutoModelForTextToSpectrogram,J=a.AutoModelForTextToWaveform,Z=a.AutoModelForTokenClassification,ee=a.AutoModelForVision2Seq,te=a.AutoModelForXVector,ne=a.AutoModelForZeroShotObjectDetection,re=a.AutoProcessor,ie=a.AutoTokenizer,ae=a.AutomaticSpeechRecognitionPipeline,se=a.BartForConditionalGeneration,oe=a.BartForSequenceClassification,le=a.BartModel,ue=a.BartPretrainedModel,de=a.BartTokenizer,ce=a.BaseModelOutput,pe=a.BaseStreamer,he=a.BeitFeatureExtractor,me=a.BeitForImageClassification,fe=a.BeitModel,ge=a.BeitPreTrainedModel,_e=a.BertForMaskedLM,we=a.BertForQuestionAnswering,ye=a.BertForSequenceClassification,be=a.BertForTokenClassification,ve=a.BertModel,xe=a.BertPreTrainedModel,Me=a.BertTokenizer,Te=a.BitImageProcessor,ke=a.BlenderbotForConditionalGeneration,$e=a.BlenderbotModel,Se=a.BlenderbotPreTrainedModel,Ce=a.BlenderbotSmallForConditionalGeneration,Ee=a.BlenderbotSmallModel,Pe=a.BlenderbotSmallPreTrainedModel,Ae=a.BlenderbotSmallTokenizer,Fe=a.BlenderbotTokenizer,ze=a.BloomForCausalLM,Ie=a.BloomModel,Oe=a.BloomPreTrainedModel,Be=a.BloomTokenizer,Le=a.CLIPFeatureExtractor,De=a.CLIPImageProcessor,Re=a.CLIPModel,Ne=a.CLIPPreTrainedModel,Ve=a.CLIPSegForImageSegmentation,je=a.CLIPSegModel,Ge=a.CLIPSegPreTrainedModel,qe=a.CLIPTextModelWithProjection,Ue=a.CLIPTokenizer,We=a.CLIPVisionModelWithProjection,He=a.CamembertForMaskedLM,Xe=a.CamembertForQuestionAnswering,Ke=a.CamembertForSequenceClassification,Qe=a.CamembertForTokenClassification,Ye=a.CamembertModel,Je=a.CamembertPreTrainedModel,Ze=a.CamembertTokenizer,et=a.CausalLMOutput,tt=a.CausalLMOutputWithPast,nt=a.ChineseCLIPFeatureExtractor,rt=a.ChineseCLIPModel,it=a.ChineseCLIPPreTrainedModel,at=a.ClapAudioModelWithProjection,st=a.ClapFeatureExtractor,ot=a.ClapModel,lt=a.ClapPreTrainedModel,ut=a.ClapTextModelWithProjection,dt=a.CodeGenForCausalLM,ct=a.CodeGenModel,pt=a.CodeGenPreTrainedModel,ht=a.CodeGenTokenizer,mt=a.CodeLlamaTokenizer,ft=a.CohereForCausalLM,gt=a.CohereModel,_t=a.CoherePreTrainedModel,wt=a.CohereTokenizer,yt=a.ConvBertForMaskedLM,bt=a.ConvBertForQuestionAnswering,vt=a.ConvBertForSequenceClassification,xt=a.ConvBertForTokenClassification,Mt=a.ConvBertModel,Tt=a.ConvBertPreTrainedModel,kt=a.ConvBertTokenizer,$t=a.ConvNextFeatureExtractor,St=a.ConvNextForImageClassification,Ct=a.ConvNextImageProcessor,Et=a.ConvNextModel,Pt=a.ConvNextPreTrainedModel,At=a.ConvNextV2ForImageClassification,Ft=a.ConvNextV2Model,zt=a.ConvNextV2PreTrainedModel,It=a.DPTFeatureExtractor,Ot=a.DPTForDepthEstimation,Bt=a.DPTImageProcesso
r,Lt=a.DPTModel,Dt=a.DPTPreTrainedModel,Rt=a.DebertaForMaskedLM,Nt=a.DebertaForQuestionAnswering,Vt=a.DebertaForSequenceClassification,jt=a.DebertaForTokenClassification,Gt=a.DebertaModel,qt=a.DebertaPreTrainedModel,Ut=a.DebertaTokenizer,Wt=a.DebertaV2ForMaskedLM,Ht=a.DebertaV2ForQuestionAnswering,Xt=a.DebertaV2ForSequenceClassification,Kt=a.DebertaV2ForTokenClassification,Qt=a.DebertaV2Model,Yt=a.DebertaV2PreTrainedModel,Jt=a.DebertaV2Tokenizer,Zt=a.DeiTFeatureExtractor,en=a.DeiTForImageClassification,tn=a.DeiTModel,nn=a.DeiTPreTrainedModel,rn=a.DepthAnythingForDepthEstimation,an=a.DepthAnythingPreTrainedModel,sn=a.DepthEstimationPipeline,on=a.DetrFeatureExtractor,ln=a.DetrForObjectDetection,un=a.DetrForSegmentation,dn=a.DetrModel,cn=a.DetrObjectDetectionOutput,pn=a.DetrPreTrainedModel,hn=a.DetrSegmentationOutput,mn=a.Dinov2ForImageClassification,fn=a.Dinov2Model,gn=a.Dinov2PreTrainedModel,_n=a.DistilBertForMaskedLM,wn=a.DistilBertForQuestionAnswering,yn=a.DistilBertForSequenceClassification,bn=a.DistilBertForTokenClassification,vn=a.DistilBertModel,xn=a.DistilBertPreTrainedModel,Mn=a.DistilBertTokenizer,Tn=a.DocumentQuestionAnsweringPipeline,kn=a.DonutFeatureExtractor,$n=a.DonutSwinModel,Sn=a.DonutSwinPreTrainedModel,Cn=a.EfficientNetForImageClassification,En=a.EfficientNetImageProcessor,Pn=a.EfficientNetModel,An=a.EfficientNetPreTrainedModel,Fn=a.ElectraForMaskedLM,zn=a.ElectraForQuestionAnswering,In=a.ElectraForSequenceClassification,On=a.ElectraForTokenClassification,Bn=a.ElectraModel,Ln=a.ElectraPreTrainedModel,Dn=a.ElectraTokenizer,Rn=a.EosTokenCriteria,Nn=a.EsmForMaskedLM,Vn=a.EsmForSequenceClassification,jn=a.EsmForTokenClassification,Gn=a.EsmModel,qn=a.EsmPreTrainedModel,Un=a.EsmTokenizer,Wn=a.FFT,Hn=a.FalconForCausalLM,Xn=a.FalconModel,Kn=a.FalconPreTrainedModel,Qn=a.FalconTokenizer,Yn=a.FastViTForImageClassification,Jn=a.FastViTModel,Zn=a.FastViTPreTrainedModel,er=a.FeatureExtractionPipeline,tr=a.FeatureExtractor,nr=a.FillMaskPipeline,rr=a.Florence2ForConditionalGeneration,ir=a.Florence2PreTrainedModel,ar=a.Florence2Processor,sr=a.GLPNFeatureExtractor,or=a.GLPNForDepthEstimation,lr=a.GLPNModel,ur=a.GLPNPreTrainedModel,dr=a.GPT2LMHeadModel,cr=a.GPT2Model,pr=a.GPT2PreTrainedModel,hr=a.GPT2Tokenizer,mr=a.GPTBigCodeForCausalLM,fr=a.GPTBigCodeModel,gr=a.GPTBigCodePreTrainedModel,_r=a.GPTJForCausalLM,wr=a.GPTJModel,yr=a.GPTJPreTrainedModel,br=a.GPTNeoForCausalLM,vr=a.GPTNeoModel,xr=a.GPTNeoPreTrainedModel,Mr=a.GPTNeoXForCausalLM,Tr=a.GPTNeoXModel,kr=a.GPTNeoXPreTrainedModel,$r=a.GPTNeoXTokenizer,Sr=a.Gemma2ForCausalLM,Cr=a.Gemma2Model,Er=a.Gemma2PreTrainedModel,Pr=a.GemmaForCausalLM,Ar=a.GemmaModel,Fr=a.GemmaPreTrainedModel,zr=a.GemmaTokenizer,Ir=a.Grok1Tokenizer,Or=a.HerbertTokenizer,Br=a.HubertForCTC,Lr=a.HubertForSequenceClassification,Dr=a.HubertModel,Rr=a.HubertPreTrainedModel,Nr=a.ImageClassificationPipeline,Vr=a.ImageFeatureExtractionPipeline,jr=a.ImageFeatureExtractor,Gr=a.ImageMattingOutput,qr=a.ImageSegmentationPipeline,Ur=a.ImageToImagePipeline,Wr=a.ImageToTextPipeline,Hr=a.InterruptableStoppingCriteria,Xr=a.JAISLMHeadModel,Kr=a.JAISModel,Qr=a.JAISPreTrainedModel,Yr=a.LlamaForCausalLM,Jr=a.LlamaModel,Zr=a.LlamaPreTrainedModel,ei=a.LlamaTokenizer,ti=a.LlavaForConditionalGeneration,ni=a.LlavaPreTrainedModel,ri=a.LongT5ForConditionalGeneration,ii=a.LongT5Model,ai=a.LongT5PreTrainedModel,si=a.M2M100ForConditionalGeneration,oi=a.M2M100Model,li=a.M2M100PreTrainedModel,ui=a.M2M100Tokenizer,di=a.MBart50Tokenizer,ci=a.MBartForCausalLM,pi=a.MBartForConditionalGeneration,hi=a.MBartFor
SequenceClassification,mi=a.MBartModel,fi=a.MBartPreTrainedModel,gi=a.MBartTokenizer,_i=a.MPNetForMaskedLM,wi=a.MPNetForQuestionAnswering,yi=a.MPNetForSequenceClassification,bi=a.MPNetForTokenClassification,vi=a.MPNetModel,xi=a.MPNetPreTrainedModel,Mi=a.MPNetTokenizer,Ti=a.MT5ForConditionalGeneration,ki=a.MT5Model,$i=a.MT5PreTrainedModel,Si=a.MarianMTModel,Ci=a.MarianModel,Ei=a.MarianPreTrainedModel,Pi=a.MarianTokenizer,Ai=a.MaskedLMOutput,Fi=a.MaxLengthCriteria,zi=a.MistralForCausalLM,Ii=a.MistralModel,Oi=a.MistralPreTrainedModel,Bi=a.MobileBertForMaskedLM,Li=a.MobileBertForQuestionAnswering,Di=a.MobileBertForSequenceClassification,Ri=a.MobileBertModel,Ni=a.MobileBertPreTrainedModel,Vi=a.MobileBertTokenizer,ji=a.MobileNetV1FeatureExtractor,Gi=a.MobileNetV1ForImageClassification,qi=a.MobileNetV1Model,Ui=a.MobileNetV1PreTrainedModel,Wi=a.MobileNetV2FeatureExtractor,Hi=a.MobileNetV2ForImageClassification,Xi=a.MobileNetV2Model,Ki=a.MobileNetV2PreTrainedModel,Qi=a.MobileNetV3FeatureExtractor,Yi=a.MobileNetV3ForImageClassification,Ji=a.MobileNetV3Model,Zi=a.MobileNetV3PreTrainedModel,ea=a.MobileNetV4FeatureExtractor,ta=a.MobileNetV4ForImageClassification,na=a.MobileNetV4Model,ra=a.MobileNetV4PreTrainedModel,ia=a.MobileViTFeatureExtractor,aa=a.MobileViTForImageClassification,sa=a.MobileViTImageProcessor,oa=a.MobileViTModel,la=a.MobileViTPreTrainedModel,ua=a.MobileViTV2ForImageClassification,da=a.MobileViTV2Model,ca=a.MobileViTV2PreTrainedModel,pa=a.ModelOutput,ha=a.Moondream1ForConditionalGeneration,ma=a.MptForCausalLM,fa=a.MptModel,ga=a.MptPreTrainedModel,_a=a.MusicgenForCausalLM,wa=a.MusicgenForConditionalGeneration,ya=a.MusicgenModel,ba=a.MusicgenPreTrainedModel,va=a.NllbTokenizer,xa=a.NomicBertModel,Ma=a.NomicBertPreTrainedModel,Ta=a.NougatImageProcessor,ka=a.NougatTokenizer,$a=a.OPTForCausalLM,Sa=a.OPTModel,Ca=a.OPTPreTrainedModel,Ea=a.ObjectDetectionPipeline,Pa=a.OpenELMForCausalLM,Aa=a.OpenELMModel,Fa=a.OpenELMPreTrainedModel,za=a.OwlViTFeatureExtractor,Ia=a.OwlViTForObjectDetection,Oa=a.OwlViTModel,Ba=a.OwlViTPreTrainedModel,La=a.OwlViTProcessor,Da=a.Owlv2ForObjectDetection,Ra=a.Owlv2ImageProcessor,Na=a.Owlv2Model,Va=a.Owlv2PreTrainedModel,ja=a.Phi3ForCausalLM,Ga=a.Phi3Model,qa=a.Phi3PreTrainedModel,Ua=a.PhiForCausalLM,Wa=a.PhiModel,Ha=a.PhiPreTrainedModel,Xa=a.Pipeline,Ka=a.PreTrainedModel,Qa=a.PreTrainedTokenizer,Ya=a.PretrainedConfig,Ja=a.PretrainedMixin,Za=a.Processor,es=a.PyAnnoteFeatureExtractor,ts=a.PyAnnoteForAudioFrameClassification,ns=a.PyAnnoteModel,rs=a.PyAnnotePreTrainedModel,is=a.PyAnnoteProcessor,as=a.QuestionAnsweringModelOutput,ss=a.QuestionAnsweringPipeline,os=a.Qwen2ForCausalLM,ls=a.Qwen2Model,us=a.Qwen2PreTrainedModel,ds=a.Qwen2Tokenizer,cs=a.RTDetrForObjectDetection,ps=a.RTDetrImageProcessor,hs=a.RTDetrModel,ms=a.RTDetrObjectDetectionOutput,fs=a.RTDetrPreTrainedModel,gs=a.RawImage,_s=a.ResNetForImageClassification,ws=a.ResNetModel,ys=a.ResNetPreTrainedModel,bs=a.RoFormerForMaskedLM,vs=a.RoFormerForQuestionAnswering,xs=a.RoFormerForSequenceClassification,Ms=a.RoFormerForTokenClassification,Ts=a.RoFormerModel,ks=a.RoFormerPreTrainedModel,$s=a.RoFormerTokenizer,Ss=a.RobertaForMaskedLM,Cs=a.RobertaForQuestionAnswering,Es=a.RobertaForSequenceClassification,Ps=a.RobertaForTokenClassification,As=a.RobertaModel,Fs=a.RobertaPreTrainedModel,zs=a.RobertaTokenizer,Is=a.SamImageProcessor,Os=a.SamImageSegmentationOutput,Bs=a.SamModel,Ls=a.SamPreTrainedModel,Ds=a.SamProcessor,Rs=a.SapiensFeatureExtractor,Ns=a.SapiensForDepthEstimation,Vs=a.SapiensForNormalEstimation,js=a.SapiensForS
emanticSegmentation,Gs=a.SapiensPreTrainedModel,qs=a.SeamlessM4TFeatureExtractor,Us=a.SegformerFeatureExtractor,Ws=a.SegformerForImageClassification,Hs=a.SegformerForSemanticSegmentation,Xs=a.SegformerModel,Ks=a.SegformerPreTrainedModel,Qs=a.Seq2SeqLMOutput,Ys=a.SequenceClassifierOutput,Js=a.SiglipImageProcessor,Zs=a.SiglipModel,eo=a.SiglipPreTrainedModel,to=a.SiglipTextModel,no=a.SiglipTokenizer,ro=a.SiglipVisionModel,io=a.SpeechT5FeatureExtractor,ao=a.SpeechT5ForSpeechToText,so=a.SpeechT5ForTextToSpeech,oo=a.SpeechT5HifiGan,lo=a.SpeechT5Model,uo=a.SpeechT5PreTrainedModel,co=a.SpeechT5Processor,po=a.SpeechT5Tokenizer,ho=a.SqueezeBertForMaskedLM,mo=a.SqueezeBertForQuestionAnswering,fo=a.SqueezeBertForSequenceClassification,go=a.SqueezeBertModel,_o=a.SqueezeBertPreTrainedModel,wo=a.SqueezeBertTokenizer,yo=a.StableLmForCausalLM,bo=a.StableLmModel,vo=a.StableLmPreTrainedModel,xo=a.Starcoder2ForCausalLM,Mo=a.Starcoder2Model,To=a.Starcoder2PreTrainedModel,ko=a.StoppingCriteria,$o=a.StoppingCriteriaList,So=a.SummarizationPipeline,Co=a.Swin2SRForImageSuperResolution,Eo=a.Swin2SRImageProcessor,Po=a.Swin2SRModel,Ao=a.Swin2SRPreTrainedModel,Fo=a.SwinForImageClassification,zo=a.SwinModel,Io=a.SwinPreTrainedModel,Oo=a.T5ForConditionalGeneration,Bo=a.T5Model,Lo=a.T5PreTrainedModel,Do=a.T5Tokenizer,Ro=a.TableTransformerForObjectDetection,No=a.TableTransformerModel,Vo=a.TableTransformerObjectDetectionOutput,jo=a.TableTransformerPreTrainedModel,Go=a.Tensor,qo=a.Text2TextGenerationPipeline,Uo=a.TextClassificationPipeline,Wo=a.TextGenerationPipeline,Ho=a.TextStreamer,Xo=a.TextToAudioPipeline,Ko=a.TokenClassificationPipeline,Qo=a.TokenClassifierOutput,Yo=a.TokenizerModel,Jo=a.TrOCRForCausalLM,Zo=a.TrOCRPreTrainedModel,el=a.TranslationPipeline,tl=a.UniSpeechForCTC,nl=a.UniSpeechForSequenceClassification,rl=a.UniSpeechModel,il=a.UniSpeechPreTrainedModel,al=a.UniSpeechSatForAudioFrameClassification,sl=a.UniSpeechSatForCTC,ol=a.UniSpeechSatForSequenceClassification,ll=a.UniSpeechSatModel,ul=a.UniSpeechSatPreTrainedModel,dl=a.ViTFeatureExtractor,cl=a.ViTForImageClassification,pl=a.ViTImageProcessor,hl=a.ViTModel,ml=a.ViTPreTrainedModel,fl=a.VisionEncoderDecoderModel,gl=a.VitMatteForImageMatting,_l=a.VitMatteImageProcessor,wl=a.VitMattePreTrainedModel,yl=a.VitsModel,bl=a.VitsModelOutput,vl=a.VitsPreTrainedModel,xl=a.VitsTokenizer,Ml=a.Wav2Vec2BertForCTC,Tl=a.Wav2Vec2BertForSequenceClassification,kl=a.Wav2Vec2BertModel,$l=a.Wav2Vec2BertPreTrainedModel,Sl=a.Wav2Vec2CTCTokenizer,Cl=a.Wav2Vec2FeatureExtractor,El=a.Wav2Vec2ForAudioFrameClassification,Pl=a.Wav2Vec2ForCTC,Al=a.Wav2Vec2ForSequenceClassification,Fl=a.Wav2Vec2Model,zl=a.Wav2Vec2PreTrainedModel,Il=a.Wav2Vec2ProcessorWithLM,Ol=a.WavLMForAudioFrameClassification,Bl=a.WavLMForCTC,Ll=a.WavLMForSequenceClassification,Dl=a.WavLMForXVector,Rl=a.WavLMModel,Nl=a.WavLMPreTrainedModel,Vl=a.WeSpeakerFeatureExtractor,jl=a.WeSpeakerResNetModel,Gl=a.WeSpeakerResNetPreTrainedModel,ql=a.WhisperFeatureExtractor,Ul=a.WhisperForConditionalGeneration,Wl=a.WhisperModel,Hl=a.WhisperPreTrainedModel,Xl=a.WhisperProcessor,Kl=a.WhisperTextStreamer,Ql=a.WhisperTokenizer,Yl=a.XLMForQuestionAnswering,Jl=a.XLMForSequenceClassification,Zl=a.XLMForTokenClassification,eu=a.XLMModel,tu=a.XLMPreTrainedModel,nu=a.XLMRobertaForMaskedLM,ru=a.XLMRobertaForQuestionAnswering,iu=a.XLMRobertaForSequenceClassification,au=a.XLMRobertaForTokenClassification,su=a.XLMRobertaModel,ou=a.XLMRobertaPreTrainedModel,lu=a.XLMRobertaTokenizer,uu=a.XLMTokenizer,du=a.XLMWithLMHeadModel,cu=a.XVectorOutput,pu=a.YolosFe
atureExtractor,hu=a.YolosForObjectDetection,mu=a.YolosModel,fu=a.YolosObjectDetectionOutput,gu=a.YolosPreTrainedModel,_u=a.ZeroShotAudioClassificationPipeline,wu=a.ZeroShotClassificationPipeline,yu=a.ZeroShotImageClassificationPipeline,bu=a.ZeroShotObjectDetectionPipeline,vu=a.bankers_round,xu=a.cat,Mu=a.cos_sim,Tu=a.dot,ku=a.dynamic_time_warping,$u=a.env,Su=a.full,Cu=a.full_like,Eu=a.getKeyValueShapes,Pu=a.hamming,Au=a.hanning,Fu=a.interpolate,zu=a.interpolate_4d,Iu=a.interpolate_data,Ou=a.is_chinese_char,Bu=a.layer_norm,Lu=a.log_softmax,Du=a.magnitude,Ru=a.matmul,Nu=a.max,Vu=a.mean,ju=a.mean_pooling,Gu=a.medianFilter,qu=a.mel_filter_bank,Uu=a.min,Wu=a.ones,Hu=a.ones_like,Xu=a.permute,Ku=a.permute_data,Qu=a.pipeline,Yu=a.quantize_embeddings,Ju=a.read_audio,Zu=a.rfft,ed=a.round,td=a.softmax,nd=a.spectrogram,rd=a.stack,id=a.std_mean,ad=a.topk,sd=a.window_function,od=a.zeros,ld=a.zeros_like;export{w as ASTFeatureExtractor,y as ASTForAudioClassification,b as ASTModel,v as ASTPreTrainedModel,x as AlbertForMaskedLM,M as AlbertForQuestionAnswering,T as AlbertForSequenceClassification,k as AlbertModel,$ as AlbertPreTrainedModel,S as AlbertTokenizer,C as AudioClassificationPipeline,E as AutoConfig,P as AutoModel,A as AutoModelForAudioClassification,F as AutoModelForAudioFrameClassification,z as AutoModelForCTC,I as AutoModelForCausalLM,O as AutoModelForDepthEstimation,B as AutoModelForDocumentQuestionAnswering,L as AutoModelForImageClassification,D as AutoModelForImageFeatureExtraction,R as AutoModelForImageMatting,N as AutoModelForImageSegmentation,V as AutoModelForImageToImage,j as AutoModelForMaskGeneration,G as AutoModelForMaskedLM,q as AutoModelForNormalEstimation,U as AutoModelForObjectDetection,W as AutoModelForQuestionAnswering,H as AutoModelForSemanticSegmentation,X as AutoModelForSeq2SeqLM,K as AutoModelForSequenceClassification,Q as AutoModelForSpeechSeq2Seq,Y as AutoModelForTextToSpectrogram,J as AutoModelForTextToWaveform,Z as AutoModelForTokenClassification,ee as AutoModelForVision2Seq,te as AutoModelForXVector,ne as AutoModelForZeroShotObjectDetection,re as AutoProcessor,ie as AutoTokenizer,ae as AutomaticSpeechRecognitionPipeline,se as BartForConditionalGeneration,oe as BartForSequenceClassification,le as BartModel,ue as BartPretrainedModel,de as BartTokenizer,ce as BaseModelOutput,pe as BaseStreamer,he as BeitFeatureExtractor,me as BeitForImageClassification,fe as BeitModel,ge as BeitPreTrainedModel,_e as BertForMaskedLM,we as BertForQuestionAnswering,ye as BertForSequenceClassification,be as BertForTokenClassification,ve as BertModel,xe as BertPreTrainedModel,Me as BertTokenizer,Te as BitImageProcessor,ke as BlenderbotForConditionalGeneration,$e as BlenderbotModel,Se as BlenderbotPreTrainedModel,Ce as BlenderbotSmallForConditionalGeneration,Ee as BlenderbotSmallModel,Pe as BlenderbotSmallPreTrainedModel,Ae as BlenderbotSmallTokenizer,Fe as BlenderbotTokenizer,ze as BloomForCausalLM,Ie as BloomModel,Oe as BloomPreTrainedModel,Be as BloomTokenizer,Le as CLIPFeatureExtractor,De as CLIPImageProcessor,Re as CLIPModel,Ne as CLIPPreTrainedModel,Ve as CLIPSegForImageSegmentation,je as CLIPSegModel,Ge as CLIPSegPreTrainedModel,qe as CLIPTextModelWithProjection,Ue as CLIPTokenizer,We as CLIPVisionModelWithProjection,He as CamembertForMaskedLM,Xe as CamembertForQuestionAnswering,Ke as CamembertForSequenceClassification,Qe as CamembertForTokenClassification,Ye as CamembertModel,Je as CamembertPreTrainedModel,Ze as CamembertTokenizer,et as CausalLMOutput,tt as CausalLMOutputWithPast,nt as 
ChineseCLIPFeatureExtractor,rt as ChineseCLIPModel,it as ChineseCLIPPreTrainedModel,at as ClapAudioModelWithProjection,st as ClapFeatureExtractor,ot as ClapModel,lt as ClapPreTrainedModel,ut as ClapTextModelWithProjection,dt as CodeGenForCausalLM,ct as CodeGenModel,pt as CodeGenPreTrainedModel,ht as CodeGenTokenizer,mt as CodeLlamaTokenizer,ft as CohereForCausalLM,gt as CohereModel,_t as CoherePreTrainedModel,wt as CohereTokenizer,yt as ConvBertForMaskedLM,bt as ConvBertForQuestionAnswering,vt as ConvBertForSequenceClassification,xt as ConvBertForTokenClassification,Mt as ConvBertModel,Tt as ConvBertPreTrainedModel,kt as ConvBertTokenizer,$t as ConvNextFeatureExtractor,St as ConvNextForImageClassification,Ct as ConvNextImageProcessor,Et as ConvNextModel,Pt as ConvNextPreTrainedModel,At as ConvNextV2ForImageClassification,Ft as ConvNextV2Model,zt as ConvNextV2PreTrainedModel,It as DPTFeatureExtractor,Ot as DPTForDepthEstimation,Bt as DPTImageProcessor,Lt as DPTModel,Dt as DPTPreTrainedModel,Rt as DebertaForMaskedLM,Nt as DebertaForQuestionAnswering,Vt as DebertaForSequenceClassification,jt as DebertaForTokenClassification,Gt as DebertaModel,qt as DebertaPreTrainedModel,Ut as DebertaTokenizer,Wt as DebertaV2ForMaskedLM,Ht as DebertaV2ForQuestionAnswering,Xt as DebertaV2ForSequenceClassification,Kt as DebertaV2ForTokenClassification,Qt as DebertaV2Model,Yt as DebertaV2PreTrainedModel,Jt as DebertaV2Tokenizer,Zt as DeiTFeatureExtractor,en as DeiTForImageClassification,tn as DeiTModel,nn as DeiTPreTrainedModel,rn as DepthAnythingForDepthEstimation,an as DepthAnythingPreTrainedModel,sn as DepthEstimationPipeline,on as DetrFeatureExtractor,ln as DetrForObjectDetection,un as DetrForSegmentation,dn as DetrModel,cn as DetrObjectDetectionOutput,pn as DetrPreTrainedModel,hn as DetrSegmentationOutput,mn as Dinov2ForImageClassification,fn as Dinov2Model,gn as Dinov2PreTrainedModel,_n as DistilBertForMaskedLM,wn as DistilBertForQuestionAnswering,yn as DistilBertForSequenceClassification,bn as DistilBertForTokenClassification,vn as DistilBertModel,xn as DistilBertPreTrainedModel,Mn as DistilBertTokenizer,Tn as DocumentQuestionAnsweringPipeline,kn as DonutFeatureExtractor,$n as DonutSwinModel,Sn as DonutSwinPreTrainedModel,Cn as EfficientNetForImageClassification,En as EfficientNetImageProcessor,Pn as EfficientNetModel,An as EfficientNetPreTrainedModel,Fn as ElectraForMaskedLM,zn as ElectraForQuestionAnswering,In as ElectraForSequenceClassification,On as ElectraForTokenClassification,Bn as ElectraModel,Ln as ElectraPreTrainedModel,Dn as ElectraTokenizer,Rn as EosTokenCriteria,Nn as EsmForMaskedLM,Vn as EsmForSequenceClassification,jn as EsmForTokenClassification,Gn as EsmModel,qn as EsmPreTrainedModel,Un as EsmTokenizer,Wn as FFT,Hn as FalconForCausalLM,Xn as FalconModel,Kn as FalconPreTrainedModel,Qn as FalconTokenizer,Yn as FastViTForImageClassification,Jn as FastViTModel,Zn as FastViTPreTrainedModel,er as FeatureExtractionPipeline,tr as FeatureExtractor,nr as FillMaskPipeline,rr as Florence2ForConditionalGeneration,ir as Florence2PreTrainedModel,ar as Florence2Processor,sr as GLPNFeatureExtractor,or as GLPNForDepthEstimation,lr as GLPNModel,ur as GLPNPreTrainedModel,dr as GPT2LMHeadModel,cr as GPT2Model,pr as GPT2PreTrainedModel,hr as GPT2Tokenizer,mr as GPTBigCodeForCausalLM,fr as GPTBigCodeModel,gr as GPTBigCodePreTrainedModel,_r as GPTJForCausalLM,wr as GPTJModel,yr as GPTJPreTrainedModel,br as GPTNeoForCausalLM,vr as GPTNeoModel,xr as GPTNeoPreTrainedModel,Mr as GPTNeoXForCausalLM,Tr as 
GPTNeoXModel,kr as GPTNeoXPreTrainedModel,$r as GPTNeoXTokenizer,Sr as Gemma2ForCausalLM,Cr as Gemma2Model,Er as Gemma2PreTrainedModel,Pr as GemmaForCausalLM,Ar as GemmaModel,Fr as GemmaPreTrainedModel,zr as GemmaTokenizer,Ir as Grok1Tokenizer,Or as HerbertTokenizer,Br as HubertForCTC,Lr as HubertForSequenceClassification,Dr as HubertModel,Rr as HubertPreTrainedModel,Nr as ImageClassificationPipeline,Vr as ImageFeatureExtractionPipeline,jr as ImageFeatureExtractor,Gr as ImageMattingOutput,qr as ImageSegmentationPipeline,Ur as ImageToImagePipeline,Wr as ImageToTextPipeline,Hr as InterruptableStoppingCriteria,Xr as JAISLMHeadModel,Kr as JAISModel,Qr as JAISPreTrainedModel,Yr as LlamaForCausalLM,Jr as LlamaModel,Zr as LlamaPreTrainedModel,ei as LlamaTokenizer,ti as LlavaForConditionalGeneration,ni as LlavaPreTrainedModel,ri as LongT5ForConditionalGeneration,ii as LongT5Model,ai as LongT5PreTrainedModel,si as M2M100ForConditionalGeneration,oi as M2M100Model,li as M2M100PreTrainedModel,ui as M2M100Tokenizer,di as MBart50Tokenizer,ci as MBartForCausalLM,pi as MBartForConditionalGeneration,hi as MBartForSequenceClassification,mi as MBartModel,fi as MBartPreTrainedModel,gi as MBartTokenizer,_i as MPNetForMaskedLM,wi as MPNetForQuestionAnswering,yi as MPNetForSequenceClassification,bi as MPNetForTokenClassification,vi as MPNetModel,xi as MPNetPreTrainedModel,Mi as MPNetTokenizer,Ti as MT5ForConditionalGeneration,ki as MT5Model,$i as MT5PreTrainedModel,Si as MarianMTModel,Ci as MarianModel,Ei as MarianPreTrainedModel,Pi as MarianTokenizer,Ai as MaskedLMOutput,Fi as MaxLengthCriteria,zi as MistralForCausalLM,Ii as MistralModel,Oi as MistralPreTrainedModel,Bi as MobileBertForMaskedLM,Li as MobileBertForQuestionAnswering,Di as MobileBertForSequenceClassification,Ri as MobileBertModel,Ni as MobileBertPreTrainedModel,Vi as MobileBertTokenizer,ji as MobileNetV1FeatureExtractor,Gi as MobileNetV1ForImageClassification,qi as MobileNetV1Model,Ui as MobileNetV1PreTrainedModel,Wi as MobileNetV2FeatureExtractor,Hi as MobileNetV2ForImageClassification,Xi as MobileNetV2Model,Ki as MobileNetV2PreTrainedModel,Qi as MobileNetV3FeatureExtractor,Yi as MobileNetV3ForImageClassification,Ji as MobileNetV3Model,Zi as MobileNetV3PreTrainedModel,ea as MobileNetV4FeatureExtractor,ta as MobileNetV4ForImageClassification,na as MobileNetV4Model,ra as MobileNetV4PreTrainedModel,ia as MobileViTFeatureExtractor,aa as MobileViTForImageClassification,sa as MobileViTImageProcessor,oa as MobileViTModel,la as MobileViTPreTrainedModel,ua as MobileViTV2ForImageClassification,da as MobileViTV2Model,ca as MobileViTV2PreTrainedModel,pa as ModelOutput,ha as Moondream1ForConditionalGeneration,ma as MptForCausalLM,fa as MptModel,ga as MptPreTrainedModel,_a as MusicgenForCausalLM,wa as MusicgenForConditionalGeneration,ya as MusicgenModel,ba as MusicgenPreTrainedModel,va as NllbTokenizer,xa as NomicBertModel,Ma as NomicBertPreTrainedModel,Ta as NougatImageProcessor,ka as NougatTokenizer,$a as OPTForCausalLM,Sa as OPTModel,Ca as OPTPreTrainedModel,Ea as ObjectDetectionPipeline,Pa as OpenELMForCausalLM,Aa as OpenELMModel,Fa as OpenELMPreTrainedModel,za as OwlViTFeatureExtractor,Ia as OwlViTForObjectDetection,Oa as OwlViTModel,Ba as OwlViTPreTrainedModel,La as OwlViTProcessor,Da as Owlv2ForObjectDetection,Ra as Owlv2ImageProcessor,Na as Owlv2Model,Va as Owlv2PreTrainedModel,ja as Phi3ForCausalLM,Ga as Phi3Model,qa as Phi3PreTrainedModel,Ua as PhiForCausalLM,Wa as PhiModel,Ha as PhiPreTrainedModel,Xa as Pipeline,Ka as PreTrainedModel,Qa as 
PreTrainedTokenizer,Ya as PretrainedConfig,Ja as PretrainedMixin,Za as Processor,es as PyAnnoteFeatureExtractor,ts as PyAnnoteForAudioFrameClassification,ns as PyAnnoteModel,rs as PyAnnotePreTrainedModel,is as PyAnnoteProcessor,as as QuestionAnsweringModelOutput,ss as QuestionAnsweringPipeline,os as Qwen2ForCausalLM,ls as Qwen2Model,us as Qwen2PreTrainedModel,ds as Qwen2Tokenizer,cs as RTDetrForObjectDetection,ps as RTDetrImageProcessor,hs as RTDetrModel,ms as RTDetrObjectDetectionOutput,fs as RTDetrPreTrainedModel,gs as RawImage,_s as ResNetForImageClassification,ws as ResNetModel,ys as ResNetPreTrainedModel,bs as RoFormerForMaskedLM,vs as RoFormerForQuestionAnswering,xs as RoFormerForSequenceClassification,Ms as RoFormerForTokenClassification,Ts as RoFormerModel,ks as RoFormerPreTrainedModel,$s as RoFormerTokenizer,Ss as RobertaForMaskedLM,Cs as RobertaForQuestionAnswering,Es as RobertaForSequenceClassification,Ps as RobertaForTokenClassification,As as RobertaModel,Fs as RobertaPreTrainedModel,zs as RobertaTokenizer,Is as SamImageProcessor,Os as SamImageSegmentationOutput,Bs as SamModel,Ls as SamPreTrainedModel,Ds as SamProcessor,Rs as SapiensFeatureExtractor,Ns as SapiensForDepthEstimation,Vs as SapiensForNormalEstimation,js as SapiensForSemanticSegmentation,Gs as SapiensPreTrainedModel,qs as SeamlessM4TFeatureExtractor,Us as SegformerFeatureExtractor,Ws as SegformerForImageClassification,Hs as SegformerForSemanticSegmentation,Xs as SegformerModel,Ks as SegformerPreTrainedModel,Qs as Seq2SeqLMOutput,Ys as SequenceClassifierOutput,Js as SiglipImageProcessor,Zs as SiglipModel,eo as SiglipPreTrainedModel,to as SiglipTextModel,no as SiglipTokenizer,ro as SiglipVisionModel,io as SpeechT5FeatureExtractor,ao as SpeechT5ForSpeechToText,so as SpeechT5ForTextToSpeech,oo as SpeechT5HifiGan,lo as SpeechT5Model,uo as SpeechT5PreTrainedModel,co as SpeechT5Processor,po as SpeechT5Tokenizer,ho as SqueezeBertForMaskedLM,mo as SqueezeBertForQuestionAnswering,fo as SqueezeBertForSequenceClassification,go as SqueezeBertModel,_o as SqueezeBertPreTrainedModel,wo as SqueezeBertTokenizer,yo as StableLmForCausalLM,bo as StableLmModel,vo as StableLmPreTrainedModel,xo as Starcoder2ForCausalLM,Mo as Starcoder2Model,To as Starcoder2PreTrainedModel,ko as StoppingCriteria,$o as StoppingCriteriaList,So as SummarizationPipeline,Co as Swin2SRForImageSuperResolution,Eo as Swin2SRImageProcessor,Po as Swin2SRModel,Ao as Swin2SRPreTrainedModel,Fo as SwinForImageClassification,zo as SwinModel,Io as SwinPreTrainedModel,Oo as T5ForConditionalGeneration,Bo as T5Model,Lo as T5PreTrainedModel,Do as T5Tokenizer,Ro as TableTransformerForObjectDetection,No as TableTransformerModel,Vo as TableTransformerObjectDetectionOutput,jo as TableTransformerPreTrainedModel,Go as Tensor,qo as Text2TextGenerationPipeline,Uo as TextClassificationPipeline,Wo as TextGenerationPipeline,Ho as TextStreamer,Xo as TextToAudioPipeline,Ko as TokenClassificationPipeline,Qo as TokenClassifierOutput,Yo as TokenizerModel,Jo as TrOCRForCausalLM,Zo as TrOCRPreTrainedModel,el as TranslationPipeline,tl as UniSpeechForCTC,nl as UniSpeechForSequenceClassification,rl as UniSpeechModel,il as UniSpeechPreTrainedModel,al as UniSpeechSatForAudioFrameClassification,sl as UniSpeechSatForCTC,ol as UniSpeechSatForSequenceClassification,ll as UniSpeechSatModel,ul as UniSpeechSatPreTrainedModel,dl as ViTFeatureExtractor,cl as ViTForImageClassification,pl as ViTImageProcessor,hl as ViTModel,ml as ViTPreTrainedModel,fl as VisionEncoderDecoderModel,gl as 
VitMatteForImageMatting,_l as VitMatteImageProcessor,wl as VitMattePreTrainedModel,yl as VitsModel,bl as VitsModelOutput,vl as VitsPreTrainedModel,xl as VitsTokenizer,Ml as Wav2Vec2BertForCTC,Tl as Wav2Vec2BertForSequenceClassification,kl as Wav2Vec2BertModel,$l as Wav2Vec2BertPreTrainedModel,Sl as Wav2Vec2CTCTokenizer,Cl as Wav2Vec2FeatureExtractor,El as Wav2Vec2ForAudioFrameClassification,Pl as Wav2Vec2ForCTC,Al as Wav2Vec2ForSequenceClassification,Fl as Wav2Vec2Model,zl as Wav2Vec2PreTrainedModel,Il as Wav2Vec2ProcessorWithLM,Ol as WavLMForAudioFrameClassification,Bl as WavLMForCTC,Ll as WavLMForSequenceClassification,Dl as WavLMForXVector,Rl as WavLMModel,Nl as WavLMPreTrainedModel,Vl as WeSpeakerFeatureExtractor,jl as WeSpeakerResNetModel,Gl as WeSpeakerResNetPreTrainedModel,ql as WhisperFeatureExtractor,Ul as WhisperForConditionalGeneration,Wl as WhisperModel,Hl as WhisperPreTrainedModel,Xl as WhisperProcessor,Kl as WhisperTextStreamer,Ql as WhisperTokenizer,Yl as XLMForQuestionAnswering,Jl as XLMForSequenceClassification,Zl as XLMForTokenClassification,eu as XLMModel,tu as XLMPreTrainedModel,nu as XLMRobertaForMaskedLM,ru as XLMRobertaForQuestionAnswering,iu as XLMRobertaForSequenceClassification,au as XLMRobertaForTokenClassification,su as XLMRobertaModel,ou as XLMRobertaPreTrainedModel,lu as XLMRobertaTokenizer,uu as XLMTokenizer,du as XLMWithLMHeadModel,cu as XVectorOutput,pu as YolosFeatureExtractor,hu as YolosForObjectDetection,mu as YolosModel,fu as YolosObjectDetectionOutput,gu as YolosPreTrainedModel,_u as ZeroShotAudioClassificationPipeline,wu as ZeroShotClassificationPipeline,yu as ZeroShotImageClassificationPipeline,bu as ZeroShotObjectDetectionPipeline,vu as bankers_round,xu as cat,Mu as cos_sim,Tu as dot,ku as dynamic_time_warping,$u as env,Su as full,Cu as full_like,Eu as getKeyValueShapes,Pu as hamming,Au as hanning,Fu as interpolate,zu as interpolate_4d,Iu as interpolate_data,Ou as is_chinese_char,Bu as layer_norm,Lu as log_softmax,Du as magnitude,Ru as matmul,Nu as max,Vu as mean,ju as mean_pooling,Gu as medianFilter,qu as mel_filter_bank,Uu as min,Wu as ones,Hu as ones_like,Xu as permute,Ku as permute_data,Qu as pipeline,Yu as quantize_embeddings,Ju as read_audio,Zu as rfft,ed as round,td as softmax,nd as spectrogram,rd as stack,id as std_mean,ad as topk,sd as window_function,od as zeros,ld as zeros_like};
\*****************************/i.r(a),i.d(a,{ASTFeatureExtractor:()=>d.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>u.AlbertTokenizer,AudioClassificationPipeline:()=>o.AudioClassificationPipeline,AutoConfig:()=>c.AutoConfig,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2Seq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>d.AutoProcessor,AutoTokenizer:()=>u.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>o.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>u.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>g.BaseStreamer,BeitFeatureExtractor:()=>d.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>u.BertTokenizer,BitImageProcessor:()=>d.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallMo
del,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>u.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>u.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,BloomModel:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>u.BloomTokenizer,CLIPFeatureExtractor:()=>d.CLIPFeatureExtractor,CLIPImageProcessor:()=>d.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>u.CLIPTokenizer,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>u.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>d.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>d.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>u.CodeGenTokenizer,CodeLlamaTokenizer:()=>u.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>u.CohereTokenizer,ConvBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTokenClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>u.ConvBertTokenizer,ConvNextFeatureExtractor:()=>d.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>d.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>d.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>d.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>u.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionA
nswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>u.DebertaV2Tokenizer,DeiTFeatureExtractor:()=>d.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTForImageClassification,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>o.DepthEstimationPipeline,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>u.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>o.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>d.DonutFeatureExtractor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>d.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassification:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>u.ElectraTokenizer,EosTokenCriteria:()=>_.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSequenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>u.EsmTokenizer,FFT:()=>f.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>u.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>o.FeatureExtractionPipeline,FeatureExtractor:()=>d.FeatureExtractor,FillMaskPipeline:()=>o.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>d.Florence2Processor,GLPNFeatureExtractor:()=>d.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:
()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>u.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNeoXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>u.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>u.GemmaTokenizer,Grok1Tokenizer:()=>u.Grok1Tokenizer,HerbertTokenizer:()=>u.HerbertTokenizer,HieraForImageClassification:()=>l.HieraForImageClassification,HieraModel:()=>l.HieraModel,HieraPreTrainedModel:()=>l.HieraPreTrainedModel,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,ImageClassificationPipeline:()=>o.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>o.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>d.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageSegmentationPipeline:()=>o.ImageSegmentationPipeline,ImageToImagePipeline:()=>o.ImageToImagePipeline,ImageToTextPipeline:()=>o.ImageToTextPipeline,InterruptableStoppingCriteria:()=>_.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>l.LlamaPreTrainedModel,LlamaTokenizer:()=>u.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>u.M2M100Tokenizer,MBart50Tokenizer:()=>u.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreTrainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>u.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>u.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>u.MarianTokenizer,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>_.MaxLengthCriteria,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()
=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>u.MobileBertTokenizer,MobileNetV1FeatureExtractor:()=>d.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>d.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2Model:()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>d.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>l.MobileNetV3ForImageClassification,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>d.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>d.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>d.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>u.NllbTokenizer,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>d.NougatImageProcessor,NougatTokenizer:()=>u.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>o.ObjectDetectionPipeline,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()=>l.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>d.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>d.OwlViTProcessor,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>d.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>o.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>u.PreTrainedTokenizer,PretrainedConfig:()=>c.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>d.Processor,PyAnnoteFeatureExtractor:()=>d.PyAnnoteFeatureExtractor,
PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>d.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>o.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>u.Qwen2Tokenizer,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>d.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>h.RawImage,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoFormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>u.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>u.RobertaTokenizer,SamImageProcessor:()=>d.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>d.SamProcessor,SapiensFeatureExtractor:()=>d.SapiensFeatureExtractor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>d.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>d.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.SegformerForSemanticSegmentation,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>d.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>u.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>d.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>d.SpeechT5Processor,SpeechT5Tokenizer:()=>u.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>u.SqueezeBertTokeniz
er,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>_.StoppingCriteria,StoppingCriteriaList:()=>_.StoppingCriteriaList,SummarizationPipeline:()=>o.SummarizationPipeline,Swin2SRForImageSuperResolution:()=>l.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>d.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,SwinModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>u.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,Tensor:()=>m.Tensor,Text2TextGenerationPipeline:()=>o.Text2TextGenerationPipeline,TextClassificationPipeline:()=>o.TextClassificationPipeline,TextGenerationPipeline:()=>o.TextGenerationPipeline,TextStreamer:()=>g.TextStreamer,TextToAudioPipeline:()=>o.TextToAudioPipeline,TokenClassificationPipeline:()=>o.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>u.TokenizerModel,TrOCRForCausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>o.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,ViTFeatureExtractor:()=>d.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>d.ViTImageProcessor,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>d.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>u.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>u.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>d.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>d.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudio
FrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>d.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>d.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,WhisperModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>d.WhisperProcessor,WhisperTextStreamer:()=>g.WhisperTextStreamer,WhisperTokenizer:()=>u.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>u.XLMRobertaTokenizer,XLMTokenizer:()=>u.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>d.YolosFeatureExtractor,YolosForObjectDetection:()=>l.YolosForObjectDetection,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>o.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>o.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>o.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>o.ZeroShotObjectDetectionPipeline,bankers_round:()=>f.bankers_round,cat:()=>m.cat,cos_sim:()=>f.cos_sim,dot:()=>f.dot,dynamic_time_warping:()=>f.dynamic_time_warping,env:()=>s.env,full:()=>m.full,full_like:()=>m.full_like,getKeyValueShapes:()=>c.getKeyValueShapes,hamming:()=>p.hamming,hanning:()=>p.hanning,interpolate:()=>m.interpolate,interpolate_4d:()=>m.interpolate_4d,interpolate_data:()=>f.interpolate_data,is_chinese_char:()=>u.is_chinese_char,layer_norm:()=>m.layer_norm,log_softmax:()=>f.log_softmax,magnitude:()=>f.magnitude,matmul:()=>m.matmul,max:()=>f.max,mean:()=>m.mean,mean_pooling:()=>m.mean_pooling,medianFilter:()=>f.medianFilter,mel_filter_bank:()=>p.mel_filter_bank,min:()=>f.min,ones:()=>m.ones,ones_like:()=>m.ones_like,permute:()=>m.permute,permute_data:()=>f.permute_data,pipeline:()=>o.pipeline,quantize_embeddings:()=>m.quantize_embeddings,read_audio:()=>p.read_audio,rfft:()=>m.rfft,round:()=>f.round,softmax:()=>f.softmax,spectrogram:()=>p.spectrogram,stack:()=>m.stack,std_mean:()=>m.std_mean,topk:()=>m.topk,window_function:()=>p.window_function,zeros:()=>m.zeros,zeros_like:()=>m.zeros_like});var s=i(/*! ./env.js */"./src/env.js"),o=i(/*! ./pipelines.js */"./src/pipelines.js"),l=i(/*! ./models.js */"./src/models.js"),u=i(/*! ./tokenizers.js */"./src/tokenizers.js"),d=i(/*! ./processors.js */"./src/processors.js"),c=i(/*! ./configs.js */"./src/configs.js"),p=i(/*! ./utils/audio.js */"./src/utils/audio.js"),h=i(/*! ./utils/image.js */"./src/utils/image.js"),m=i(/*! 
./utils/tensor.js */"./src/utils/tensor.js"),f=i(/*! ./utils/maths.js */"./src/utils/maths.js"),g=i(/*! ./generation/streamers.js */"./src/generation/streamers.js"),_=i(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),w=a.ASTFeatureExtractor,y=a.ASTForAudioClassification,b=a.ASTModel,v=a.ASTPreTrainedModel,x=a.AlbertForMaskedLM,M=a.AlbertForQuestionAnswering,T=a.AlbertForSequenceClassification,k=a.AlbertModel,$=a.AlbertPreTrainedModel,S=a.AlbertTokenizer,C=a.AudioClassificationPipeline,E=a.AutoConfig,P=a.AutoModel,A=a.AutoModelForAudioClassification,F=a.AutoModelForAudioFrameClassification,z=a.AutoModelForCTC,I=a.AutoModelForCausalLM,O=a.AutoModelForDepthEstimation,B=a.AutoModelForDocumentQuestionAnswering,L=a.AutoModelForImageClassification,D=a.AutoModelForImageFeatureExtraction,R=a.AutoModelForImageMatting,N=a.AutoModelForImageSegmentation,V=a.AutoModelForImageToImage,j=a.AutoModelForMaskGeneration,G=a.AutoModelForMaskedLM,q=a.AutoModelForNormalEstimation,U=a.AutoModelForObjectDetection,W=a.AutoModelForQuestionAnswering,H=a.AutoModelForSemanticSegmentation,X=a.AutoModelForSeq2SeqLM,K=a.AutoModelForSequenceClassification,Q=a.AutoModelForSpeechSeq2Seq,Y=a.AutoModelForTextToSpectrogram,J=a.AutoModelForTextToWaveform,Z=a.AutoModelForTokenClassification,ee=a.AutoModelForVision2Seq,te=a.AutoModelForXVector,ne=a.AutoModelForZeroShotObjectDetection,re=a.AutoProcessor,ie=a.AutoTokenizer,ae=a.AutomaticSpeechRecognitionPipeline,se=a.BartForConditionalGeneration,oe=a.BartForSequenceClassification,le=a.BartModel,ue=a.BartPretrainedModel,de=a.BartTokenizer,ce=a.BaseModelOutput,pe=a.BaseStreamer,he=a.BeitFeatureExtractor,me=a.BeitForImageClassification,fe=a.BeitModel,ge=a.BeitPreTrainedModel,_e=a.BertForMaskedLM,we=a.BertForQuestionAnswering,ye=a.BertForSequenceClassification,be=a.BertForTokenClassification,ve=a.BertModel,xe=a.BertPreTrainedModel,Me=a.BertTokenizer,Te=a.BitImageProcessor,ke=a.BlenderbotForConditionalGeneration,$e=a.BlenderbotModel,Se=a.BlenderbotPreTrainedModel,Ce=a.BlenderbotSmallForConditionalGeneration,Ee=a.BlenderbotSmallModel,Pe=a.BlenderbotSmallPreTrainedModel,Ae=a.BlenderbotSmallTokenizer,Fe=a.BlenderbotTokenizer,ze=a.BloomForCausalLM,Ie=a.BloomModel,Oe=a.BloomPreTrainedModel,Be=a.BloomTokenizer,Le=a.CLIPFeatureExtractor,De=a.CLIPImageProcessor,Re=a.CLIPModel,Ne=a.CLIPPreTrainedModel,Ve=a.CLIPSegForImageSegmentation,je=a.CLIPSegModel,Ge=a.CLIPSegPreTrainedModel,qe=a.CLIPTextModelWithProjection,Ue=a.CLIPTokenizer,We=a.CLIPVisionModelWithProjection,He=a.CamembertForMaskedLM,Xe=a.CamembertForQuestionAnswering,Ke=a.CamembertForSequenceClassification,Qe=a.CamembertForTokenClassification,Ye=a.CamembertModel,Je=a.CamembertPreTrainedModel,Ze=a.CamembertTokenizer,et=a.CausalLMOutput,tt=a.CausalLMOutputWithPast,nt=a.ChineseCLIPFeatureExtractor,rt=a.ChineseCLIPModel,it=a.ChineseCLIPPreTrainedModel,at=a.ClapAudioModelWithProjection,st=a.ClapFeatureExtractor,ot=a.ClapModel,lt=a.ClapPreTrainedModel,ut=a.ClapTextModelWithProjection,dt=a.CodeGenForCausalLM,ct=a.CodeGenModel,pt=a.CodeGenPreTrainedModel,ht=a.CodeGenTokenizer,mt=a.CodeLlamaTokenizer,ft=a.CohereForCausalLM,gt=a.CohereModel,_t=a.CoherePreTrainedModel,wt=a.CohereTokenizer,yt=a.ConvBertForMaskedLM,bt=a.ConvBertForQuestionAnswering,vt=a.ConvBertForSequenceClassification,xt=a.ConvBertForTokenClassification,Mt=a.ConvBertModel,Tt=a.ConvBertPreTrainedModel,kt=a.ConvBertTokenizer,$t=a.ConvNextFeatureExtractor,St=a.ConvNextForImageClassification,Ct=a.ConvNextImageProcessor,Et=a.ConvNextModel,Pt=a.ConvNextPreTra
inedModel,At=a.ConvNextV2ForImageClassification,Ft=a.ConvNextV2Model,zt=a.ConvNextV2PreTrainedModel,It=a.DPTFeatureExtractor,Ot=a.DPTForDepthEstimation,Bt=a.DPTImageProcessor,Lt=a.DPTModel,Dt=a.DPTPreTrainedModel,Rt=a.DebertaForMaskedLM,Nt=a.DebertaForQuestionAnswering,Vt=a.DebertaForSequenceClassification,jt=a.DebertaForTokenClassification,Gt=a.DebertaModel,qt=a.DebertaPreTrainedModel,Ut=a.DebertaTokenizer,Wt=a.DebertaV2ForMaskedLM,Ht=a.DebertaV2ForQuestionAnswering,Xt=a.DebertaV2ForSequenceClassification,Kt=a.DebertaV2ForTokenClassification,Qt=a.DebertaV2Model,Yt=a.DebertaV2PreTrainedModel,Jt=a.DebertaV2Tokenizer,Zt=a.DeiTFeatureExtractor,en=a.DeiTForImageClassification,tn=a.DeiTModel,nn=a.DeiTPreTrainedModel,rn=a.DepthAnythingForDepthEstimation,an=a.DepthAnythingPreTrainedModel,sn=a.DepthEstimationPipeline,on=a.DetrFeatureExtractor,ln=a.DetrForObjectDetection,un=a.DetrForSegmentation,dn=a.DetrModel,cn=a.DetrObjectDetectionOutput,pn=a.DetrPreTrainedModel,hn=a.DetrSegmentationOutput,mn=a.Dinov2ForImageClassification,fn=a.Dinov2Model,gn=a.Dinov2PreTrainedModel,_n=a.DistilBertForMaskedLM,wn=a.DistilBertForQuestionAnswering,yn=a.DistilBertForSequenceClassification,bn=a.DistilBertForTokenClassification,vn=a.DistilBertModel,xn=a.DistilBertPreTrainedModel,Mn=a.DistilBertTokenizer,Tn=a.DocumentQuestionAnsweringPipeline,kn=a.DonutFeatureExtractor,$n=a.DonutSwinModel,Sn=a.DonutSwinPreTrainedModel,Cn=a.EfficientNetForImageClassification,En=a.EfficientNetImageProcessor,Pn=a.EfficientNetModel,An=a.EfficientNetPreTrainedModel,Fn=a.ElectraForMaskedLM,zn=a.ElectraForQuestionAnswering,In=a.ElectraForSequenceClassification,On=a.ElectraForTokenClassification,Bn=a.ElectraModel,Ln=a.ElectraPreTrainedModel,Dn=a.ElectraTokenizer,Rn=a.EosTokenCriteria,Nn=a.EsmForMaskedLM,Vn=a.EsmForSequenceClassification,jn=a.EsmForTokenClassification,Gn=a.EsmModel,qn=a.EsmPreTrainedModel,Un=a.EsmTokenizer,Wn=a.FFT,Hn=a.FalconForCausalLM,Xn=a.FalconModel,Kn=a.FalconPreTrainedModel,Qn=a.FalconTokenizer,Yn=a.FastViTForImageClassification,Jn=a.FastViTModel,Zn=a.FastViTPreTrainedModel,er=a.FeatureExtractionPipeline,tr=a.FeatureExtractor,nr=a.FillMaskPipeline,rr=a.Florence2ForConditionalGeneration,ir=a.Florence2PreTrainedModel,ar=a.Florence2Processor,sr=a.GLPNFeatureExtractor,or=a.GLPNForDepthEstimation,lr=a.GLPNModel,ur=a.GLPNPreTrainedModel,dr=a.GPT2LMHeadModel,cr=a.GPT2Model,pr=a.GPT2PreTrainedModel,hr=a.GPT2Tokenizer,mr=a.GPTBigCodeForCausalLM,fr=a.GPTBigCodeModel,gr=a.GPTBigCodePreTrainedModel,_r=a.GPTJForCausalLM,wr=a.GPTJModel,yr=a.GPTJPreTrainedModel,br=a.GPTNeoForCausalLM,vr=a.GPTNeoModel,xr=a.GPTNeoPreTrainedModel,Mr=a.GPTNeoXForCausalLM,Tr=a.GPTNeoXModel,kr=a.GPTNeoXPreTrainedModel,$r=a.GPTNeoXTokenizer,Sr=a.Gemma2ForCausalLM,Cr=a.Gemma2Model,Er=a.Gemma2PreTrainedModel,Pr=a.GemmaForCausalLM,Ar=a.GemmaModel,Fr=a.GemmaPreTrainedModel,zr=a.GemmaTokenizer,Ir=a.Grok1Tokenizer,Or=a.HerbertTokenizer,Br=a.HieraForImageClassification,Lr=a.HieraModel,Dr=a.HieraPreTrainedModel,Rr=a.HubertForCTC,Nr=a.HubertForSequenceClassification,Vr=a.HubertModel,jr=a.HubertPreTrainedModel,Gr=a.ImageClassificationPipeline,qr=a.ImageFeatureExtractionPipeline,Ur=a.ImageFeatureExtractor,Wr=a.ImageMattingOutput,Hr=a.ImageSegmentationPipeline,Xr=a.ImageToImagePipeline,Kr=a.ImageToTextPipeline,Qr=a.InterruptableStoppingCriteria,Yr=a.JAISLMHeadModel,Jr=a.JAISModel,Zr=a.JAISPreTrainedModel,ei=a.LlamaForCausalLM,ti=a.LlamaModel,ni=a.LlamaPreTrainedModel,ri=a.LlamaTokenizer,ii=a.LlavaForConditionalGeneration,ai=a.LlavaPreTrainedModel,si=a.LongT5ForConditional
Generation,oi=a.LongT5Model,li=a.LongT5PreTrainedModel,ui=a.M2M100ForConditionalGeneration,di=a.M2M100Model,ci=a.M2M100PreTrainedModel,pi=a.M2M100Tokenizer,hi=a.MBart50Tokenizer,mi=a.MBartForCausalLM,fi=a.MBartForConditionalGeneration,gi=a.MBartForSequenceClassification,_i=a.MBartModel,wi=a.MBartPreTrainedModel,yi=a.MBartTokenizer,bi=a.MPNetForMaskedLM,vi=a.MPNetForQuestionAnswering,xi=a.MPNetForSequenceClassification,Mi=a.MPNetForTokenClassification,Ti=a.MPNetModel,ki=a.MPNetPreTrainedModel,$i=a.MPNetTokenizer,Si=a.MT5ForConditionalGeneration,Ci=a.MT5Model,Ei=a.MT5PreTrainedModel,Pi=a.MarianMTModel,Ai=a.MarianModel,Fi=a.MarianPreTrainedModel,zi=a.MarianTokenizer,Ii=a.MaskedLMOutput,Oi=a.MaxLengthCriteria,Bi=a.MistralForCausalLM,Li=a.MistralModel,Di=a.MistralPreTrainedModel,Ri=a.MobileBertForMaskedLM,Ni=a.MobileBertForQuestionAnswering,Vi=a.MobileBertForSequenceClassification,ji=a.MobileBertModel,Gi=a.MobileBertPreTrainedModel,qi=a.MobileBertTokenizer,Ui=a.MobileNetV1FeatureExtractor,Wi=a.MobileNetV1ForImageClassification,Hi=a.MobileNetV1Model,Xi=a.MobileNetV1PreTrainedModel,Ki=a.MobileNetV2FeatureExtractor,Qi=a.MobileNetV2ForImageClassification,Yi=a.MobileNetV2Model,Ji=a.MobileNetV2PreTrainedModel,Zi=a.MobileNetV3FeatureExtractor,ea=a.MobileNetV3ForImageClassification,ta=a.MobileNetV3Model,na=a.MobileNetV3PreTrainedModel,ra=a.MobileNetV4FeatureExtractor,ia=a.MobileNetV4ForImageClassification,aa=a.MobileNetV4Model,sa=a.MobileNetV4PreTrainedModel,oa=a.MobileViTFeatureExtractor,la=a.MobileViTForImageClassification,ua=a.MobileViTImageProcessor,da=a.MobileViTModel,ca=a.MobileViTPreTrainedModel,pa=a.MobileViTV2ForImageClassification,ha=a.MobileViTV2Model,ma=a.MobileViTV2PreTrainedModel,fa=a.ModelOutput,ga=a.Moondream1ForConditionalGeneration,_a=a.MptForCausalLM,wa=a.MptModel,ya=a.MptPreTrainedModel,ba=a.MusicgenForCausalLM,va=a.MusicgenForConditionalGeneration,xa=a.MusicgenModel,Ma=a.MusicgenPreTrainedModel,Ta=a.NllbTokenizer,ka=a.NomicBertModel,$a=a.NomicBertPreTrainedModel,Sa=a.NougatImageProcessor,Ca=a.NougatTokenizer,Ea=a.OPTForCausalLM,Pa=a.OPTModel,Aa=a.OPTPreTrainedModel,Fa=a.ObjectDetectionPipeline,za=a.OpenELMForCausalLM,Ia=a.OpenELMModel,Oa=a.OpenELMPreTrainedModel,Ba=a.OwlViTFeatureExtractor,La=a.OwlViTForObjectDetection,Da=a.OwlViTModel,Ra=a.OwlViTPreTrainedModel,Na=a.OwlViTProcessor,Va=a.Owlv2ForObjectDetection,ja=a.Owlv2ImageProcessor,Ga=a.Owlv2Model,qa=a.Owlv2PreTrainedModel,Ua=a.Phi3ForCausalLM,Wa=a.Phi3Model,Ha=a.Phi3PreTrainedModel,Xa=a.PhiForCausalLM,Ka=a.PhiModel,Qa=a.PhiPreTrainedModel,Ya=a.Pipeline,Ja=a.PreTrainedModel,Za=a.PreTrainedTokenizer,es=a.PretrainedConfig,ts=a.PretrainedMixin,ns=a.Processor,rs=a.PyAnnoteFeatureExtractor,is=a.PyAnnoteForAudioFrameClassification,as=a.PyAnnoteModel,ss=a.PyAnnotePreTrainedModel,os=a.PyAnnoteProcessor,ls=a.QuestionAnsweringModelOutput,us=a.QuestionAnsweringPipeline,ds=a.Qwen2ForCausalLM,cs=a.Qwen2Model,ps=a.Qwen2PreTrainedModel,hs=a.Qwen2Tokenizer,ms=a.RTDetrForObjectDetection,fs=a.RTDetrImageProcessor,gs=a.RTDetrModel,_s=a.RTDetrObjectDetectionOutput,ws=a.RTDetrPreTrainedModel,ys=a.RawImage,bs=a.ResNetForImageClassification,vs=a.ResNetModel,xs=a.ResNetPreTrainedModel,Ms=a.RoFormerForMaskedLM,Ts=a.RoFormerForQuestionAnswering,ks=a.RoFormerForSequenceClassification,$s=a.RoFormerForTokenClassification,Ss=a.RoFormerModel,Cs=a.RoFormerPreTrainedModel,Es=a.RoFormerTokenizer,Ps=a.RobertaForMaskedLM,As=a.RobertaForQuestionAnswering,Fs=a.RobertaForSequenceClassification,zs=a.RobertaForTokenClassification,Is=a.RobertaModel,Os=a.RobertaPreTraine
dModel,Bs=a.RobertaTokenizer,Ls=a.SamImageProcessor,Ds=a.SamImageSegmentationOutput,Rs=a.SamModel,Ns=a.SamPreTrainedModel,Vs=a.SamProcessor,js=a.SapiensFeatureExtractor,Gs=a.SapiensForDepthEstimation,qs=a.SapiensForNormalEstimation,Us=a.SapiensForSemanticSegmentation,Ws=a.SapiensPreTrainedModel,Hs=a.SeamlessM4TFeatureExtractor,Xs=a.SegformerFeatureExtractor,Ks=a.SegformerForImageClassification,Qs=a.SegformerForSemanticSegmentation,Ys=a.SegformerModel,Js=a.SegformerPreTrainedModel,Zs=a.Seq2SeqLMOutput,eo=a.SequenceClassifierOutput,to=a.SiglipImageProcessor,no=a.SiglipModel,ro=a.SiglipPreTrainedModel,io=a.SiglipTextModel,ao=a.SiglipTokenizer,so=a.SiglipVisionModel,oo=a.SpeechT5FeatureExtractor,lo=a.SpeechT5ForSpeechToText,uo=a.SpeechT5ForTextToSpeech,co=a.SpeechT5HifiGan,po=a.SpeechT5Model,ho=a.SpeechT5PreTrainedModel,mo=a.SpeechT5Processor,fo=a.SpeechT5Tokenizer,go=a.SqueezeBertForMaskedLM,_o=a.SqueezeBertForQuestionAnswering,wo=a.SqueezeBertForSequenceClassification,yo=a.SqueezeBertModel,bo=a.SqueezeBertPreTrainedModel,vo=a.SqueezeBertTokenizer,xo=a.StableLmForCausalLM,Mo=a.StableLmModel,To=a.StableLmPreTrainedModel,ko=a.Starcoder2ForCausalLM,$o=a.Starcoder2Model,So=a.Starcoder2PreTrainedModel,Co=a.StoppingCriteria,Eo=a.StoppingCriteriaList,Po=a.SummarizationPipeline,Ao=a.Swin2SRForImageSuperResolution,Fo=a.Swin2SRImageProcessor,zo=a.Swin2SRModel,Io=a.Swin2SRPreTrainedModel,Oo=a.SwinForImageClassification,Bo=a.SwinModel,Lo=a.SwinPreTrainedModel,Do=a.T5ForConditionalGeneration,Ro=a.T5Model,No=a.T5PreTrainedModel,Vo=a.T5Tokenizer,jo=a.TableTransformerForObjectDetection,Go=a.TableTransformerModel,qo=a.TableTransformerObjectDetectionOutput,Uo=a.TableTransformerPreTrainedModel,Wo=a.Tensor,Ho=a.Text2TextGenerationPipeline,Xo=a.TextClassificationPipeline,Ko=a.TextGenerationPipeline,Qo=a.TextStreamer,Yo=a.TextToAudioPipeline,Jo=a.TokenClassificationPipeline,Zo=a.TokenClassifierOutput,el=a.TokenizerModel,tl=a.TrOCRForCausalLM,nl=a.TrOCRPreTrainedModel,rl=a.TranslationPipeline,il=a.UniSpeechForCTC,al=a.UniSpeechForSequenceClassification,sl=a.UniSpeechModel,ol=a.UniSpeechPreTrainedModel,ll=a.UniSpeechSatForAudioFrameClassification,ul=a.UniSpeechSatForCTC,dl=a.UniSpeechSatForSequenceClassification,cl=a.UniSpeechSatModel,pl=a.UniSpeechSatPreTrainedModel,hl=a.ViTFeatureExtractor,ml=a.ViTForImageClassification,fl=a.ViTImageProcessor,gl=a.ViTModel,_l=a.ViTPreTrainedModel,wl=a.VisionEncoderDecoderModel,yl=a.VitMatteForImageMatting,bl=a.VitMatteImageProcessor,vl=a.VitMattePreTrainedModel,xl=a.VitsModel,Ml=a.VitsModelOutput,Tl=a.VitsPreTrainedModel,kl=a.VitsTokenizer,$l=a.Wav2Vec2BertForCTC,Sl=a.Wav2Vec2BertForSequenceClassification,Cl=a.Wav2Vec2BertModel,El=a.Wav2Vec2BertPreTrainedModel,Pl=a.Wav2Vec2CTCTokenizer,Al=a.Wav2Vec2FeatureExtractor,Fl=a.Wav2Vec2ForAudioFrameClassification,zl=a.Wav2Vec2ForCTC,Il=a.Wav2Vec2ForSequenceClassification,Ol=a.Wav2Vec2Model,Bl=a.Wav2Vec2PreTrainedModel,Ll=a.Wav2Vec2ProcessorWithLM,Dl=a.WavLMForAudioFrameClassification,Rl=a.WavLMForCTC,Nl=a.WavLMForSequenceClassification,Vl=a.WavLMForXVector,jl=a.WavLMModel,Gl=a.WavLMPreTrainedModel,ql=a.WeSpeakerFeatureExtractor,Ul=a.WeSpeakerResNetModel,Wl=a.WeSpeakerResNetPreTrainedModel,Hl=a.WhisperFeatureExtractor,Xl=a.WhisperForConditionalGeneration,Kl=a.WhisperModel,Ql=a.WhisperPreTrainedModel,Yl=a.WhisperProcessor,Jl=a.WhisperTextStreamer,Zl=a.WhisperTokenizer,eu=a.XLMForQuestionAnswering,tu=a.XLMForSequenceClassification,nu=a.XLMForTokenClassification,ru=a.XLMModel,iu=a.XLMPreTrainedModel,au=a.XLMRobertaForMaskedLM,su=a.XLMRobertaFo
rQuestionAnswering,ou=a.XLMRobertaForSequenceClassification,lu=a.XLMRobertaForTokenClassification,uu=a.XLMRobertaModel,du=a.XLMRobertaPreTrainedModel,cu=a.XLMRobertaTokenizer,pu=a.XLMTokenizer,hu=a.XLMWithLMHeadModel,mu=a.XVectorOutput,fu=a.YolosFeatureExtractor,gu=a.YolosForObjectDetection,_u=a.YolosModel,wu=a.YolosObjectDetectionOutput,yu=a.YolosPreTrainedModel,bu=a.ZeroShotAudioClassificationPipeline,vu=a.ZeroShotClassificationPipeline,xu=a.ZeroShotImageClassificationPipeline,Mu=a.ZeroShotObjectDetectionPipeline,Tu=a.bankers_round,ku=a.cat,$u=a.cos_sim,Su=a.dot,Cu=a.dynamic_time_warping,Eu=a.env,Pu=a.full,Au=a.full_like,Fu=a.getKeyValueShapes,zu=a.hamming,Iu=a.hanning,Ou=a.interpolate,Bu=a.interpolate_4d,Lu=a.interpolate_data,Du=a.is_chinese_char,Ru=a.layer_norm,Nu=a.log_softmax,Vu=a.magnitude,ju=a.matmul,Gu=a.max,qu=a.mean,Uu=a.mean_pooling,Wu=a.medianFilter,Hu=a.mel_filter_bank,Xu=a.min,Ku=a.ones,Qu=a.ones_like,Yu=a.permute,Ju=a.permute_data,Zu=a.pipeline,ed=a.quantize_embeddings,td=a.read_audio,nd=a.rfft,rd=a.round,id=a.softmax,ad=a.spectrogram,sd=a.stack,od=a.std_mean,ld=a.topk,ud=a.window_function,dd=a.zeros,cd=a.zeros_like;export{w as ASTFeatureExtractor,y as ASTForAudioClassification,b as ASTModel,v as ASTPreTrainedModel,x as AlbertForMaskedLM,M as AlbertForQuestionAnswering,T as AlbertForSequenceClassification,k as AlbertModel,$ as AlbertPreTrainedModel,S as AlbertTokenizer,C as AudioClassificationPipeline,E as AutoConfig,P as AutoModel,A as AutoModelForAudioClassification,F as AutoModelForAudioFrameClassification,z as AutoModelForCTC,I as AutoModelForCausalLM,O as AutoModelForDepthEstimation,B as AutoModelForDocumentQuestionAnswering,L as AutoModelForImageClassification,D as AutoModelForImageFeatureExtraction,R as AutoModelForImageMatting,N as AutoModelForImageSegmentation,V as AutoModelForImageToImage,j as AutoModelForMaskGeneration,G as AutoModelForMaskedLM,q as AutoModelForNormalEstimation,U as AutoModelForObjectDetection,W as AutoModelForQuestionAnswering,H as AutoModelForSemanticSegmentation,X as AutoModelForSeq2SeqLM,K as AutoModelForSequenceClassification,Q as AutoModelForSpeechSeq2Seq,Y as AutoModelForTextToSpectrogram,J as AutoModelForTextToWaveform,Z as AutoModelForTokenClassification,ee as AutoModelForVision2Seq,te as AutoModelForXVector,ne as AutoModelForZeroShotObjectDetection,re as AutoProcessor,ie as AutoTokenizer,ae as AutomaticSpeechRecognitionPipeline,se as BartForConditionalGeneration,oe as BartForSequenceClassification,le as BartModel,ue as BartPretrainedModel,de as BartTokenizer,ce as BaseModelOutput,pe as BaseStreamer,he as BeitFeatureExtractor,me as BeitForImageClassification,fe as BeitModel,ge as BeitPreTrainedModel,_e as BertForMaskedLM,we as BertForQuestionAnswering,ye as BertForSequenceClassification,be as BertForTokenClassification,ve as BertModel,xe as BertPreTrainedModel,Me as BertTokenizer,Te as BitImageProcessor,ke as BlenderbotForConditionalGeneration,$e as BlenderbotModel,Se as BlenderbotPreTrainedModel,Ce as BlenderbotSmallForConditionalGeneration,Ee as BlenderbotSmallModel,Pe as BlenderbotSmallPreTrainedModel,Ae as BlenderbotSmallTokenizer,Fe as BlenderbotTokenizer,ze as BloomForCausalLM,Ie as BloomModel,Oe as BloomPreTrainedModel,Be as BloomTokenizer,Le as CLIPFeatureExtractor,De as CLIPImageProcessor,Re as CLIPModel,Ne as CLIPPreTrainedModel,Ve as CLIPSegForImageSegmentation,je as CLIPSegModel,Ge as CLIPSegPreTrainedModel,qe as CLIPTextModelWithProjection,Ue as CLIPTokenizer,We as CLIPVisionModelWithProjection,He as CamembertForMaskedLM,Xe 
as CamembertForQuestionAnswering,Ke as CamembertForSequenceClassification,Qe as CamembertForTokenClassification,Ye as CamembertModel,Je as CamembertPreTrainedModel,Ze as CamembertTokenizer,et as CausalLMOutput,tt as CausalLMOutputWithPast,nt as ChineseCLIPFeatureExtractor,rt as ChineseCLIPModel,it as ChineseCLIPPreTrainedModel,at as ClapAudioModelWithProjection,st as ClapFeatureExtractor,ot as ClapModel,lt as ClapPreTrainedModel,ut as ClapTextModelWithProjection,dt as CodeGenForCausalLM,ct as CodeGenModel,pt as CodeGenPreTrainedModel,ht as CodeGenTokenizer,mt as CodeLlamaTokenizer,ft as CohereForCausalLM,gt as CohereModel,_t as CoherePreTrainedModel,wt as CohereTokenizer,yt as ConvBertForMaskedLM,bt as ConvBertForQuestionAnswering,vt as ConvBertForSequenceClassification,xt as ConvBertForTokenClassification,Mt as ConvBertModel,Tt as ConvBertPreTrainedModel,kt as ConvBertTokenizer,$t as ConvNextFeatureExtractor,St as ConvNextForImageClassification,Ct as ConvNextImageProcessor,Et as ConvNextModel,Pt as ConvNextPreTrainedModel,At as ConvNextV2ForImageClassification,Ft as ConvNextV2Model,zt as ConvNextV2PreTrainedModel,It as DPTFeatureExtractor,Ot as DPTForDepthEstimation,Bt as DPTImageProcessor,Lt as DPTModel,Dt as DPTPreTrainedModel,Rt as DebertaForMaskedLM,Nt as DebertaForQuestionAnswering,Vt as DebertaForSequenceClassification,jt as DebertaForTokenClassification,Gt as DebertaModel,qt as DebertaPreTrainedModel,Ut as DebertaTokenizer,Wt as DebertaV2ForMaskedLM,Ht as DebertaV2ForQuestionAnswering,Xt as DebertaV2ForSequenceClassification,Kt as DebertaV2ForTokenClassification,Qt as DebertaV2Model,Yt as DebertaV2PreTrainedModel,Jt as DebertaV2Tokenizer,Zt as DeiTFeatureExtractor,en as DeiTForImageClassification,tn as DeiTModel,nn as DeiTPreTrainedModel,rn as DepthAnythingForDepthEstimation,an as DepthAnythingPreTrainedModel,sn as DepthEstimationPipeline,on as DetrFeatureExtractor,ln as DetrForObjectDetection,un as DetrForSegmentation,dn as DetrModel,cn as DetrObjectDetectionOutput,pn as DetrPreTrainedModel,hn as DetrSegmentationOutput,mn as Dinov2ForImageClassification,fn as Dinov2Model,gn as Dinov2PreTrainedModel,_n as DistilBertForMaskedLM,wn as DistilBertForQuestionAnswering,yn as DistilBertForSequenceClassification,bn as DistilBertForTokenClassification,vn as DistilBertModel,xn as DistilBertPreTrainedModel,Mn as DistilBertTokenizer,Tn as DocumentQuestionAnsweringPipeline,kn as DonutFeatureExtractor,$n as DonutSwinModel,Sn as DonutSwinPreTrainedModel,Cn as EfficientNetForImageClassification,En as EfficientNetImageProcessor,Pn as EfficientNetModel,An as EfficientNetPreTrainedModel,Fn as ElectraForMaskedLM,zn as ElectraForQuestionAnswering,In as ElectraForSequenceClassification,On as ElectraForTokenClassification,Bn as ElectraModel,Ln as ElectraPreTrainedModel,Dn as ElectraTokenizer,Rn as EosTokenCriteria,Nn as EsmForMaskedLM,Vn as EsmForSequenceClassification,jn as EsmForTokenClassification,Gn as EsmModel,qn as EsmPreTrainedModel,Un as EsmTokenizer,Wn as FFT,Hn as FalconForCausalLM,Xn as FalconModel,Kn as FalconPreTrainedModel,Qn as FalconTokenizer,Yn as FastViTForImageClassification,Jn as FastViTModel,Zn as FastViTPreTrainedModel,er as FeatureExtractionPipeline,tr as FeatureExtractor,nr as FillMaskPipeline,rr as Florence2ForConditionalGeneration,ir as Florence2PreTrainedModel,ar as Florence2Processor,sr as GLPNFeatureExtractor,or as GLPNForDepthEstimation,lr as GLPNModel,ur as GLPNPreTrainedModel,dr as GPT2LMHeadModel,cr as GPT2Model,pr as GPT2PreTrainedModel,hr as GPT2Tokenizer,mr as 
GPTBigCodeForCausalLM,fr as GPTBigCodeModel,gr as GPTBigCodePreTrainedModel,_r as GPTJForCausalLM,wr as GPTJModel,yr as GPTJPreTrainedModel,br as GPTNeoForCausalLM,vr as GPTNeoModel,xr as GPTNeoPreTrainedModel,Mr as GPTNeoXForCausalLM,Tr as GPTNeoXModel,kr as GPTNeoXPreTrainedModel,$r as GPTNeoXTokenizer,Sr as Gemma2ForCausalLM,Cr as Gemma2Model,Er as Gemma2PreTrainedModel,Pr as GemmaForCausalLM,Ar as GemmaModel,Fr as GemmaPreTrainedModel,zr as GemmaTokenizer,Ir as Grok1Tokenizer,Or as HerbertTokenizer,Br as HieraForImageClassification,Lr as HieraModel,Dr as HieraPreTrainedModel,Rr as HubertForCTC,Nr as HubertForSequenceClassification,Vr as HubertModel,jr as HubertPreTrainedModel,Gr as ImageClassificationPipeline,qr as ImageFeatureExtractionPipeline,Ur as ImageFeatureExtractor,Wr as ImageMattingOutput,Hr as ImageSegmentationPipeline,Xr as ImageToImagePipeline,Kr as ImageToTextPipeline,Qr as InterruptableStoppingCriteria,Yr as JAISLMHeadModel,Jr as JAISModel,Zr as JAISPreTrainedModel,ei as LlamaForCausalLM,ti as LlamaModel,ni as LlamaPreTrainedModel,ri as LlamaTokenizer,ii as LlavaForConditionalGeneration,ai as LlavaPreTrainedModel,si as LongT5ForConditionalGeneration,oi as LongT5Model,li as LongT5PreTrainedModel,ui as M2M100ForConditionalGeneration,di as M2M100Model,ci as M2M100PreTrainedModel,pi as M2M100Tokenizer,hi as MBart50Tokenizer,mi as MBartForCausalLM,fi as MBartForConditionalGeneration,gi as MBartForSequenceClassification,_i as MBartModel,wi as MBartPreTrainedModel,yi as MBartTokenizer,bi as MPNetForMaskedLM,vi as MPNetForQuestionAnswering,xi as MPNetForSequenceClassification,Mi as MPNetForTokenClassification,Ti as MPNetModel,ki as MPNetPreTrainedModel,$i as MPNetTokenizer,Si as MT5ForConditionalGeneration,Ci as MT5Model,Ei as MT5PreTrainedModel,Pi as MarianMTModel,Ai as MarianModel,Fi as MarianPreTrainedModel,zi as MarianTokenizer,Ii as MaskedLMOutput,Oi as MaxLengthCriteria,Bi as MistralForCausalLM,Li as MistralModel,Di as MistralPreTrainedModel,Ri as MobileBertForMaskedLM,Ni as MobileBertForQuestionAnswering,Vi as MobileBertForSequenceClassification,ji as MobileBertModel,Gi as MobileBertPreTrainedModel,qi as MobileBertTokenizer,Ui as MobileNetV1FeatureExtractor,Wi as MobileNetV1ForImageClassification,Hi as MobileNetV1Model,Xi as MobileNetV1PreTrainedModel,Ki as MobileNetV2FeatureExtractor,Qi as MobileNetV2ForImageClassification,Yi as MobileNetV2Model,Ji as MobileNetV2PreTrainedModel,Zi as MobileNetV3FeatureExtractor,ea as MobileNetV3ForImageClassification,ta as MobileNetV3Model,na as MobileNetV3PreTrainedModel,ra as MobileNetV4FeatureExtractor,ia as MobileNetV4ForImageClassification,aa as MobileNetV4Model,sa as MobileNetV4PreTrainedModel,oa as MobileViTFeatureExtractor,la as MobileViTForImageClassification,ua as MobileViTImageProcessor,da as MobileViTModel,ca as MobileViTPreTrainedModel,pa as MobileViTV2ForImageClassification,ha as MobileViTV2Model,ma as MobileViTV2PreTrainedModel,fa as ModelOutput,ga as Moondream1ForConditionalGeneration,_a as MptForCausalLM,wa as MptModel,ya as MptPreTrainedModel,ba as MusicgenForCausalLM,va as MusicgenForConditionalGeneration,xa as MusicgenModel,Ma as MusicgenPreTrainedModel,Ta as NllbTokenizer,ka as NomicBertModel,$a as NomicBertPreTrainedModel,Sa as NougatImageProcessor,Ca as NougatTokenizer,Ea as OPTForCausalLM,Pa as OPTModel,Aa as OPTPreTrainedModel,Fa as ObjectDetectionPipeline,za as OpenELMForCausalLM,Ia as OpenELMModel,Oa as OpenELMPreTrainedModel,Ba as OwlViTFeatureExtractor,La as OwlViTForObjectDetection,Da as OwlViTModel,Ra as 
OwlViTPreTrainedModel,Na as OwlViTProcessor,Va as Owlv2ForObjectDetection,ja as Owlv2ImageProcessor,Ga as Owlv2Model,qa as Owlv2PreTrainedModel,Ua as Phi3ForCausalLM,Wa as Phi3Model,Ha as Phi3PreTrainedModel,Xa as PhiForCausalLM,Ka as PhiModel,Qa as PhiPreTrainedModel,Ya as Pipeline,Ja as PreTrainedModel,Za as PreTrainedTokenizer,es as PretrainedConfig,ts as PretrainedMixin,ns as Processor,rs as PyAnnoteFeatureExtractor,is as PyAnnoteForAudioFrameClassification,as as PyAnnoteModel,ss as PyAnnotePreTrainedModel,os as PyAnnoteProcessor,ls as QuestionAnsweringModelOutput,us as QuestionAnsweringPipeline,ds as Qwen2ForCausalLM,cs as Qwen2Model,ps as Qwen2PreTrainedModel,hs as Qwen2Tokenizer,ms as RTDetrForObjectDetection,fs as RTDetrImageProcessor,gs as RTDetrModel,_s as RTDetrObjectDetectionOutput,ws as RTDetrPreTrainedModel,ys as RawImage,bs as ResNetForImageClassification,vs as ResNetModel,xs as ResNetPreTrainedModel,Ms as RoFormerForMaskedLM,Ts as RoFormerForQuestionAnswering,ks as RoFormerForSequenceClassification,$s as RoFormerForTokenClassification,Ss as RoFormerModel,Cs as RoFormerPreTrainedModel,Es as RoFormerTokenizer,Ps as RobertaForMaskedLM,As as RobertaForQuestionAnswering,Fs as RobertaForSequenceClassification,zs as RobertaForTokenClassification,Is as RobertaModel,Os as RobertaPreTrainedModel,Bs as RobertaTokenizer,Ls as SamImageProcessor,Ds as SamImageSegmentationOutput,Rs as SamModel,Ns as SamPreTrainedModel,Vs as SamProcessor,js as SapiensFeatureExtractor,Gs as SapiensForDepthEstimation,qs as SapiensForNormalEstimation,Us as SapiensForSemanticSegmentation,Ws as SapiensPreTrainedModel,Hs as SeamlessM4TFeatureExtractor,Xs as SegformerFeatureExtractor,Ks as SegformerForImageClassification,Qs as SegformerForSemanticSegmentation,Ys as SegformerModel,Js as SegformerPreTrainedModel,Zs as Seq2SeqLMOutput,eo as SequenceClassifierOutput,to as SiglipImageProcessor,no as SiglipModel,ro as SiglipPreTrainedModel,io as SiglipTextModel,ao as SiglipTokenizer,so as SiglipVisionModel,oo as SpeechT5FeatureExtractor,lo as SpeechT5ForSpeechToText,uo as SpeechT5ForTextToSpeech,co as SpeechT5HifiGan,po as SpeechT5Model,ho as SpeechT5PreTrainedModel,mo as SpeechT5Processor,fo as SpeechT5Tokenizer,go as SqueezeBertForMaskedLM,_o as SqueezeBertForQuestionAnswering,wo as SqueezeBertForSequenceClassification,yo as SqueezeBertModel,bo as SqueezeBertPreTrainedModel,vo as SqueezeBertTokenizer,xo as StableLmForCausalLM,Mo as StableLmModel,To as StableLmPreTrainedModel,ko as Starcoder2ForCausalLM,$o as Starcoder2Model,So as Starcoder2PreTrainedModel,Co as StoppingCriteria,Eo as StoppingCriteriaList,Po as SummarizationPipeline,Ao as Swin2SRForImageSuperResolution,Fo as Swin2SRImageProcessor,zo as Swin2SRModel,Io as Swin2SRPreTrainedModel,Oo as SwinForImageClassification,Bo as SwinModel,Lo as SwinPreTrainedModel,Do as T5ForConditionalGeneration,Ro as T5Model,No as T5PreTrainedModel,Vo as T5Tokenizer,jo as TableTransformerForObjectDetection,Go as TableTransformerModel,qo as TableTransformerObjectDetectionOutput,Uo as TableTransformerPreTrainedModel,Wo as Tensor,Ho as Text2TextGenerationPipeline,Xo as TextClassificationPipeline,Ko as TextGenerationPipeline,Qo as TextStreamer,Yo as TextToAudioPipeline,Jo as TokenClassificationPipeline,Zo as TokenClassifierOutput,el as TokenizerModel,tl as TrOCRForCausalLM,nl as TrOCRPreTrainedModel,rl as TranslationPipeline,il as UniSpeechForCTC,al as UniSpeechForSequenceClassification,sl as UniSpeechModel,ol as UniSpeechPreTrainedModel,ll as 
UniSpeechSatForAudioFrameClassification,ul as UniSpeechSatForCTC,dl as UniSpeechSatForSequenceClassification,cl as UniSpeechSatModel,pl as UniSpeechSatPreTrainedModel,hl as ViTFeatureExtractor,ml as ViTForImageClassification,fl as ViTImageProcessor,gl as ViTModel,_l as ViTPreTrainedModel,wl as VisionEncoderDecoderModel,yl as VitMatteForImageMatting,bl as VitMatteImageProcessor,vl as VitMattePreTrainedModel,xl as VitsModel,Ml as VitsModelOutput,Tl as VitsPreTrainedModel,kl as VitsTokenizer,$l as Wav2Vec2BertForCTC,Sl as Wav2Vec2BertForSequenceClassification,Cl as Wav2Vec2BertModel,El as Wav2Vec2BertPreTrainedModel,Pl as Wav2Vec2CTCTokenizer,Al as Wav2Vec2FeatureExtractor,Fl as Wav2Vec2ForAudioFrameClassification,zl as Wav2Vec2ForCTC,Il as Wav2Vec2ForSequenceClassification,Ol as Wav2Vec2Model,Bl as Wav2Vec2PreTrainedModel,Ll as Wav2Vec2ProcessorWithLM,Dl as WavLMForAudioFrameClassification,Rl as WavLMForCTC,Nl as WavLMForSequenceClassification,Vl as WavLMForXVector,jl as WavLMModel,Gl as WavLMPreTrainedModel,ql as WeSpeakerFeatureExtractor,Ul as WeSpeakerResNetModel,Wl as WeSpeakerResNetPreTrainedModel,Hl as WhisperFeatureExtractor,Xl as WhisperForConditionalGeneration,Kl as WhisperModel,Ql as WhisperPreTrainedModel,Yl as WhisperProcessor,Jl as WhisperTextStreamer,Zl as WhisperTokenizer,eu as XLMForQuestionAnswering,tu as XLMForSequenceClassification,nu as XLMForTokenClassification,ru as XLMModel,iu as XLMPreTrainedModel,au as XLMRobertaForMaskedLM,su as XLMRobertaForQuestionAnswering,ou as XLMRobertaForSequenceClassification,lu as XLMRobertaForTokenClassification,uu as XLMRobertaModel,du as XLMRobertaPreTrainedModel,cu as XLMRobertaTokenizer,pu as XLMTokenizer,hu as XLMWithLMHeadModel,mu as XVectorOutput,fu as YolosFeatureExtractor,gu as YolosForObjectDetection,_u as YolosModel,wu as YolosObjectDetectionOutput,yu as YolosPreTrainedModel,bu as ZeroShotAudioClassificationPipeline,vu as ZeroShotClassificationPipeline,xu as ZeroShotImageClassificationPipeline,Mu as ZeroShotObjectDetectionPipeline,Tu as bankers_round,ku as cat,$u as cos_sim,Su as dot,Cu as dynamic_time_warping,Eu as env,Pu as full,Au as full_like,Fu as getKeyValueShapes,zu as hamming,Iu as hanning,Ou as interpolate,Bu as interpolate_4d,Lu as interpolate_data,Du as is_chinese_char,Ru as layer_norm,Nu as log_softmax,Vu as magnitude,ju as matmul,Gu as max,qu as mean,Uu as mean_pooling,Wu as medianFilter,Hu as mel_filter_bank,Xu as min,Ku as ones,Qu as ones_like,Yu as permute,Ju as permute_data,Zu as pipeline,ed as quantize_embeddings,td as read_audio,nd as rfft,rd as round,id as softmax,ad as spectrogram,sd as stack,od as std_mean,ld as topk,ud as window_function,dd as zeros,cd as zeros_like};
//# sourceMappingURL=transformers.min.js.map
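The export statement above enumerates the bundle's public surface (pipeline, env, AutoTokenizer, the model and pipeline classes, and the tensor/math helpers). A minimal usage sketch of consuming those named exports from an ESM consumer follows; the task name and model ID are illustrative assumptions and are not pinned by this diff.

// Minimal sketch: importing one of the named exports listed above
// from the package entry point.
import { pipeline } from '@huggingface/transformers';

// 'text-classification' with an assumed example model ID.
const classifier = await pipeline(
  'text-classification',
  'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
);

console.log(await classifier('Transformers.js bundles are easy to consume.'));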