@huggingface/transformers 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/transformers.cjs +678 -153
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +682 -154
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +24 -18
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +19 -13
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +30 -24
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +682 -154
- package/dist/transformers.mjs.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -1
- package/src/env.js +6 -6
- package/src/generation/configuration_utils.js +7 -0
- package/src/generation/logits_process.js +22 -16
- package/src/generation/streamers.js +7 -2
- package/src/models/idefics3/image_processing_idefics3.js +219 -0
- package/src/models/idefics3/processing_idefics3.js +136 -0
- package/src/models/image_processors.js +1 -0
- package/src/models/processors.js +1 -0
- package/src/models.js +112 -34
- package/src/utils/core.js +14 -0
- package/src/utils/dtypes.js +2 -1
- package/src/utils/image.js +19 -16
- package/src/utils/tensor.js +6 -1
- package/types/configs.d.ts +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +6 -0
- package/types/generation/configuration_utils.d.ts.map +1 -1
- package/types/generation/logits_process.d.ts +30 -20
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +13 -8
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
- package/types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/processors.d.ts +1 -0
- package/types/models.d.ts +16 -6
- package/types/models.d.ts.map +1 -1
- package/types/utils/core.d.ts +7 -0
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +3 -2
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/image.d.ts +4 -0
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +5 -3
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.min.js
CHANGED
|
@@ -160,16 +160,16 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
160
160
|
\**************************************/(e,t,n)=>{n.r(t),n.d(t,{Processor:()=>i});var r=n(/*! ../utils/constants.js */"./src/utils/constants.js"),s=n(/*! ../utils/generic.js */"./src/utils/generic.js"),o=n(/*! ../utils/hub.js */"./src/utils/hub.js");class i extends s.Callable{static classes=["image_processor_class","tokenizer_class","feature_extractor_class"];static uses_processor_config=!1;constructor(e,t){super(),this.config=e,this.components=t}get image_processor(){return this.components.image_processor}get tokenizer(){return this.components.tokenizer}get feature_extractor(){return this.components.feature_extractor}apply_chat_template(e,t={}){if(!this.tokenizer)throw new Error("Unable to apply chat template without a tokenizer.");return this.tokenizer.apply_chat_template(e,{tokenize:!1,...t})}batch_decode(...e){if(!this.tokenizer)throw new Error("Unable to decode without a tokenizer.");return this.tokenizer.batch_decode(...e)}async _call(e,...t){for(const n of[this.image_processor,this.feature_extractor,this.tokenizer])if(n)return n(e,...t);throw new Error("No image processor, feature extractor, or tokenizer found.")}static async from_pretrained(e,t){const[n,s]=await Promise.all([this.uses_processor_config?(0,o.getModelJSON)(e,r.PROCESSOR_NAME,!0,t):{},Promise.all(this.classes.filter((e=>e in this)).map((async n=>{const r=await this[n].from_pretrained(e,t);return[n.replace(/_class$/,""),r]}))).then(Object.fromEntries)]);return new this(n,s)}}},"./src/configs.js":
|
|
161
161
|
/*!************************!*\
|
|
162
162
|
!*** ./src/configs.js ***!
|
|
163
|
-
\************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>a,getKeyValueShapes:()=>i});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js");function o(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":case"llava_onevision":n=o(e.text_config);break;case"moondream1":n=o(e.phi_config);break;case"musicgen":n=o(e.decoder);break;case"multi_modality":n=o(e.language_config);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"olmo":case"mobilellm":case"granite":case"cohere":case"mistral":case"starcoder2":case"qwen2":case"qwen2_vl":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="d
ecoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const s=o(e.decoder),i="num_decoder_layers"in s,a=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return i?(a.num_decoder_layers=s.num_decoder_layers,a.num_decoder_heads=s.num_decoder_heads,a.decoder_hidden_size=s.decoder_hidden_size,a.num_encoder_layers=s.num_encoder_layers,a.num_encoder_heads=s.num_encoder_heads,a.encoder_hidden_size=s.encoder_hidden_size):(a.num_layers=s.num_layers,a.num_heads=s.num_heads,a.hidden_size=s.hidden_size),a}const s={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)s[n]=e[t[n]];return s}function i(e,{prefix:t="past_key_values",batch_size:n=1}={}){const r={},s=e.normalized_config;if(s.is_encoder_decoder&&"num_encoder_heads"in s&&"num_decoder_heads"in s){const e=s.encoder_dim_kv??s.encoder_hidden_size/s.num_encoder_heads,o=s.decoder_dim_kv??s.decoder_hidden_size/s.num_decoder_heads,i=[n,s.num_encoder_heads,0,e],a=[n,s.num_decoder_heads,0,o];for(let e=0;e<s.num_decoder_layers;++e)r[`${t}.${e}.encoder.key`]=i,r[`${t}.${e}.encoder.value`]=i,r[`${t}.${e}.decoder.key`]=a,r[`${t}.${e}.decoder.value`]=a}else{const 
e=s.num_heads,o=s.num_layers,i=s.dim_kv??s.hidden_size/(s.num_attention_heads??e);if("falcon"===s.model_type){const s=[n*e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=s}else if(s.multi_query){const s=[n*e,0,2*i];for(let e=0;e<o;++e)r[`${t}.${e}.key_value`]=s}else if("bloom"===s.model_type){const s=[n*e,i,0],a=[n*e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=a}else if("openelm"===s.model_type)for(let s=0;s<o;++s){const o=[n,e[s],0,i];r[`${t}.${s}.key`]=o,r[`${t}.${s}.value`]=o}else{const s=[n,e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=s}}return r}class a{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=o(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main"}={}){!n||n instanceof a||(n=new a(n));const l=n??await async function(e,t){return await(0,s.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return a.from_pretrained(...e)}}},"./src/env.js":
|
|
163
|
+
\************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>a,getKeyValueShapes:()=>i});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js");function o(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":case"llava_onevision":case"idefics3":n=o(e.text_config);break;case"moondream1":n=o(e.phi_config);break;case"musicgen":n=o(e.decoder);break;case"multi_modality":n=o(e.language_config);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"olmo":case"mobilellm":case"granite":case"cohere":case"mistral":case"starcoder2":case"qwen2":case"qwen2_vl":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_d
ecoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const s=o(e.decoder),i="num_decoder_layers"in s,a=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return i?(a.num_decoder_layers=s.num_decoder_layers,a.num_decoder_heads=s.num_decoder_heads,a.decoder_hidden_size=s.decoder_hidden_size,a.num_encoder_layers=s.num_encoder_layers,a.num_encoder_heads=s.num_encoder_heads,a.encoder_hidden_size=s.encoder_hidden_size):(a.num_layers=s.num_layers,a.num_heads=s.num_heads,a.hidden_size=s.hidden_size),a}const s={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)s[n]=e[t[n]];return s}function i(e,{prefix:t="past_key_values",batch_size:n=1}={}){const r={},s=e.normalized_config;if(s.is_encoder_decoder&&"num_encoder_heads"in s&&"num_decoder_heads"in s){const e=s.encoder_dim_kv??s.encoder_hidden_size/s.num_encoder_heads,o=s.decoder_dim_kv??s.decoder_hidden_size/s.num_decoder_heads,i=[n,s.num_encoder_heads,0,e],a=[n,s.num_decoder_heads,0,o];for(let e=0;e<s.num_decoder_layers;++e)r[`${t}.${e}.encoder.key`]=i,r[`${t}.${e}.encoder.value`]=i,r[`${t}.${e}.decoder.key`]=a,r[`${t}.${e}.decoder.value`]=a}else{const 
e=s.num_heads,o=s.num_layers,i=s.dim_kv??s.hidden_size/(s.num_attention_heads??e);if("falcon"===s.model_type){const s=[n*e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=s}else if(s.multi_query){const s=[n*e,0,2*i];for(let e=0;e<o;++e)r[`${t}.${e}.key_value`]=s}else if("bloom"===s.model_type){const s=[n*e,i,0],a=[n*e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=a}else if("openelm"===s.model_type)for(let s=0;s<o;++s){const o=[n,e[s],0,i];r[`${t}.${s}.key`]=o,r[`${t}.${s}.value`]=o}else{const s=[n,e,0,i];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=s,r[`${t}.${e}.value`]=s}}return r}class a{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=o(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main"}={}){!n||n instanceof a||(n=new a(n));const l=n??await async function(e,t){return await(0,s.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return a.from_pretrained(...e)}}},"./src/env.js":
|
|
164
164
|
/*!********************!*\
|
|
165
165
|
!*** ./src/env.js ***!
|
|
166
|
-
\********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),s=n(/*! path */"?3f59"),o=n(/*! url */"?154a");const i="undefined"!=typeof
|
|
166
|
+
\********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),s=n(/*! path */"?3f59"),o=n(/*! url */"?154a");const i="undefined"!=typeof window&&void 0!==window.document,a="undefined"!=typeof self&&"DedicatedWorkerGlobalScope"===self.constructor?.name,l="undefined"!=typeof self&&"caches"in self,d="undefined"!=typeof navigator&&"gpu"in navigator,u="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(s),f=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:a,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:d,IS_WEBNN_AVAILABLE:u,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m;let _="./";if(g){const e=Object(import.meta).url;e?_=s.dirname(s.dirname(o.fileURLToPath(e))):"undefined"!=typeof __dirname&&(_=s.dirname(__dirname))}const w=g?s.join(_,"/.cache/"):null,y="/models/",b={version:"3.1.1",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!(i||a),localModelPath:g?s.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
|
|
167
167
|
/*!***********************************************!*\
|
|
168
168
|
!*** ./src/generation/configuration_utils.js ***!
|
|
169
|
-
\***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>s});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class s{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
|
|
169
|
+
\***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>s});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class s{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;streamer=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
|
|
170
170
|
/*!******************************************!*\
|
|
171
171
|
!*** ./src/generation/logits_process.js ***!
|
|
172
|
-
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>_,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>d,LogitsProcessor:()=>o,LogitsProcessorList:()=>a,LogitsWarper:()=>i,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>f,NoBadWordsLogitsProcessor:()=>g,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>u,TemperatureLogitsWarper:()=>w,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>y,WhisperTimeStampLogitsProcessor:()=>c});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),s=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class o extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class i extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class a extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends o{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class d extends o{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class u extends o{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class c extends 
o{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const o=e[n].slice(this.begin_index),i=o.length>=1&&o[o.length-1]>=this.timestamp_begin,a=o.length<2||o[o.length-2]>=this.timestamp_begin;if(i&&(a?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,s.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,s.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends o{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),s=r.get(n)??[];s.push(e[e.length-1]),r.set(n,s)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,s=this.calcBannedNgramTokens(e[n]);for(const e of s)r[e]=-1/0}return t}}class h extends 
o{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of e[n]){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends o{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class f extends o{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class g extends o{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,s=e[n];for(const e of this.bad_words_ids){let t=!0;for(let n=1;n<=e.length-1&&e.length<s.length;++n)if(e.at(-n-1)!=s.at(-n)){t=!1;break}t&&(r[e.at(-1)]=-1/0)}}return t}}class _ extends o{constructor(e){if(super(),e<=1)throw new Error(`Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,t){if(t.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. 
Got batch size ${t.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,r=t.slice([0,n],null),s=t.slice([n,t.dims[0]],null);for(let e=0;e<s.data.length;++e)s.data[e]+=(r.data[e]-s.data[e])*this.guidance_scale;return s}}class w extends i{constructor(e){if(super(),"number"!=typeof e||e<=0){let t=`\`temperature\` (=${e}) must be a strictly positive float, otherwise your next token scores will be invalid.`;0===e&&(t+=" If you're looking for greedy decoding strategies, set `do_sample=false`.")}this.temperature=e}_call(e,t){const n=t.data;for(let e=0;e<n.length;++e)n[e]/=this.temperature;return t}}class y extends i{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),e<0||e>1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=t,this.min_tokens_to_keep=n}}class b extends i{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=t}}},"./src/generation/logits_sampler.js":
|
|
172
|
+
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{ClassifierFreeGuidanceLogitsProcessor:()=>_,ForcedBOSTokenLogitsProcessor:()=>l,ForcedEOSTokenLogitsProcessor:()=>d,LogitsProcessor:()=>o,LogitsProcessorList:()=>a,LogitsWarper:()=>i,MinLengthLogitsProcessor:()=>m,MinNewTokensLengthLogitsProcessor:()=>f,NoBadWordsLogitsProcessor:()=>g,NoRepeatNGramLogitsProcessor:()=>p,RepetitionPenaltyLogitsProcessor:()=>h,SuppressTokensAtBeginLogitsProcessor:()=>u,TemperatureLogitsWarper:()=>w,TopKLogitsWarper:()=>b,TopPLogitsWarper:()=>y,WhisperTimeStampLogitsProcessor:()=>c});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),s=(n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../utils/maths.js */"./src/utils/maths.js"));class o extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class i extends r.Callable{_call(e,t){throw Error("`_call` should be implemented in a subclass")}}class a extends r.Callable{constructor(){super(),this.processors=[]}push(e){this.processors.push(e)}extend(e){this.processors.push(...e)}_call(e,t){let n=t;for(const t of this.processors)n=t(e,n);return n}[Symbol.iterator](){return this.processors.values()}}class l extends o{constructor(e){super(),this.bos_token_id=e}_call(e,t){for(let n=0;n<e.length;++n)if(1===e[n].length){const e=t[n].data;e.fill(-1/0),e[this.bos_token_id]=0}return t}}class d extends o{constructor(e,t){super(),this.max_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.max_length-1){const e=t[n].data;e.fill(-1/0);for(const t of this.eos_token_id)e[t]=0}return t}}class u extends o{constructor(e,t){super(),this.begin_suppress_tokens=e,this.begin_index=t}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length===this.begin_index){const e=t[n].data;for(const t of this.begin_suppress_tokens)e[t]=-1/0}return t}}class c extends 
o{constructor(e,t){super(),this.eos_token_id=Array.isArray(e.eos_token_id)?e.eos_token_id[0]:e.eos_token_id,this.no_timestamps_token_id=e.no_timestamps_token_id,this.timestamp_begin=this.no_timestamps_token_id+1,this.begin_index=t.length,t.at(-1)===this.no_timestamps_token_id&&(this.begin_index-=1),this.max_initial_timestamp_index=e.max_initial_timestamp_index}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;if(r[this.no_timestamps_token_id]=-1/0,e[n].length===this.begin_index-1){r.fill(-1/0),r[this.timestamp_begin]=0;continue}const o=e[n].slice(this.begin_index),i=o.length>=1&&o[o.length-1]>=this.timestamp_begin,a=o.length<2||o[o.length-2]>=this.timestamp_begin;if(i&&(a?r.subarray(this.timestamp_begin).fill(-1/0):r.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&null!==this.max_initial_timestamp_index){const e=this.timestamp_begin+this.max_initial_timestamp_index;r.subarray(e+1).fill(-1/0)}const l=(0,s.log_softmax)(r);Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce(((e,t)=>e+t)))>(0,s.max)(l.subarray(0,this.timestamp_begin))[0]&&r.subarray(0,this.timestamp_begin).fill(-1/0)}return t}}class p extends o{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const t=e.length,n=[];for(let r=0;r<t+1-this.no_repeat_ngram_size;++r){const t=[];for(let n=0;n<this.no_repeat_ngram_size;++n)t.push(e[r+n]);n.push(t.map(Number))}const r=new Map;for(const e of n){const t=e.slice(0,e.length-1),n=JSON.stringify(t),s=r.get(n)??[];s.push(e[e.length-1]),r.set(n,s)}return r}getGeneratedNgrams(e,t){const n=t.slice(t.length+1-this.no_repeat_ngram_size,t.length);return e.get(JSON.stringify(n.map(Number)))??[]}calcBannedNgramTokens(e){const t=[];if(e.length+1<this.no_repeat_ngram_size)return t;{const t=this.getNgrams(e);return this.getGeneratedNgrams(t,e)}}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,s=this.calcBannedNgramTokens(e[n]);for(const e of s)r[e]=-1/0}return t}}class h extends 
o{constructor(e){super(),this.penalty=e}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data;for(const t of new Set(e[n])){const e=Number(t);r[e]<0?r[e]*=this.penalty:r[e]/=this.penalty}}return t}}class m extends o{constructor(e,t){super(),this.min_length=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n)if(e[n].length<this.min_length){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}return t}}class f extends o{constructor(e,t,n){super(),this.prompt_length_to_skip=e,this.min_new_tokens=t,this.eos_token_id=Array.isArray(n)?n:[n]}_call(e,t){for(let n=0;n<e.length;++n){if(e[n].length-this.prompt_length_to_skip<this.min_new_tokens){const e=t[n].data;for(const t of this.eos_token_id)e[t]=-1/0}}return t}}class g extends o{constructor(e,t){super(),this.bad_words_ids=e,this.eos_token_id=Array.isArray(t)?t:[t]}_call(e,t){for(let n=0;n<e.length;++n){const r=t[n].data,s=e[n];for(const e of this.bad_words_ids){let t=!0;for(let n=1;n<=e.length-1&&e.length<s.length;++n)if(e.at(-n-1)!=s.at(-n)){t=!1;break}t&&(r[e.at(-1)]=-1/0)}}return t}}class _ extends o{constructor(e){if(super(),e<=1)throw new Error(`Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,t){if(t.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. 
Got batch size ${t.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,r=t.slice([0,n],null),s=t.slice([n,t.dims[0]],null);for(let e=0;e<s.data.length;++e)s.data[e]+=(r.data[e]-s.data[e])*this.guidance_scale;return s}}class w extends i{constructor(e){if(super(),"number"!=typeof e||e<=0){let t=`\`temperature\` (=${e}) must be a strictly positive float, otherwise your next token scores will be invalid.`;0===e&&(t+=" If you're looking for greedy decoding strategies, set `do_sample=false`.")}this.temperature=e}_call(e,t){const n=t.data;for(let e=0;e<n.length;++e)n[e]/=this.temperature;return t}}class y extends i{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),e<0||e>1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=t,this.min_tokens_to_keep=n}}class b extends i{constructor(e,{filter_value:t=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=t}}},"./src/generation/logits_sampler.js":
|
|
173
173
|
/*!******************************************!*\
|
|
174
174
|
!*** ./src/generation/logits_sampler.js ***!
|
|
175
175
|
\******************************************/(e,t,n)=>{n.r(t),n.d(t,{LogitsSampler:()=>i});var r=n(/*! ../utils/generic.js */"./src/utils/generic.js"),s=n(/*! ../utils/tensor.js */"./src/utils/tensor.js"),o=n(/*! ../utils/maths.js */"./src/utils/maths.js");n(/*! ../generation/configuration_utils.js */"./src/generation/configuration_utils.js");class i extends r.Callable{constructor(e){super(),this.generation_config=e}async _call(e){return this.sample(e)}async sample(e){throw Error("sample should be implemented in subclasses.")}getLogits(e,t){let n=e.dims.at(-1),r=e.data;if(-1===t)r=r.slice(-n);else{let e=t*n;r=r.slice(e,e+n)}return r}randomSelect(e){let t=0;for(let n=0;n<e.length;++n)t+=e[n];let n=Math.random()*t;for(let t=0;t<e.length;++t)if(n-=e[t],n<=0)return t;return 0}static getSampler(e){if(e.do_sample)return new l(e);if(e.num_beams>1)return new d(e);if(e.num_return_sequences>1)throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${e.num_return_sequences}.`);return new a(e)}}class a extends i{async sample(e){const t=(0,o.max)(e.data)[1];return[[BigInt(t),0]]}}class l extends i{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,s.topk)(e,t),i=(0,o.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},(()=>{const e=this.randomSelect(i);return[r.data[e],Math.log(i[e])]}))}}class d extends i{async sample(e){let t=e.dims.at(-1);this.generation_config.top_k>0&&(t=Math.min(this.generation_config.top_k,t));const[n,r]=await(0,s.topk)(e,t),i=(0,o.softmax)(n.data);return Array.from({length:this.generation_config.num_beams},((e,t)=>[r.data[t],Math.log(i[t])]))}}},"./src/generation/stopping_criteria.js":
|
|
@@ -181,7 +181,7 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
181
181
|
\*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>i,TextStreamer:()=>l,WhisperTextStreamer:()=>d});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),s=n(/*! ../tokenizers.js */"./src/tokenizers.js"),o=n(/*! ../env.js */"./src/env.js");class i{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const a=o.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends i{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:s={},...o}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??a,this.token_callback_function=r,this.decode_kwargs={...s,...o},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let o;n.endsWith("\n")?(o=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,s.is_chinese_char)(n.charCodeAt(n.length-1))?(o=n.slice(this.print_len),this.print_len+=o.length):(o=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=o.length),this.on_finalized_text(o,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===a&&o.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class d extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:s=null,on_chunk_end:o=null,on_finalize:i=null,time_precision:a=.02,skip_special_tokens:l=!0,decode_kwargs:d={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...d}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=s,this.on_chunk_end=o,this.on_finalize=i,this.time_precision=a,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
|
|
182
182
|
/*!***********************!*\
|
|
183
183
|
!*** ./src/models.js ***!
|
|
184
|
-
\***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>dn,ASTModel:()=>ln,ASTPreTrainedModel:()=>an,AlbertForMaskedLM:()=>_t,AlbertForQuestionAnswering:()=>gt,AlbertForSequenceClassification:()=>ft,AlbertModel:()=>mt,AlbertPreTrainedModel:()=>ht,AutoModel:()=>_l,AutoModelForAudioClassification:()=>Ol,AutoModelForAudioFrameClassification:()=>Dl,AutoModelForCTC:()=>Ll,AutoModelForCausalLM:()=>Tl,AutoModelForDepthEstimation:()=>Vl,AutoModelForDocumentQuestionAnswering:()=>Nl,AutoModelForImageClassification:()=>Pl,AutoModelForImageFeatureExtraction:()=>ql,AutoModelForImageMatting:()=>Rl,AutoModelForImageSegmentation:()=>Sl,AutoModelForImageToImage:()=>jl,AutoModelForMaskGeneration:()=>zl,AutoModelForMaskedLM:()=>kl,AutoModelForNormalEstimation:()=>Gl,AutoModelForObjectDetection:()=>Al,AutoModelForPoseEstimation:()=>Ul,AutoModelForQuestionAnswering:()=>$l,AutoModelForSemanticSegmentation:()=>El,AutoModelForSeq2SeqLM:()=>bl,AutoModelForSequenceClassification:()=>wl,AutoModelForSpeechSeq2Seq:()=>vl,AutoModelForTextToSpectrogram:()=>xl,AutoModelForTextToWaveform:()=>Ml,AutoModelForTokenClassification:()=>yl,AutoModelForUniversalSegmentation:()=>Fl,AutoModelForVision2Seq:()=>Cl,AutoModelForXVector:()=>Bl,AutoModelForZeroShotObjectDetection:()=>Il,BartForConditionalGeneration:()=>St,BartForSequenceClassification:()=>Et,BartModel:()=>Pt,BartPretrainedModel:()=>Ct,BaseModelOutput:()=>Q,BeitForImageClassification:()=>Es,BeitModel:()=>Ss,BeitPreTrainedModel:()=>Ps,BertForMaskedLM:()=>J,BertForQuestionAnswering:()=>te,BertForSequenceClassification:()=>Z,BertForTokenClassification:()=>ee,BertModel:()=>Y,BertPreTrainedModel:()=>K,BlenderbotForConditionalGeneration:()=>Dt,BlenderbotModel:()=>Bt,BlenderbotPreTrainedModel:()=>Ot,BlenderbotSmallForConditionalGeneration:()=>jt,BlenderbotSmallModel:()=>Rt,BlenderbotSmallPreTrainedModel:()=>Nt,BloomForCausalLM:()=>Gr,BloomModel:()=>Vr,BloomPreTrainedModel:()=>jr,CLIPModel:()=>vn,CLIPPreTrainedModel:()=>bn,CLIPSegFor
ImageSegmentation:()=>Dn,CLIPSegModel:()=>Bn,CLIPSegPreTrainedModel:()=>On,CLIPTextModel:()=>xn,CLIPTextModelWithProjection:()=>Mn,CLIPVisionModel:()=>Tn,CLIPVisionModelWithProjection:()=>kn,CamembertForMaskedLM:()=>Te,CamembertForQuestionAnswering:()=>Ce,CamembertForSequenceClassification:()=>ke,CamembertForTokenClassification:()=>$e,CamembertModel:()=>Me,CamembertPreTrainedModel:()=>xe,CausalLMOutput:()=>Jl,CausalLMOutputWithPast:()=>Zl,ChineseCLIPModel:()=>Fn,ChineseCLIPPreTrainedModel:()=>En,ClapAudioModelWithProjection:()=>Wi,ClapModel:()=>Ui,ClapPreTrainedModel:()=>Gi,ClapTextModelWithProjection:()=>qi,CodeGenForCausalLM:()=>or,CodeGenModel:()=>sr,CodeGenPreTrainedModel:()=>rr,CohereForCausalLM:()=>br,CohereModel:()=>yr,CoherePreTrainedModel:()=>wr,ConvBertForMaskedLM:()=>pe,ConvBertForQuestionAnswering:()=>fe,ConvBertForSequenceClassification:()=>he,ConvBertForTokenClassification:()=>me,ConvBertModel:()=>ce,ConvBertPreTrainedModel:()=>ue,ConvNextForImageClassification:()=>Po,ConvNextModel:()=>Co,ConvNextPreTrainedModel:()=>$o,ConvNextV2ForImageClassification:()=>Fo,ConvNextV2Model:()=>Eo,ConvNextV2PreTrainedModel:()=>So,DPTForDepthEstimation:()=>lo,DPTModel:()=>ao,DPTPreTrainedModel:()=>io,DebertaForMaskedLM:()=>Ee,DebertaForQuestionAnswering:()=>Ie,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Ae,DebertaModel:()=>Se,DebertaPreTrainedModel:()=>Pe,DebertaV2ForMaskedLM:()=>Oe,DebertaV2ForQuestionAnswering:()=>Ne,DebertaV2ForSequenceClassification:()=>Be,DebertaV2ForTokenClassification:()=>De,DebertaV2Model:()=>Le,DebertaV2PreTrainedModel:()=>ze,DecisionTransformerModel:()=>xa,DecisionTransformerPreTrainedModel:()=>va,DeiTForImageClassification:()=>Hs,DeiTModel:()=>Ws,DeiTPreTrainedModel:()=>qs,DepthAnythingForDepthEstimation:()=>co,DepthAnythingPreTrainedModel:()=>uo,DepthProForDepthEstimation:()=>_o,DepthProPreTrainedModel:()=>go,DetrForObjectDetection:()=>Is,DetrForSegmentation:()=>zs,DetrModel:()=>As,DetrObjectDetectionOutput:()=>
Ls,DetrPreTrainedModel:()=>Fs,DetrSegmentationOutput:()=>Os,Dinov2ForImageClassification:()=>zo,Dinov2Model:()=>Io,Dinov2PreTrainedModel:()=>Ao,DistilBertForMaskedLM:()=>qe,DistilBertForQuestionAnswering:()=>Ue,DistilBertForSequenceClassification:()=>Ve,DistilBertForTokenClassification:()=>Ge,DistilBertModel:()=>je,DistilBertPreTrainedModel:()=>Re,DonutSwinModel:()=>ko,DonutSwinPreTrainedModel:()=>To,EfficientNetForImageClassification:()=>sa,EfficientNetModel:()=>ra,EfficientNetPreTrainedModel:()=>na,ElectraForMaskedLM:()=>we,ElectraForQuestionAnswering:()=>ve,ElectraForSequenceClassification:()=>ye,ElectraForTokenClassification:()=>be,ElectraModel:()=>_e,ElectraPreTrainedModel:()=>ge,EsmForMaskedLM:()=>Xe,EsmForSequenceClassification:()=>Qe,EsmForTokenClassification:()=>Ke,EsmModel:()=>He,EsmPreTrainedModel:()=>We,FalconForCausalLM:()=>Vi,FalconModel:()=>ji,FalconPreTrainedModel:()=>Ri,FastViTForImageClassification:()=>hs,FastViTModel:()=>ps,FastViTPreTrainedModel:()=>cs,Florence2ForConditionalGeneration:()=>yn,Florence2PreTrainedModel:()=>wn,GLPNForDepthEstimation:()=>Mo,GLPNModel:()=>xo,GLPNPreTrainedModel:()=>vo,GPT2LMHeadModel:()=>jn,GPT2Model:()=>Rn,GPT2PreTrainedModel:()=>Nn,GPTBigCodeForCausalLM:()=>nr,GPTBigCodeModel:()=>tr,GPTBigCodePreTrainedModel:()=>er,GPTJForCausalLM:()=>Zn,GPTJModel:()=>Jn,GPTJPreTrainedModel:()=>Yn,GPTNeoForCausalLM:()=>Hn,GPTNeoModel:()=>Wn,GPTNeoPreTrainedModel:()=>qn,GPTNeoXForCausalLM:()=>Kn,GPTNeoXModel:()=>Qn,GPTNeoXPreTrainedModel:()=>Xn,Gemma2ForCausalLM:()=>$r,Gemma2Model:()=>kr,Gemma2PreTrainedModel:()=>Tr,GemmaForCausalLM:()=>Mr,GemmaModel:()=>xr,GemmaPreTrainedModel:()=>vr,GraniteForCausalLM:()=>_r,GraniteModel:()=>gr,GranitePreTrainedModel:()=>fr,GroupViTModel:()=>us,GroupViTPreTrainedModel:()=>ds,HieraForImageClassification:()=>Ks,HieraModel:()=>Qs,HieraPreTrainedModel:()=>Xs,HubertForCTC:()=>yi,HubertForSequenceClassification:()=>bi,HubertModel:()=>wi,HubertPreTrainedModel:()=>_i,ImageMattingOutput:()=>ed,JAISLMHeadMod
el:()=>Un,JAISModel:()=>Gn,JAISPreTrainedModel:()=>Vn,JinaCLIPModel:()=>In,JinaCLIPPreTrainedModel:()=>An,JinaCLIPTextModel:()=>zn,JinaCLIPVisionModel:()=>Ln,LlamaForCausalLM:()=>lr,LlamaModel:()=>ar,LlamaPreTrainedModel:()=>ir,LlavaForConditionalGeneration:()=>fn,LlavaOnevisionForConditionalGeneration:()=>gn,LlavaPreTrainedModel:()=>mn,LongT5ForConditionalGeneration:()=>Mt,LongT5Model:()=>xt,LongT5PreTrainedModel:()=>vt,M2M100ForConditionalGeneration:()=>Ho,M2M100Model:()=>Wo,M2M100PreTrainedModel:()=>qo,MBartForCausalLM:()=>Lt,MBartForConditionalGeneration:()=>It,MBartForSequenceClassification:()=>zt,MBartModel:()=>At,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>st,MPNetForQuestionAnswering:()=>at,MPNetForSequenceClassification:()=>ot,MPNetForTokenClassification:()=>it,MPNetModel:()=>rt,MPNetPreTrainedModel:()=>nt,MT5ForConditionalGeneration:()=>$t,MT5Model:()=>kt,MT5PreTrainedModel:()=>Tt,MarianMTModel:()=>Uo,MarianModel:()=>Go,MarianPreTrainedModel:()=>Vo,MaskFormerForInstanceSegmentation:()=>bo,MaskFormerModel:()=>yo,MaskFormerPreTrainedModel:()=>wo,MaskedLMOutput:()=>Kl,MgpstrForSceneTextRecognition:()=>Ca,MgpstrModelOutput:()=>ka,MgpstrPreTrainedModel:()=>$a,MistralForCausalLM:()=>Oi,MistralModel:()=>Li,MistralPreTrainedModel:()=>zi,MobileBertForMaskedLM:()=>Ze,MobileBertForQuestionAnswering:()=>tt,MobileBertForSequenceClassification:()=>et,MobileBertModel:()=>Je,MobileBertPreTrainedModel:()=>Ye,MobileLLMForCausalLM:()=>cr,MobileLLMModel:()=>ur,MobileLLMPreTrainedModel:()=>dr,MobileNetV1ForImageClassification:()=>ca,MobileNetV1Model:()=>ua,MobileNetV1PreTrainedModel:()=>da,MobileNetV2ForImageClassification:()=>ma,MobileNetV2Model:()=>ha,MobileNetV2PreTrainedModel:()=>pa,MobileNetV3ForImageClassification:()=>_a,MobileNetV3Model:()=>ga,MobileNetV3PreTrainedModel:()=>fa,MobileNetV4ForImageClassification:()=>ba,MobileNetV4Model:()=>ya,MobileNetV4PreTrainedModel:()=>wa,MobileViTForImageClassification:()=>ws,MobileViTModel:()=>_s,MobileViTPreTrainedModel:()=>gs
,MobileViTV2ForImageClassification:()=>vs,MobileViTV2Model:()=>bs,MobileViTV2PreTrainedModel:()=>ys,ModelOutput:()=>X,Moondream1ForConditionalGeneration:()=>_n,MptForCausalLM:()=>Wr,MptModel:()=>qr,MptPreTrainedModel:()=>Ur,MultiModalityCausalLM:()=>Ta,MultiModalityPreTrainedModel:()=>Ma,MusicgenForCausalLM:()=>aa,MusicgenForConditionalGeneration:()=>la,MusicgenModel:()=>ia,MusicgenPreTrainedModel:()=>oa,NomicBertModel:()=>re,NomicBertPreTrainedModel:()=>ne,OPTForCausalLM:()=>Qr,OPTModel:()=>Xr,OPTPreTrainedModel:()=>Hr,OlmoForCausalLM:()=>mr,OlmoModel:()=>hr,OlmoPreTrainedModel:()=>pr,OpenELMForCausalLM:()=>Sr,OpenELMModel:()=>Pr,OpenELMPreTrainedModel:()=>Cr,OwlViTForObjectDetection:()=>Ts,OwlViTModel:()=>Ms,OwlViTPreTrainedModel:()=>xs,Owlv2ForObjectDetection:()=>Cs,Owlv2Model:()=>$s,Owlv2PreTrainedModel:()=>ks,PatchTSMixerForPrediction:()=>Ia,PatchTSMixerModel:()=>Aa,PatchTSMixerPreTrainedModel:()=>Fa,PatchTSTForPrediction:()=>Ea,PatchTSTModel:()=>Sa,PatchTSTPreTrainedModel:()=>Pa,Phi3ForCausalLM:()=>Rr,Phi3Model:()=>Nr,Phi3PreTrainedModel:()=>Dr,PhiForCausalLM:()=>Br,PhiModel:()=>Or,PhiPreTrainedModel:()=>Lr,PreTrainedModel:()=>H,PretrainedMixin:()=>za,PvtForImageClassification:()=>rs,PvtModel:()=>ns,PvtPreTrainedModel:()=>ts,PyAnnoteForAudioFrameClassification:()=>ti,PyAnnoteModel:()=>ei,PyAnnotePreTrainedModel:()=>Zo,QuestionAnsweringModelOutput:()=>Yl,Qwen2ForCausalLM:()=>Ar,Qwen2Model:()=>Fr,Qwen2PreTrainedModel:()=>Er,Qwen2VLForConditionalGeneration:()=>zr,Qwen2VLPreTrainedModel:()=>Ir,RTDetrForObjectDetection:()=>Ns,RTDetrModel:()=>Ds,RTDetrObjectDetectionOutput:()=>Rs,RTDetrPreTrainedModel:()=>Bs,ResNetForImageClassification:()=>Zs,ResNetModel:()=>Js,ResNetPreTrainedModel:()=>Ys,RoFormerForMaskedLM:()=>ie,RoFormerForQuestionAnswering:()=>de,RoFormerForSequenceClassification:()=>ae,RoFormerForTokenClassification:()=>le,RoFormerModel:()=>oe,RoFormerPreTrainedModel:()=>se,RobertaForMaskedLM:()=>Ut,RobertaForQuestionAnswering:()=>Ht,RobertaForSequenceClassif
ication:()=>qt,RobertaForTokenClassification:()=>Wt,RobertaModel:()=>Gt,RobertaPreTrainedModel:()=>Vt,SamImageSegmentationOutput:()=>jo,SamModel:()=>Ro,SamPreTrainedModel:()=>No,SapiensForDepthEstimation:()=>mo,SapiensForNormalEstimation:()=>fo,SapiensForSemanticSegmentation:()=>ho,SapiensPreTrainedModel:()=>po,SegformerForImageClassification:()=>Yi,SegformerForSemanticSegmentation:()=>Ji,SegformerModel:()=>Ki,SegformerPreTrainedModel:()=>Qi,Seq2SeqLMOutput:()=>Wl,SequenceClassifierOutput:()=>Hl,SiglipModel:()=>Cn,SiglipPreTrainedModel:()=>$n,SiglipTextModel:()=>Pn,SiglipVisionModel:()=>Sn,SpeechT5ForSpeechToText:()=>Si,SpeechT5ForTextToSpeech:()=>Ei,SpeechT5HifiGan:()=>Fi,SpeechT5Model:()=>Pi,SpeechT5PreTrainedModel:()=>Ci,SqueezeBertForMaskedLM:()=>ut,SqueezeBertForQuestionAnswering:()=>pt,SqueezeBertForSequenceClassification:()=>ct,SqueezeBertModel:()=>dt,SqueezeBertPreTrainedModel:()=>lt,StableLmForCausalLM:()=>ta,StableLmModel:()=>ea,StableLmPreTrainedModel:()=>Zi,Starcoder2ForCausalLM:()=>Ni,Starcoder2Model:()=>Di,Starcoder2PreTrainedModel:()=>Bi,Swin2SRForImageSuperResolution:()=>oo,Swin2SRModel:()=>so,Swin2SRPreTrainedModel:()=>ro,SwinForImageClassification:()=>no,SwinModel:()=>to,SwinPreTrainedModel:()=>eo,T5ForConditionalGeneration:()=>bt,T5Model:()=>yt,T5PreTrainedModel:()=>wt,TableTransformerForObjectDetection:()=>Gs,TableTransformerModel:()=>Vs,TableTransformerObjectDetectionOutput:()=>Us,TableTransformerPreTrainedModel:()=>js,TokenClassifierOutput:()=>Ql,TrOCRForCausalLM:()=>Ii,TrOCRPreTrainedModel:()=>Ai,UniSpeechForCTC:()=>ii,UniSpeechForSequenceClassification:()=>ai,UniSpeechModel:()=>oi,UniSpeechPreTrainedModel:()=>si,UniSpeechSatForAudioFrameClassification:()=>pi,UniSpeechSatForCTC:()=>ui,UniSpeechSatForSequenceClassification:()=>ci,UniSpeechSatModel:()=>di,UniSpeechSatPreTrainedModel:()=>li,ViTForImageClassification:()=>Jr,ViTMAEModel:()=>os,ViTMAEPreTrainedModel:()=>ss,ViTMSNForImageClassification:()=>ls,ViTMSNModel:()=>as,ViTMSNPreTrainedModel:
()=>is,ViTModel:()=>Yr,ViTPreTrainedModel:()=>Kr,VisionEncoderDecoderModel:()=>hn,VitMatteForImageMatting:()=>fs,VitMattePreTrainedModel:()=>ms,VitPoseForPoseEstimation:()=>es,VitPosePreTrainedModel:()=>Zr,VitsModel:()=>Xi,VitsModelOutput:()=>td,VitsPreTrainedModel:()=>Hi,Wav2Vec2BertForCTC:()=>fi,Wav2Vec2BertForSequenceClassification:()=>gi,Wav2Vec2BertModel:()=>mi,Wav2Vec2BertPreTrainedModel:()=>hi,Wav2Vec2ForAudioFrameClassification:()=>Jo,Wav2Vec2ForCTC:()=>Ko,Wav2Vec2ForSequenceClassification:()=>Yo,Wav2Vec2Model:()=>Qo,Wav2Vec2PreTrainedModel:()=>Xo,WavLMForAudioFrameClassification:()=>$i,WavLMForCTC:()=>Mi,WavLMForSequenceClassification:()=>Ti,WavLMForXVector:()=>ki,WavLMModel:()=>xi,WavLMPreTrainedModel:()=>vi,WeSpeakerResNetModel:()=>ri,WeSpeakerResNetPreTrainedModel:()=>ni,WhisperForConditionalGeneration:()=>pn,WhisperModel:()=>cn,WhisperPreTrainedModel:()=>un,XLMForQuestionAnswering:()=>Zt,XLMForSequenceClassification:()=>Yt,XLMForTokenClassification:()=>Jt,XLMModel:()=>Qt,XLMPreTrainedModel:()=>Xt,XLMRobertaForMaskedLM:()=>nn,XLMRobertaForQuestionAnswering:()=>on,XLMRobertaForSequenceClassification:()=>rn,XLMRobertaForTokenClassification:()=>sn,XLMRobertaModel:()=>tn,XLMRobertaPreTrainedModel:()=>en,XLMWithLMHeadModel:()=>Kt,XVectorOutput:()=>Xl,YolosForObjectDetection:()=>Bo,YolosModel:()=>Oo,YolosObjectDetectionOutput:()=>Do,YolosPreTrainedModel:()=>Lo});var r=n(/*! ./configs.js */"./src/configs.js"),s=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),d=n(/*! ./utils/constants.js */"./src/utils/constants.js"),u=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),c=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),p=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),h=n(/*! 
./utils/image.js */"./src/utils/image.js"),m=n(/*! ./utils/maths.js */"./src/utils/maths.js"),f=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),g=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=n(/*! ./env.js */"./src/env.js"),w=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),y=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const b=0,v=1,x=2,M=3,T=4,k=5,$=6,C=7,P=8,S=new Map,E=new Map,F=new Map;async function A(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:a,session_options:d,session_config:u}=await async function(e,t,n){const i=n.config?.["transformers.js_config"]??{};let a=n.device??i.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". Using the default device.`),a=null));const d=a??(_.apis.IS_NODE_ENV?"cpu":"wasm"),u=(0,s.deviceToExecutionProviders)(d);let c=n.dtype??i.dtype;"string"!=typeof c&&(c&&c.hasOwnProperty(t)?c=c[t]:(c=o.DEFAULT_DEVICE_DTYPE_MAPPING[d]??o.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${c}) for this device (${d}).`)));const p=c;if(!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(p))throw new Error(`Invalid dtype: ${p}. Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);if(p===o.DATA_TYPES.fp16&&"webgpu"===d&&!await(0,o.isWebGpuFp16Supported)())throw new Error(`The device (${d}) does not support fp16.`);const h=i.kv_cache_dtype?"string"==typeof i.kv_cache_dtype?i.kv_cache_dtype:i.kv_cache_dtype[p]??"float32":void 0;if(h&&!["float32","float16"].includes(h))throw new Error(`Invalid kv_cache_dtype: ${h}. 
Should be one of: float32, float16`);const m={dtype:p,kv_cache_dtype:h},f=o.DEFAULT_DTYPE_SUFFIX_MAPPING[p],g=`${n.subfolder??""}/${t}${f}.onnx`,w={...n.session_options};w.executionProviders??=u;const y=i.free_dimension_overrides;y?w.freeDimensionOverrides??=y:d.startsWith("webnn")&&!w.freeDimensionOverrides&&console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');const b=(0,l.getModelFile)(e,g,!0,n),v=n.use_external_data_format??i.use_external_data_format;let x=[];if(v&&(!0===v||"object"==typeof v&&v.hasOwnProperty(t)&&!0===v[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${f}.onnx_data`,s=`${n.subfolder??""}/${r}`;x.push(new Promise((async(t,o)=>{const i=await(0,l.getModelFile)(e,s,!0,n);t({path:r,data:i})})))}else void 0!==w.externalData&&(x=w.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(x.length>0&&(w.externalData=await Promise.all(x)),"webgpu"===d){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,s.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";w.preferredOutputLocation=t}}return{buffer:await b,session_options:w,session_config:m}}(e,t[i],n);return[i,await(0,s.createInferenceSession)(a,d,u)]}))))}async function I(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async r=>[r,await(0,l.getModelJSON)(e,t[r],!1,n)]))))}async function z(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const o of e.inputNames){const e=t[o];e instanceof p.Tensor?n[o]=(0,s.isONNXProxy)()?e.clone():e:r.push(o)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", 
")}.`);const o=Object.keys(t).length,i=e.inputNames.length;if(o>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${o} > ${i}). The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=L(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function L(e){for(let t in e)(0,s.isONNXTensor)(e[t])?e[t]=new p.Tensor(e[t]):"object"==typeof e[t]&&L(e[t]);return e}function O(e){if(e instanceof p.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new p.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new p.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function B(e){return new p.Tensor("bool",[e],[1])}async function D(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:s,...o}=t;if(!n){const r=(0,a.pick)(t,e.sessions.model.inputNames);n=(await N(e,r)).last_hidden_state}o.input_ids=s,o.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(o.encoder_attention_mask=t.attention_mask);return await R(e,o,!0)}async function N(e,t){const n=e.sessions.model,r=(0,a.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new p.Tensor("int64",new 
BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await z(n,r)}async function R(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:s,...o}=t;r.inputNames.includes("use_cache_branch")&&(o.use_cache_branch=B(!!s)),r.inputNames.includes("position_ids")&&o.attention_mask&&!o.position_ids&&(o.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:s}=e,{data:o,dims:i}=V(s);let a=new p.Tensor("int64",o,i);if(t){const e=-(n??r).dims.at(1);a=a.slice(null,[e,null])}return a}(o,s)),e.addPastKeyValues(o,s);const i=(0,a.pick)(o,r.inputNames);return await z(r,i)}async function j(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:s=null,inputs_embeds:o=null,past_key_values:i=null,generation_config:a=null,logits_processor:l=null,...d}){if(!o)if(o=await e.encode_text({input_ids:t,...d}),r&&1!==t.dims[1]){const s=await e.encode_image({pixel_values:r,...d});({inputs_embeds:o,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:s,inputs_embeds:o,input_ids:t,attention_mask:n}))}else if(i&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(i)[0].dims.at(-2);n=(0,p.cat)([(0,p.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}if(!s&&"qwen2_vl"===e.config.model_type){const{image_grid_thw:r,video_grid_thw:o}=d;[s]=e.get_rope_index(t,r,o,n)}return await R(e,{inputs_embeds:o,past_key_values:i,attention_mask:n,position_ids:s,generation_config:a,logits_processor:l},!0)}function V(e){const[t,n]=e.dims,r=e.data,s=new BigInt64Array(r.length);for(let e=0;e<t;++e){const t=e*n;let o=BigInt(0);for(let e=0;e<n;++e){const n=t+e;0n===r[n]?s[n]=BigInt(1):(s[n]=o,o+=r[n])}}return{data:s,dims:e.dims}}function G(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:s}=n;if(s&&s.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const s=e.config.num_image_tokens;if(!s)throw new Error("`num_image_tokens` is missing in the model configuration.");const o=r.dims[1]-(t-s);n.input_ids=r.slice(null,[-o,null]),n.attention_mask=(0,p.ones)([1,t+o])}}return n}function U(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:O(t)}}function q(e,...t){return e.config.is_encoder_decoder?U(e,...t):G(e,...t)}function W(e,t,n,r){const s=!!n.past_key_values;if(null!==r.guidance_scale&&r.guidance_scale>1&&(s?n.input_ids=(0,p.cat)([n.input_ids,n.input_ids],0):(n.input_ids=(0,p.cat)([n.input_ids,(0,p.full_like)(n.input_ids,BigInt(r.pad_token_id))],0),n.attention_mask=(0,p.cat)([n.attention_mask,(0,p.full_like)(n.attention_mask,0n)],0))),!s&&n.pixel_values||(n.pixel_values=(0,p.full)([0,0,3,384,384],1)),s){const e=0,t=1,r=e>0?1:0,s=1;n.images_seq_mask=new p.Tensor("bool",new Array(e+t).fill(!0).fill(!1,0,t),[s,e+t]),n.images_emb_mask=new p.Tensor("bool",new Array(e).fill(!!r),[s,1,e])}return n}class H extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t,n){super(),this.config=e,this.sessions=t,this.configs=n;const r=F.get(this.constructor),s=S.get(r);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,s){case T:this.can_generate=!0,this._forward=R,this._prepare_inputs_for_generation=G;break;case x:case M:case C:this.can_generate=!0,this._forward=D,this._prepare_inputs_for_generation=U;break;case v:this._forward=D;break;case $:this.can_generate=!0,this._forward=j,this._prepare_inputs_for_generation=q;break;case P:this.can_generate=!0,this._prepare_inputs_for_generation=W;break;default:this._forward=N}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of 
Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:u=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:u,dtype:c,use_external_data_format:p,session_options:h};const f=F.get(this),g=S.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===T)_=await Promise.all([A(e,{model:m.model_file_name??"model"},m),I(e,{generation_config:"generation_config.json"},m)]);else if(g===x||g===M)_=await Promise.all([A(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),I(e,{generation_config:"generation_config.json"},m)]);else if(g===k)_=await Promise.all([A(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===v)_=await Promise.all([A(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===$){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([A(e,t,m),I(e,{generation_config:"generation_config.json"},m)])}else g===C?_=await Promise.all([A(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),I(e,{generation_config:"generation_config.json"},m)]):g===P?_=await Promise.all([A(e,{prepare_inputs_embeds:"prepare_inputs_embeds",model:"language_model",lm_head:"lm_head",gen_head:"gen_head",gen_img_embeds:"gen_img_embeds",image_decode:"image_decode"},m),I(e,{generation_config:"generation_config.json"},m)]):(g!==b&&console.warn(`Model type for '${f??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at ${d.GITHUB_ISSUE_URL}.`),_=await Promise.all([A(e,{model:m.model_file_name??"model"},m)]));return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}get generation_config(){return this.configs?.generation_config??null}_get_logits_warper(e){const t=new u.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new u.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new u.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new u.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new u.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new u.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new u.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new u.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new u.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new u.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new u.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new u.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new u.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new u.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=c.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const s=new n(r);return 
Object.assign(s,this.generation_config??{}),e&&Object.assign(s,e),t&&Object.assign(s,(0,a.pick)(t,Object.getOwnPropertyNames(s))),s}_get_stopping_criteria(e,t=null){const n=new f.StoppingCriteriaList;return null!==e.max_length&&n.push(new f.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new f.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ua,Xa,Ga,Da],t=F.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let s=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(s+=` Please use the following class instead: ${[...n].join(", ")}`),Error(s)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new p.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,p.cat)([n.attention_mask,(0,p.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,a.pick)(n,this.forward_params),s=this.main_input_name;if(s in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[s]=e;return{inputs_tensor:r[s],model_inputs:r,model_input_name:s}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...s}=t,o=await this._prepare_inputs_embeds(t);t={...s,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:s}=await N(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)s=(0,p.cat)([s,(0,p.full_like)(s,0)],0),"attention_mask"in t&&(t.attention_mask=(0,p.cat)([t.attention_mask,(0,p.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=O(t.decoder_input_ids).dims[0];if(e!==s.dims[0]){if(1!==s.dims[0])throw new Error(`The encoder outputs have a different batch size (${s.dims[0]}) than the decoder inputs (${e}).`);s=(0,p.cat)(Array.from({length:e},(()=>s)),0)}}return t.encoder_outputs=s,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:s,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(!(i instanceof p.Tensor)){if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(r??=s,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);i=r}else i=Array.from({length:e},(()=>[r]));i=O(i)}return n.decoder_attention_mask=(0,p.ones_like)(i),{input_ids:i,model_inputs:a}}async 
generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:s=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const d=this.config.is_encoder_decoder;let u;d&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),d?({input_ids:u,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):u=a[l];let c=u.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=c+t.max_new_tokens);const h=this._get_logits_processor(t,c,n),m=this._get_stopping_criteria(t,r),f=a[l].dims.at(0),_=g.LogitsSampler.getSampler(t),w=new Array(f).fill(0),y=u.tolist();let b;s&&s.put(y);let v={};for(;;){if(a=this.prepare_inputs_for_generation(y,a,t),b=await this.forward(a),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(b);for(const t in e)t in v||(v[t]=[]),v[t].push(e[t])}const e=h(y,b.logits.slice(null,-1,null)),n=[];for(let t=0;t<e.dims.at(0);++t){const r=e[t],s=await _(r);for(const[e,r]of s){const s=BigInt(e);w[t]+=r,y[t].push(s),n.push([s]);break}}s&&s.put(n);if(m(y).every((e=>e)))break;a=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:b,model_inputs:a,is_encoder_decoder:d})}s&&s.end();const x=this.getPastKeyValues(b,a.past_key_values,!0),M=new p.Tensor("int64",y.flat(),[y.length,y[0].length]);if(t.return_dict_in_generate)return{sequences:M,past_key_values:x,...v};for(const e of Object.values(b))"gpu-buffer"===e.location&&e.dispose();return M}getPastKeyValues(e,t,n=!1){const r=Object.create(null);for(const s in e)if(s.startsWith("present")){const 
o=s.replace("present","past_key_values"),i=s.includes("encoder");if(r[o]=i&&t?t[o]:e[s],t&&(!i||n)){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.sessions.decoder_model_merged??this.sessions.model,n=t?.config?.kv_cache_dtype??"float32",s="float16"===n?new Uint16Array:[],o=(e[this.main_input_name]??e.attention_mask).dims?.[0]??1,i=(0,r.getKeyValueShapes)(this.config,{batch_size:o});for(const t in i)e[t]=new p.Tensor(n,s,i[t])}}async encode_image({pixel_values:e}){const t=(await z(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await z(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class X{}class Q extends X{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class K extends H{}class Y extends K{}class J extends K{async _call(e){return new Kl(await super._call(e))}}class Z extends K{async _call(e){return new Hl(await super._call(e))}}class ee extends K{async _call(e){return new Ql(await super._call(e))}}class te extends K{async _call(e){return new Yl(await super._call(e))}}class ne extends H{}class re extends ne{}class se extends H{}class oe extends se{}class ie extends se{async _call(e){return new Kl(await super._call(e))}}class ae extends se{async _call(e){return new Hl(await super._call(e))}}class le extends se{async _call(e){return new Ql(await super._call(e))}}class de extends se{async _call(e){return 
new Yl(await super._call(e))}}class ue extends H{}class ce extends ue{}class pe extends ue{async _call(e){return new Kl(await super._call(e))}}class he extends ue{async _call(e){return new Hl(await super._call(e))}}class me extends ue{async _call(e){return new Ql(await super._call(e))}}class fe extends ue{async _call(e){return new Yl(await super._call(e))}}class ge extends H{}class _e extends ge{}class we extends ge{async _call(e){return new Kl(await super._call(e))}}class ye extends ge{async _call(e){return new Hl(await super._call(e))}}class be extends ge{async _call(e){return new Ql(await super._call(e))}}class ve extends ge{async _call(e){return new Yl(await super._call(e))}}class xe extends H{}class Me extends xe{}class Te extends xe{async _call(e){return new Kl(await super._call(e))}}class ke extends xe{async _call(e){return new Hl(await super._call(e))}}class $e extends xe{async _call(e){return new Ql(await super._call(e))}}class Ce extends xe{async _call(e){return new Yl(await super._call(e))}}class Pe extends H{}class Se extends Pe{}class Ee extends Pe{async _call(e){return new Kl(await super._call(e))}}class Fe extends Pe{async _call(e){return new Hl(await super._call(e))}}class Ae extends Pe{async _call(e){return new Ql(await super._call(e))}}class Ie extends Pe{async _call(e){return new Yl(await super._call(e))}}class ze extends H{}class Le extends ze{}class Oe extends ze{async _call(e){return new Kl(await super._call(e))}}class Be extends ze{async _call(e){return new Hl(await super._call(e))}}class De extends ze{async _call(e){return new Ql(await super._call(e))}}class Ne extends ze{async _call(e){return new Yl(await super._call(e))}}class Re extends H{}class je extends Re{}class Ve extends Re{async _call(e){return new Hl(await super._call(e))}}class Ge extends Re{async _call(e){return new Ql(await super._call(e))}}class Ue extends Re{async _call(e){return new Yl(await super._call(e))}}class qe extends Re{async _call(e){return new Kl(await 
super._call(e))}}class We extends H{}class He extends We{}class Xe extends We{async _call(e){return new Kl(await super._call(e))}}class Qe extends We{async _call(e){return new Hl(await super._call(e))}}class Ke extends We{async _call(e){return new Ql(await super._call(e))}}class Ye extends H{}class Je extends Ye{}class Ze extends Ye{async _call(e){return new Kl(await super._call(e))}}class et extends Ye{async _call(e){return new Hl(await super._call(e))}}class tt extends Ye{async _call(e){return new Yl(await super._call(e))}}class nt extends H{}class rt extends nt{}class st extends nt{async _call(e){return new Kl(await super._call(e))}}class ot extends nt{async _call(e){return new Hl(await super._call(e))}}class it extends nt{async _call(e){return new Ql(await super._call(e))}}class at extends nt{async _call(e){return new Yl(await super._call(e))}}class lt extends H{}class dt extends lt{}class ut extends lt{async _call(e){return new Kl(await super._call(e))}}class ct extends lt{async _call(e){return new Hl(await super._call(e))}}class pt extends lt{async _call(e){return new Yl(await super._call(e))}}class ht extends H{}class mt extends ht{}class ft extends ht{async _call(e){return new Hl(await super._call(e))}}class gt extends ht{async _call(e){return new Yl(await super._call(e))}}class _t extends ht{async _call(e){return new Kl(await super._call(e))}}class wt extends H{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"]}class yt extends wt{}class bt extends wt{}class vt extends H{}class xt extends vt{}class Mt extends vt{}class Tt extends H{}class kt extends Tt{}class $t extends Tt{}class Ct extends H{}class Pt extends Ct{}class St extends Ct{}class Et extends Ct{async _call(e){return new Hl(await super._call(e))}}class Ft extends H{}class At extends Ft{}class It extends Ft{}class zt extends Ft{async _call(e){return new Hl(await super._call(e))}}class Lt extends Ft{}class Ot extends 
H{}class Bt extends Ot{}class Dt extends Ot{}class Nt extends H{}class Rt extends Nt{}class jt extends Nt{}class Vt extends H{}class Gt extends Vt{}class Ut extends Vt{async _call(e){return new Kl(await super._call(e))}}class qt extends Vt{async _call(e){return new Hl(await super._call(e))}}class Wt extends Vt{async _call(e){return new Ql(await super._call(e))}}class Ht extends Vt{async _call(e){return new Yl(await super._call(e))}}class Xt extends H{}class Qt extends Xt{}class Kt extends Xt{async _call(e){return new Kl(await super._call(e))}}class Yt extends Xt{async _call(e){return new Hl(await super._call(e))}}class Jt extends Xt{async _call(e){return new Ql(await super._call(e))}}class Zt extends Xt{async _call(e){return new Yl(await super._call(e))}}class en extends H{}class tn extends en{}class nn extends en{async _call(e){return new Kl(await super._call(e))}}class rn extends en{async _call(e){return new Hl(await super._call(e))}}class sn extends en{async _call(e){return new Ql(await super._call(e))}}class on extends en{async _call(e){return new Yl(await super._call(e))}}class an extends H{}class ln extends an{}class dn extends an{}class un extends H{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"]}class cn extends un{}class pn extends un{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,w.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const s=`<|${(0,y.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[s]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. 
If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...s}){t=this._prepare_generation_config(t,s);const o=s.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...s});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let s=this.config.median_filter_width;void 0===s&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),s=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,p.cat)(o.map((e=>e[t])),2))),l=(0,p.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[d,u]=(0,p.std_mean)(l,-2,0,!0),c=l.clone();for(let e=0;e<c.dims[0];++e){const t=c[e];for(let n=0;n<t.dims[0];++n){const r=t[n],o=d[e][n][0].data,i=u[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,m.medianFilter)(t,s))}}}const h=[(0,p.mean)(c,1)],f=e.sequences.dims,g=new p.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=h[e].neg().squeeze_(0),[n,s]=(0,m.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(s[e]*r);g[e].data.set(l,1)}return g}}class hn extends H{main_input_name="pixel_values";forward_params=["pixel_values","decoder_input_ids","encoder_hidden_states","past_key_values"]}class mn extends H{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"]}class fn extends mn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const s=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==s)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:r};const l=[],d=[];for(let n=0;n<o.length;++n){const 
s=o[n],i=e[n],a=t[n],u=r[n];l.push((0,p.cat)([i.slice([0,s]),a,i.slice([s+1,i.dims[0]])],0)),d.push((0,p.cat)([u.slice([0,s]),(0,p.ones)([a.dims[0]]),u.slice([s+1,u.dims[0]])],0))}return{inputs_embeds:(0,p.stack)(l,0),attention_mask:(0,p.stack)(d,0)}}}class gn extends fn{}class _n extends fn{}class wn extends H{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class yn extends wn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,p.cat)([t,e],1),attention_mask:(0,p.cat)([(0,p.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let s,o;return e&&(s=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),s&&o?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:s,image_features:o,input_ids:e,attention_mask:r})):n=s||o,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:s,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await N(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const d={inputs_embeds:l,attention_mask:s,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await R(this,d,!0)}}class bn extends H{}class vn extends bn{}class xn extends bn{static async 
from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Mn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Tn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class kn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class $n extends H{}class Cn extends $n{}class Pn extends $n{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Sn extends bn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class En extends H{}class Fn extends En{}class An extends H{}class In extends An{async forward(e){const t=!e.input_ids,n=!e.pixel_values;if(t&&n)throw new Error("Either `input_ids` or `pixel_values` should be provided.");if(t&&(e.input_ids=(0,p.ones)([e.pixel_values.dims[0],1])),n){const{image_size:t}=this.config.vision_config;e.pixel_values=(0,p.full)([0,3,t,t],0)}const{text_embeddings:r,image_embeddings:s,l2norm_text_embeddings:o,l2norm_image_embeddings:i}=await super.forward(e),a={};return t||(a.text_embeddings=r,a.l2norm_text_embeddings=o),n||(a.image_embeddings=s,a.l2norm_image_embeddings=i),a}}class zn extends An{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ln extends An{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class On extends H{}class Bn extends On{}class Dn extends On{}class Nn extends H{}class Rn extends Nn{}class jn extends Nn{}class Vn extends H{}class Gn extends Vn{}class Un extends Vn{}class qn extends H{}class Wn extends qn{}class Hn extends qn{}class Xn extends H{}class Qn extends Xn{}class Kn extends Xn{}class Yn extends H{}class Jn extends Yn{}class Zn extends 
Yn{}class er extends H{}class tr extends er{}class nr extends er{}class rr extends H{}class sr extends rr{}class or extends rr{}class ir extends H{}class ar extends ir{}class lr extends ir{}class dr extends H{}class ur extends dr{}class cr extends dr{}class pr extends H{}class hr extends pr{}class mr extends pr{}class fr extends H{}class gr extends fr{}class _r extends fr{}class wr extends H{}class yr extends wr{}class br extends wr{}class vr extends H{}class xr extends vr{}class Mr extends vr{}class Tr extends H{}class kr extends Tr{}class $r extends Tr{}class Cr extends H{}class Pr extends Cr{}class Sr extends Cr{}class Er extends H{}class Fr extends Er{}class Ar extends Er{}class Ir extends H{forward_params=["input_ids","attention_mask","position_ids","past_key_values","pixel_values","image_grid_thw"]}class zr extends Ir{get_rope_index(e,t,n,r){const{vision_config:s,image_token_id:o,video_token_id:i,vision_start_token_id:a}=this.config,l=s.spatial_merge_size??2,d=[];if(t||n){let s=e.tolist();r||(r=(0,p.ones_like)(e));const u=r.tolist(),c=Array.from({length:3},(t=>Array.from({length:e.dims[0]},(t=>Array.from({length:e.dims[1]},(e=>1)))))),h=t?t.tolist():[],f=n?n.tolist():[];let g=0,_=0;for(let e=0;e<s.length;++e){const t=s[e].filter(((t,n)=>1==u[e][n])),n=t.reduce(((e,t,n)=>(t==a&&e.push(n),e)),[]).map((e=>t[e+1])),r=n.filter((e=>e==o)).length,p=n.filter((e=>e==i)).length;let w=[],y=0,b=r,v=p;for(let e=0;e<n.length;++e){const e=t.findIndex(((e,t)=>t>y&&e==o)),n=t.findIndex(((e,t)=>t>y&&e==i)),r=b>0&&-1!==e?e:t.length+1,s=v>0&&-1!==n?n:t.length+1;let a,d,u,c;r<s?([d,u,c]=h[g],++g,--b,a=r):([d,u,c]=f[_],++_,--v,a=s);const[p,x,M]=[Number(d),Math.floor(Number(u)/l),Math.floor(Number(c)/l)],T=a-y,k=w.length>0?(0,m.max)(w.at(-1))[0]+1:0;w.push(Array.from({length:3*T},((e,t)=>k+t%T)));const 
$=T+k,C=p*x*M,P=Array.from({length:C},((e,t)=>$+Math.floor(t/(x*M)))),S=Array.from({length:C},((e,t)=>$+Math.floor(t/M)%x)),E=Array.from({length:C},((e,t)=>$+t%M));w.push([P,S,E].flat()),y=a+C}if(y<t.length){const e=w.length>0?(0,m.max)(w.at(-1))[0]+1:0,n=t.length-y;w.push(Array.from({length:3*n},((t,r)=>e+r%n)))}const x=w.reduce(((e,t)=>e+t.length),0),M=new Array(x);let T=0;for(let e=0;e<3;++e)for(let t=0;t<w.length;++t){const n=w[t],r=n.length/3;for(let t=e*r;t<(e+1)*r;++t)M[T++]=n[t]}let k=0;const $=u[e];for(let t=0;t<$.length;++t)if(1==$[t]){for(let n=0;n<3;++n)c[n][e][t]=M[n*x/3+k];++k}const C=(0,m.max)(M)[0];d.push(C+1-s[e].length)}return[new p.Tensor("int64",c.flat(1/0),[3,e.dims[0],e.dims[1]]),new p.Tensor("int64",d,[d.length,1])]}if(r){const{data:e,dims:t}=V(r),n=BigInt64Array.from({length:3*e.length},((t,n)=>e[n%e.length])),s=Array.from({length:t[0]},((n,r)=>(0,m.max)(e.subarray(t[1]*r,t[1]*(r+1)))[0]+1+t[1]));return[new p.Tensor("int64",n,[3,...t]),new p.Tensor("int64",s,[s.length,1])]}{const[t,n]=e.dims,r=BigInt64Array.from({length:3*t*n},((e,r)=>BigInt(Math.floor(r%n/t))));return[new p.Tensor("int64",r,[3,...e.dims]),(0,p.zeros)([t,1])]}}async encode_image({pixel_values:e,image_grid_thw:t}){return(await z(this.sessions.vision_encoder,{pixel_values:e,grid_thw:t})).image_features}_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const{image_token_id:s}=this.config,o=n.tolist().map((e=>e.reduce(((e,t,n)=>(t==s&&e.push(n),e)),[]))),i=o.reduce(((e,t)=>e+t.length),0),a=t.dims[0];if(i!==a)throw new Error(`Image features and image tokens do not match: tokens: ${i}, features ${a}`);let l=0;for(let n=0;n<o.length;++n){const r=o[n],s=e[n];for(let e=0;e<r.length;++e)s[r[e]].data.set(t[l++].data)}return{inputs_embeds:e,attention_mask:r}}prepare_inputs_for_generation(e,t,n){if(t.attention_mask&&!t.position_ids)if(t.past_key_values){t.pixel_values=null;const 
e=BigInt(Object.values(t.past_key_values)[0].dims.at(-2)),n=t.rope_deltas.map((t=>e+t));t.position_ids=(0,p.stack)([n,n,n],0)}else[t.position_ids,t.rope_deltas]=this.get_rope_index(t.input_ids,t.image_grid_thw,t.video_grid_thw,t.attention_mask);return t}}class Lr extends H{}class Or extends Lr{}class Br extends Lr{}class Dr extends H{}class Nr extends Dr{}class Rr extends Dr{}class jr extends H{}class Vr extends jr{}class Gr extends jr{}class Ur extends H{}class qr extends Ur{}class Wr extends Ur{}class Hr extends H{}class Xr extends Hr{}class Qr extends Hr{}class Kr extends H{}class Yr extends Kr{}class Jr extends Kr{async _call(e){return new Hl(await super._call(e))}}class Zr extends H{}class es extends Zr{}class ts extends H{}class ns extends ts{}class rs extends ts{async _call(e){return new Hl(await super._call(e))}}class ss extends H{}class os extends ss{}class is extends H{}class as extends is{}class ls extends is{async _call(e){return new Hl(await super._call(e))}}class ds extends H{}class us extends ds{}class cs extends H{}class ps extends cs{}class hs extends cs{async _call(e){return new Hl(await super._call(e))}}class ms extends H{}class fs extends ms{async _call(e){return new ed(await super._call(e))}}class gs extends H{}class _s extends gs{}class ws extends gs{async _call(e){return new Hl(await super._call(e))}}class ys extends H{}class bs extends ys{}class vs extends ys{async _call(e){return new Hl(await super._call(e))}}class xs extends H{}class Ms extends xs{}class Ts extends xs{}class ks extends H{}class $s extends ks{}class Cs extends ks{}class Ps extends H{}class Ss extends Ps{}class Es extends Ps{async _call(e){return new Hl(await super._call(e))}}class Fs extends H{}class As extends Fs{}class Is extends Fs{async _call(e){return new Ls(await super._call(e))}}class zs extends Fs{async _call(e){return new Os(await super._call(e))}}class Ls extends X{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Os extends 
X{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Bs extends H{}class Ds extends Bs{}class Ns extends Bs{async _call(e){return new Rs(await super._call(e))}}class Rs extends X{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class js extends H{}class Vs extends js{}class Gs extends js{async _call(e){return new Us(await super._call(e))}}class Us extends Ls{}class qs extends H{}class Ws extends qs{}class Hs extends qs{async _call(e){return new Hl(await super._call(e))}}class Xs extends H{}class Qs extends Xs{}class Ks extends Xs{async _call(e){return new Hl(await super._call(e))}}class Ys extends H{}class Js extends Ys{}class Zs extends Ys{async _call(e){return new Hl(await super._call(e))}}class eo extends H{}class to extends eo{}class no extends eo{async _call(e){return new Hl(await super._call(e))}}class ro extends H{}class so extends ro{}class oo extends ro{}class io extends H{}class ao extends io{}class lo extends io{}class uo extends H{}class co extends uo{}class po extends H{}class ho extends po{}class mo extends po{}class fo extends po{}class go extends H{}class _o extends go{}class wo extends H{}class yo extends wo{}class bo extends wo{}class vo extends H{}class xo extends vo{}class Mo extends vo{}class To extends H{}class ko extends To{}class $o extends H{}class Co extends $o{}class Po extends $o{async _call(e){return new Hl(await super._call(e))}}class So extends H{}class Eo extends So{}class Fo extends So{async _call(e){return new Hl(await super._call(e))}}class Ao extends H{}class Io extends Ao{}class zo extends Ao{async _call(e){return new Hl(await super._call(e))}}class Lo extends H{}class Oo extends Lo{}class Bo extends Lo{async _call(e){return new Do(await super._call(e))}}class Do extends X{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class No extends H{}class Ro extends No{async 
get_image_embeddings({pixel_values:e}){return await N(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new p.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await z(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new jo(await super._call(e))}}class jo extends X{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class Vo extends H{}class Go extends Vo{}class Uo extends Vo{}class qo extends H{}class Wo extends qo{}class Ho extends qo{}class Xo extends H{}class Qo extends Xo{}class Ko extends Xo{async _call(e){return new Jl(await super._call(e))}}class Yo extends Xo{async _call(e){return new Hl(await super._call(e))}}class Jo extends Xo{async _call(e){return new Ql(await super._call(e))}}class Zo extends H{}class ei extends Zo{}class ti extends Zo{async _call(e){return new Ql(await super._call(e))}}class ni extends H{}class ri extends ni{}class si extends H{}class oi extends si{}class ii extends si{async _call(e){return new Jl(await super._call(e))}}class ai extends si{async _call(e){return new Hl(await super._call(e))}}class li extends H{}class di extends li{}class ui extends li{async _call(e){return new Jl(await super._call(e))}}class ci extends li{async _call(e){return new Hl(await super._call(e))}}class pi extends li{async _call(e){return new Ql(await super._call(e))}}class hi extends H{}class mi extends hi{}class fi extends hi{async _call(e){return new Jl(await super._call(e))}}class gi extends hi{async _call(e){return new Hl(await super._call(e))}}class _i 
extends H{}class wi extends Xo{}class yi extends Xo{async _call(e){return new Jl(await super._call(e))}}class bi extends Xo{async _call(e){return new Hl(await super._call(e))}}class vi extends H{}class xi extends vi{}class Mi extends vi{async _call(e){return new Jl(await super._call(e))}}class Ti extends vi{async _call(e){return new Hl(await super._call(e))}}class ki extends vi{async _call(e){return new Xl(await super._call(e))}}class $i extends vi{async _call(e){return new Ql(await super._call(e))}}class Ci extends H{}class Pi extends Ci{}class Si extends Ci{}class Ei extends Ci{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:s=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await N(this,i),d=a.dims[1]/this.config.reduction_factor,u=Math.floor(d*s),c=Math.floor(d*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=B(!!g);let r;r=g?g.output_sequence_out:new p.Tensor("float32",new Float32Array(h),[1,1,h]);let s={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(s,f),g=await z(this.sessions.decoder_model_merged,s),f=this.getPastKeyValues(g,f);const{prob:o,spectrum:i}=g;if(m.push(i),_>=c&&(Array.from(o.data).filter((e=>e>=n)).length>0||_>=u))break}const w=(0,p.cat)(m),{waveform:y}=await z(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class Fi extends H{main_input_name="spectrogram"}class Ai extends H{}class Ii extends Ai{}class zi extends H{}class Li extends zi{}class Oi extends zi{}class Bi extends H{}class Di extends Bi{}class Ni extends Bi{}class Ri extends H{}class ji extends Ri{}class Vi extends Ri{}class Gi extends H{}class Ui extends Gi{}class qi extends Gi{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Wi extends Gi{static async from_pretrained(e,t={}){return 
t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Hi extends H{}class Xi extends Hi{async _call(e){return new td(await super._call(e))}}class Qi extends H{}class Ki extends Qi{}class Yi extends Qi{}class Ji extends Qi{}class Zi extends H{}class ea extends Zi{}class ta extends Zi{}class na extends H{}class ra extends na{}class sa extends na{async _call(e){return new Hl(await super._call(e))}}class oa extends H{}class ia extends oa{}class aa extends oa{}class la extends H{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,s=n-r;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%r;i>0&&i<=s&&(e.data[o++]=e.data[t])}const i=Math.floor(t/r),a=o/(i*r);return new p.Tensor(e.type,e.data.slice(0,o),[i,r,a])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await z(this.sessions.encodec_decode,{audio_codes:n});return r}}class da extends H{}class ua extends da{}class ca extends da{async _call(e){return new Hl(await super._call(e))}}class pa extends H{}class ha extends pa{}class ma extends pa{async _call(e){return new Hl(await super._call(e))}}class fa extends H{}class ga extends fa{}class _a extends fa{async _call(e){return new Hl(await super._call(e))}}class wa extends H{}class ya extends wa{}class ba extends wa{async _call(e){return new Hl(await super._call(e))}}class va extends H{}class xa extends va{}class Ma extends 
H{}class Ta extends Ma{forward_params=["input_ids","pixel_values","images_seq_mask","images_emb_mask","attention_mask","position_ids","past_key_values"];constructor(...e){super(...e),this._generation_mode="text"}async forward(e){const t=this._generation_mode??"text";let n;if("text"!==t&&e.past_key_values){const t=this.sessions.gen_img_embeds,r=(0,a.pick)({image_ids:e.input_ids},t.inputNames);n=await z(t,r)}else{const t=this.sessions.prepare_inputs_embeds,r=(0,a.pick)(e,t.inputNames);n=await z(t,r)}const r={...e,...n},s=await R(this,r),o=this.sessions["text"===t?"lm_head":"gen_head"];if(!o)throw new Error(`Unable to find "${o}" generation head`);const i=await z(o,(0,a.pick)(s,o.inputNames));return{...n,...s,...i}}async generate(e){return this._generation_mode="text",super.generate(e)}async generate_images(e){this._generation_mode="image";const t=(e.inputs??e[this.main_input_name]).dims[1],n=(await super.generate(e)).slice(null,[t,null]),r=this.sessions.image_decode,{decoded_image:s}=await z(r,{generated_tokens:n}),o=s.add_(1).mul_(127.5).clamp_(0,255).to("uint8"),i=[];for(const e of o){const t=h.RawImage.fromTensor(e);i.push(t)}return i}}class ka extends X{constructor({char_logits:e,bpe_logits:t,wp_logits:n}){super(),this.char_logits=e,this.bpe_logits=t,this.wp_logits=n}get logits(){return[this.char_logits,this.bpe_logits,this.wp_logits]}}class $a extends H{}class Ca extends $a{async _call(e){return new ka(await super._call(e))}}class Pa extends H{}class Sa extends Pa{}class Ea extends Pa{}class Fa extends H{}class Aa extends Fa{}class Ia extends Fa{}class za{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:d=null,dtype:u=null,use_external_data_format:c=null,session_options:p={}}={}){const 
h={progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:d,dtype:u,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await H.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const La=new Map([["bert",["BertModel",Y]],["nomic_bert",["NomicBertModel",re]],["roformer",["RoFormerModel",oe]],["electra",["ElectraModel",_e]],["esm",["EsmModel",He]],["convbert",["ConvBertModel",ce]],["camembert",["CamembertModel",Me]],["deberta",["DebertaModel",Se]],["deberta-v2",["DebertaV2Model",Le]],["mpnet",["MPNetModel",rt]],["albert",["AlbertModel",mt]],["distilbert",["DistilBertModel",je]],["roberta",["RobertaModel",Gt]],["xlm",["XLMModel",Qt]],["xlm-roberta",["XLMRobertaModel",tn]],["clap",["ClapModel",Ui]],["clip",["CLIPModel",vn]],["clipseg",["CLIPSegModel",Bn]],["chinese_clip",["ChineseCLIPModel",Fn]],["siglip",["SiglipModel",Cn]],["jina_clip",["JinaCLIPModel",In]],["mobilebert",["MobileBertModel",Je]],["squeezebert",["SqueezeBertModel",dt]],["wav2vec2",["Wav2Vec2Model",Qo]],["wav2vec2-bert",["Wav2Vec2BertModel",mi]],["unispeech",["UniSpeechModel",oi]],["unispeech-sat",["UniSpeechSatModel",di]],["hubert",["HubertModel",wi]],["wavlm",["WavLMModel",xi]],["audio-spectrogram-transformer",["ASTModel",ln]],["vits",["VitsModel",Xi]],["pyannote",["PyAnnoteModel",ei]],["wespeaker-resnet",["WeSpeakerResNetModel",ri]],["detr",["DetrModel",As]],["rt_detr",["RTDetrModel",Ds]],["table-transformer",["TableTransformerModel",Vs]],["vit",["ViTModel",Yr]],["pvt",["PvtModel",ns]],["vit_msn",["V
iTMSNModel",as]],["vit_mae",["ViTMAEModel",os]],["groupvit",["GroupViTModel",us]],["fastvit",["FastViTModel",ps]],["mobilevit",["MobileViTModel",_s]],["mobilevitv2",["MobileViTV2Model",bs]],["owlvit",["OwlViTModel",Ms]],["owlv2",["Owlv2Model",$s]],["beit",["BeitModel",Ss]],["deit",["DeiTModel",Ws]],["hiera",["HieraModel",Qs]],["convnext",["ConvNextModel",Co]],["convnextv2",["ConvNextV2Model",Eo]],["dinov2",["Dinov2Model",Io]],["resnet",["ResNetModel",Js]],["swin",["SwinModel",to]],["swin2sr",["Swin2SRModel",so]],["donut-swin",["DonutSwinModel",ko]],["yolos",["YolosModel",Oo]],["dpt",["DPTModel",ao]],["glpn",["GLPNModel",xo]],["hifigan",["SpeechT5HifiGan",Fi]],["efficientnet",["EfficientNetModel",ra]],["decision_transformer",["DecisionTransformerModel",xa]],["patchtst",["PatchTSTForPrediction",Sa]],["patchtsmixer",["PatchTSMixerForPrediction",Aa]],["mobilenet_v1",["MobileNetV1Model",ua]],["mobilenet_v2",["MobileNetV2Model",ha]],["mobilenet_v3",["MobileNetV3Model",ga]],["mobilenet_v4",["MobileNetV4Model",ya]],["maskformer",["MaskFormerModel",yo]],["mgp-str",["MgpstrForSceneTextRecognition",Ca]]]),Oa=new Map([["t5",["T5Model",yt]],["longt5",["LongT5Model",xt]],["mt5",["MT5Model",kt]],["bart",["BartModel",Pt]],["mbart",["MBartModel",At]],["marian",["MarianModel",Go]],["whisper",["WhisperModel",cn]],["m2m_100",["M2M100Model",Wo]],["blenderbot",["BlenderbotModel",Bt]],["blenderbot-small",["BlenderbotSmallModel",Rt]]]),Ba=new 
Map([["bloom",["BloomModel",Vr]],["jais",["JAISModel",Gn]],["gpt2",["GPT2Model",Rn]],["gptj",["GPTJModel",Jn]],["gpt_bigcode",["GPTBigCodeModel",tr]],["gpt_neo",["GPTNeoModel",Wn]],["gpt_neox",["GPTNeoXModel",Qn]],["codegen",["CodeGenModel",sr]],["llama",["LlamaModel",ar]],["olmo",["OlmoModel",hr]],["mobilellm",["MobileLLMModel",ur]],["granite",["GraniteModel",gr]],["cohere",["CohereModel",yr]],["gemma",["GemmaModel",xr]],["gemma2",["Gemma2Model",kr]],["openelm",["OpenELMModel",Pr]],["qwen2",["Qwen2Model",Fr]],["phi",["PhiModel",Or]],["phi3",["Phi3Model",Nr]],["mpt",["MptModel",qr]],["opt",["OPTModel",Xr]],["mistral",["MistralModel",Li]],["starcoder2",["Starcoder2Model",Di]],["falcon",["FalconModel",ji]],["stablelm",["StableLmModel",ea]]]),Da=new Map([["speecht5",["SpeechT5ForSpeechToText",Si]],["whisper",["WhisperForConditionalGeneration",pn]]]),Na=new Map([["speecht5",["SpeechT5ForTextToSpeech",Ei]]]),Ra=new Map([["vits",["VitsModel",Xi]],["musicgen",["MusicgenForConditionalGeneration",la]]]),ja=new Map([["bert",["BertForSequenceClassification",Z]],["roformer",["RoFormerForSequenceClassification",ae]],["electra",["ElectraForSequenceClassification",ye]],["esm",["EsmForSequenceClassification",Qe]],["convbert",["ConvBertForSequenceClassification",he]],["camembert",["CamembertForSequenceClassification",ke]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Be]],["mpnet",["MPNetForSequenceClassification",ot]],["albert",["AlbertForSequenceClassification",ft]],["distilbert",["DistilBertForSequenceClassification",Ve]],["roberta",["RobertaForSequenceClassification",qt]],["xlm",["XLMForSequenceClassification",Yt]],["xlm-roberta",["XLMRobertaForSequenceClassification",rn]],["bart",["BartForSequenceClassification",Et]],["mbart",["MBartForSequenceClassification",zt]],["mobilebert",["MobileBertForSequenceClassification",et]],["squeezebert",["SqueezeBertForSequenceClassification",ct]]]),Va=new 
Map([["bert",["BertForTokenClassification",ee]],["roformer",["RoFormerForTokenClassification",le]],["electra",["ElectraForTokenClassification",be]],["esm",["EsmForTokenClassification",Ke]],["convbert",["ConvBertForTokenClassification",me]],["camembert",["CamembertForTokenClassification",$e]],["deberta",["DebertaForTokenClassification",Ae]],["deberta-v2",["DebertaV2ForTokenClassification",De]],["mpnet",["MPNetForTokenClassification",it]],["distilbert",["DistilBertForTokenClassification",Ge]],["roberta",["RobertaForTokenClassification",Wt]],["xlm",["XLMForTokenClassification",Jt]],["xlm-roberta",["XLMRobertaForTokenClassification",sn]]]),Ga=new Map([["t5",["T5ForConditionalGeneration",bt]],["longt5",["LongT5ForConditionalGeneration",Mt]],["mt5",["MT5ForConditionalGeneration",$t]],["bart",["BartForConditionalGeneration",St]],["mbart",["MBartForConditionalGeneration",It]],["marian",["MarianMTModel",Uo]],["m2m_100",["M2M100ForConditionalGeneration",Ho]],["blenderbot",["BlenderbotForConditionalGeneration",Dt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",jt]]]),Ua=new 
Map([["bloom",["BloomForCausalLM",Gr]],["gpt2",["GPT2LMHeadModel",jn]],["jais",["JAISLMHeadModel",Un]],["gptj",["GPTJForCausalLM",Zn]],["gpt_bigcode",["GPTBigCodeForCausalLM",nr]],["gpt_neo",["GPTNeoForCausalLM",Hn]],["gpt_neox",["GPTNeoXForCausalLM",Kn]],["codegen",["CodeGenForCausalLM",or]],["llama",["LlamaForCausalLM",lr]],["olmo",["OlmoForCausalLM",mr]],["mobilellm",["MobileLLMForCausalLM",cr]],["granite",["GraniteForCausalLM",_r]],["cohere",["CohereForCausalLM",br]],["gemma",["GemmaForCausalLM",Mr]],["gemma2",["Gemma2ForCausalLM",$r]],["openelm",["OpenELMForCausalLM",Sr]],["qwen2",["Qwen2ForCausalLM",Ar]],["phi",["PhiForCausalLM",Br]],["phi3",["Phi3ForCausalLM",Rr]],["mpt",["MptForCausalLM",Wr]],["opt",["OPTForCausalLM",Qr]],["mbart",["MBartForCausalLM",Lt]],["mistral",["MistralForCausalLM",Oi]],["starcoder2",["Starcoder2ForCausalLM",Ni]],["falcon",["FalconForCausalLM",Vi]],["trocr",["TrOCRForCausalLM",Ii]],["stablelm",["StableLmForCausalLM",ta]]]),qa=new Map([["multi_modality",["MultiModalityCausalLM",Ta]]]),Wa=new Map([["bert",["BertForMaskedLM",J]],["roformer",["RoFormerForMaskedLM",ie]],["electra",["ElectraForMaskedLM",we]],["esm",["EsmForMaskedLM",Xe]],["convbert",["ConvBertForMaskedLM",pe]],["camembert",["CamembertForMaskedLM",Te]],["deberta",["DebertaForMaskedLM",Ee]],["deberta-v2",["DebertaV2ForMaskedLM",Oe]],["mpnet",["MPNetForMaskedLM",st]],["albert",["AlbertForMaskedLM",_t]],["distilbert",["DistilBertForMaskedLM",qe]],["roberta",["RobertaForMaskedLM",Ut]],["xlm",["XLMWithLMHeadModel",Kt]],["xlm-roberta",["XLMRobertaForMaskedLM",nn]],["mobilebert",["MobileBertForMaskedLM",Ze]],["squeezebert",["SqueezeBertForMaskedLM",ut]]]),Ha=new 
Map([["bert",["BertForQuestionAnswering",te]],["roformer",["RoFormerForQuestionAnswering",de]],["electra",["ElectraForQuestionAnswering",ve]],["convbert",["ConvBertForQuestionAnswering",fe]],["camembert",["CamembertForQuestionAnswering",Ce]],["deberta",["DebertaForQuestionAnswering",Ie]],["deberta-v2",["DebertaV2ForQuestionAnswering",Ne]],["mpnet",["MPNetForQuestionAnswering",at]],["albert",["AlbertForQuestionAnswering",gt]],["distilbert",["DistilBertForQuestionAnswering",Ue]],["roberta",["RobertaForQuestionAnswering",Ht]],["xlm",["XLMForQuestionAnswering",Zt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",on]],["mobilebert",["MobileBertForQuestionAnswering",tt]],["squeezebert",["SqueezeBertForQuestionAnswering",pt]]]),Xa=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",hn]]]),Qa=new Map([["llava",["LlavaForConditionalGeneration",fn]],["llava_onevision",["LlavaOnevisionForConditionalGeneration",gn]],["moondream1",["Moondream1ForConditionalGeneration",_n]],["florence2",["Florence2ForConditionalGeneration",yn]],["qwen2-vl",["Qwen2VLForConditionalGeneration",zr]]]),Ka=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",hn]]]),Ya=new 
Map([["vit",["ViTForImageClassification",Jr]],["pvt",["PvtForImageClassification",rs]],["vit_msn",["ViTMSNForImageClassification",ls]],["fastvit",["FastViTForImageClassification",hs]],["mobilevit",["MobileViTForImageClassification",ws]],["mobilevitv2",["MobileViTV2ForImageClassification",vs]],["beit",["BeitForImageClassification",Es]],["deit",["DeiTForImageClassification",Hs]],["hiera",["HieraForImageClassification",Ks]],["convnext",["ConvNextForImageClassification",Po]],["convnextv2",["ConvNextV2ForImageClassification",Fo]],["dinov2",["Dinov2ForImageClassification",zo]],["resnet",["ResNetForImageClassification",Zs]],["swin",["SwinForImageClassification",no]],["segformer",["SegformerForImageClassification",Yi]],["efficientnet",["EfficientNetForImageClassification",sa]],["mobilenet_v1",["MobileNetV1ForImageClassification",ca]],["mobilenet_v2",["MobileNetV2ForImageClassification",ma]],["mobilenet_v3",["MobileNetV3ForImageClassification",_a]],["mobilenet_v4",["MobileNetV4ForImageClassification",ba]]]),Ja=new Map([["detr",["DetrForObjectDetection",Is]],["rt_detr",["RTDetrForObjectDetection",Ns]],["table-transformer",["TableTransformerForObjectDetection",Gs]],["yolos",["YolosForObjectDetection",Bo]]]),Za=new Map([["owlvit",["OwlViTForObjectDetection",Ts]],["owlv2",["Owlv2ForObjectDetection",Cs]]]),el=new Map([["detr",["DetrForSegmentation",zs]],["clipseg",["CLIPSegForImageSegmentation",Dn]]]),tl=new Map([["segformer",["SegformerForSemanticSegmentation",Ji]],["sapiens",["SapiensForSemanticSegmentation",ho]]]),nl=new Map([["detr",["DetrForSegmentation",zs]],["maskformer",["MaskFormerForInstanceSegmentation",bo]]]),rl=new Map([["sam",["SamModel",Ro]]]),sl=new Map([["wav2vec2",["Wav2Vec2ForCTC",Ko]],["wav2vec2-bert",["Wav2Vec2BertForCTC",fi]],["unispeech",["UniSpeechForCTC",ii]],["unispeech-sat",["UniSpeechSatForCTC",ui]],["wavlm",["WavLMForCTC",Mi]],["hubert",["HubertForCTC",yi]]]),ol=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Yo]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",gi]],["unispeech",["UniSpeechForSequenceClassification",ai]],["unispeech-sat",["UniSpeechSatForSequenceClassification",ci]],["wavlm",["WavLMForSequenceClassification",Ti]],["hubert",["HubertForSequenceClassification",bi]],["audio-spectrogram-transformer",["ASTForAudioClassification",dn]]]),il=new Map([["wavlm",["WavLMForXVector",ki]]]),al=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",pi]],["wavlm",["WavLMForAudioFrameClassification",$i]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Jo]],["pyannote",["PyAnnoteForAudioFrameClassification",ti]]]),ll=new Map([["vitmatte",["VitMatteForImageMatting",fs]]]),dl=new Map([["patchtst",["PatchTSTForPrediction",Ea]],["patchtsmixer",["PatchTSMixerForPrediction",Ia]]]),ul=new Map([["swin2sr",["Swin2SRForImageSuperResolution",oo]]]),cl=new Map([["dpt",["DPTForDepthEstimation",lo]],["depth_anything",["DepthAnythingForDepthEstimation",co]],["glpn",["GLPNForDepthEstimation",Mo]],["sapiens",["SapiensForDepthEstimation",mo]],["depth_pro",["DepthProForDepthEstimation",_o]]]),pl=new Map([["sapiens",["SapiensForNormalEstimation",fo]]]),hl=new Map([["vitpose",["VitPoseForPoseEstimation",es]]]),ml=new Map([["clip",["CLIPVisionModelWithProjection",kn]],["siglip",["SiglipVisionModel",Sn]],["jina_clip",["JinaCLIPVisionModel",Ln]]]),fl=[[La,b],[Oa,v],[Ba,T],[ja,b],[Va,b],[Ga,x],[Da,x],[Ua,T],[qa,P],[Wa,b],[Ha,b],[Xa,M],[Qa,$],[Ya,b],[el,b],[nl,b],[tl,b],[ll,b],[dl,b],[ul,b],[cl,b],[pl,b],[hl,b],[Ja,b],[Za,b],[rl,k],[sl,b],[ol,b],[Na,x],[Ra,b],[il,b],[al,b],[ml,b]];for(const[e,t]of fl)for(const[n,r]of e.values())S.set(n,t),F.set(r,n),E.set(n,r);const gl=[["MusicgenForConditionalGeneration",la,C],["CLIPTextModelWithProjection",Mn,b],["SiglipTextModel",Pn,b],["JinaCLIPTextModel",zn,b],["ClapTextModelWithProjection",qi,b],["ClapAudioModelWithProjection",Wi,b]];for(const[e,t,n]of 
gl)S.set(e,n),F.set(t,e),E.set(e,t);class _l extends za{static MODEL_CLASS_MAPPINGS=fl.map((e=>e[0]));static BASE_IF_FAIL=!0}class wl extends za{static MODEL_CLASS_MAPPINGS=[ja]}class yl extends za{static MODEL_CLASS_MAPPINGS=[Va]}class bl extends za{static MODEL_CLASS_MAPPINGS=[Ga]}class vl extends za{static MODEL_CLASS_MAPPINGS=[Da]}class xl extends za{static MODEL_CLASS_MAPPINGS=[Na]}class Ml extends za{static MODEL_CLASS_MAPPINGS=[Ra]}class Tl extends za{static MODEL_CLASS_MAPPINGS=[Ua]}class kl extends za{static MODEL_CLASS_MAPPINGS=[Wa]}class $l extends za{static MODEL_CLASS_MAPPINGS=[Ha]}class Cl extends za{static MODEL_CLASS_MAPPINGS=[Xa]}class Pl extends za{static MODEL_CLASS_MAPPINGS=[Ya]}class Sl extends za{static MODEL_CLASS_MAPPINGS=[el]}class El extends za{static MODEL_CLASS_MAPPINGS=[tl]}class Fl extends za{static MODEL_CLASS_MAPPINGS=[nl]}class Al extends za{static MODEL_CLASS_MAPPINGS=[Ja]}class Il extends za{static MODEL_CLASS_MAPPINGS=[Za]}class zl extends za{static MODEL_CLASS_MAPPINGS=[rl]}class Ll extends za{static MODEL_CLASS_MAPPINGS=[sl]}class Ol extends za{static MODEL_CLASS_MAPPINGS=[ol]}class Bl extends za{static MODEL_CLASS_MAPPINGS=[il]}class Dl extends za{static MODEL_CLASS_MAPPINGS=[al]}class Nl extends za{static MODEL_CLASS_MAPPINGS=[Ka]}class Rl extends za{static MODEL_CLASS_MAPPINGS=[ll]}class jl extends za{static MODEL_CLASS_MAPPINGS=[ul]}class Vl extends za{static MODEL_CLASS_MAPPINGS=[cl]}class Gl extends za{static MODEL_CLASS_MAPPINGS=[pl]}class Ul extends za{static MODEL_CLASS_MAPPINGS=[hl]}class ql extends za{static MODEL_CLASS_MAPPINGS=[ml]}class Wl extends X{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:s=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=s}}class Hl extends X{constructor({logits:e}){super(),this.logits=e}}class Xl extends 
X{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Ql extends X{constructor({logits:e}){super(),this.logits=e}}class Kl extends X{constructor({logits:e}){super(),this.logits=e}}class Yl extends X{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Jl extends X{constructor({logits:e}){super(),this.logits=e}}class Zl extends X{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class ed extends X{constructor({alphas:e}){super(),this.alphas=e}}class td extends X{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js":
|
|
184
|
+
\***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>un,ASTModel:()=>dn,ASTPreTrainedModel:()=>ln,AlbertForMaskedLM:()=>wt,AlbertForQuestionAnswering:()=>_t,AlbertForSequenceClassification:()=>gt,AlbertModel:()=>ft,AlbertPreTrainedModel:()=>mt,AutoModel:()=>bl,AutoModelForAudioClassification:()=>Nl,AutoModelForAudioFrameClassification:()=>jl,AutoModelForCTC:()=>Dl,AutoModelForCausalLM:()=>Cl,AutoModelForDepthEstimation:()=>ql,AutoModelForDocumentQuestionAnswering:()=>Vl,AutoModelForImageClassification:()=>Fl,AutoModelForImageFeatureExtraction:()=>Xl,AutoModelForImageMatting:()=>Gl,AutoModelForImageSegmentation:()=>Al,AutoModelForImageToImage:()=>Ul,AutoModelForMaskGeneration:()=>Bl,AutoModelForMaskedLM:()=>Pl,AutoModelForNormalEstimation:()=>Wl,AutoModelForObjectDetection:()=>Ll,AutoModelForPoseEstimation:()=>Hl,AutoModelForQuestionAnswering:()=>Sl,AutoModelForSemanticSegmentation:()=>Il,AutoModelForSeq2SeqLM:()=>Ml,AutoModelForSequenceClassification:()=>vl,AutoModelForSpeechSeq2Seq:()=>Tl,AutoModelForTextToSpectrogram:()=>kl,AutoModelForTextToWaveform:()=>$l,AutoModelForTokenClassification:()=>xl,AutoModelForUniversalSegmentation:()=>zl,AutoModelForVision2Seq:()=>El,AutoModelForXVector:()=>Rl,AutoModelForZeroShotObjectDetection:()=>Ol,BartForConditionalGeneration:()=>Et,BartForSequenceClassification:()=>Ft,BartModel:()=>St,BartPretrainedModel:()=>Pt,BaseModelOutput:()=>K,BeitForImageClassification:()=>Is,BeitModel:()=>As,BeitPreTrainedModel:()=>Fs,BertForMaskedLM:()=>Z,BertForQuestionAnswering:()=>ne,BertForSequenceClassification:()=>ee,BertForTokenClassification:()=>te,BertModel:()=>J,BertPreTrainedModel:()=>Y,BlenderbotForConditionalGeneration:()=>Nt,BlenderbotModel:()=>Dt,BlenderbotPreTrainedModel:()=>Bt,BlenderbotSmallForConditionalGeneration:()=>Vt,BlenderbotSmallModel:()=>jt,BlenderbotSmallPreTrainedModel:()=>Rt,BloomForCausalLM:()=>Wr,BloomModel:()=>qr,BloomPreTrainedModel:()=>Ur,CLIPModel:()=>Tn,CLIPPreTrainedModel:()=>Mn,CLIPSegFo
rImageSegmentation:()=>jn,CLIPSegModel:()=>Rn,CLIPSegPreTrainedModel:()=>Nn,CLIPTextModel:()=>kn,CLIPTextModelWithProjection:()=>$n,CLIPVisionModel:()=>Cn,CLIPVisionModelWithProjection:()=>Pn,CamembertForMaskedLM:()=>ke,CamembertForQuestionAnswering:()=>Pe,CamembertForSequenceClassification:()=>$e,CamembertForTokenClassification:()=>Ce,CamembertModel:()=>Te,CamembertPreTrainedModel:()=>Me,CausalLMOutput:()=>td,CausalLMOutputWithPast:()=>nd,ChineseCLIPModel:()=>zn,ChineseCLIPPreTrainedModel:()=>In,ClapAudioModelWithProjection:()=>Qi,ClapModel:()=>Hi,ClapPreTrainedModel:()=>Wi,ClapTextModelWithProjection:()=>Xi,CodeGenForCausalLM:()=>lr,CodeGenModel:()=>ar,CodeGenPreTrainedModel:()=>ir,CohereForCausalLM:()=>Mr,CohereModel:()=>xr,CoherePreTrainedModel:()=>vr,ConvBertForMaskedLM:()=>he,ConvBertForQuestionAnswering:()=>ge,ConvBertForSequenceClassification:()=>me,ConvBertForTokenClassification:()=>fe,ConvBertModel:()=>pe,ConvBertPreTrainedModel:()=>ce,ConvNextForImageClassification:()=>Fo,ConvNextModel:()=>Eo,ConvNextPreTrainedModel:()=>So,ConvNextV2ForImageClassification:()=>zo,ConvNextV2Model:()=>Io,ConvNextV2PreTrainedModel:()=>Ao,DPTForDepthEstimation:()=>po,DPTModel:()=>co,DPTPreTrainedModel:()=>uo,DebertaForMaskedLM:()=>Fe,DebertaForQuestionAnswering:()=>ze,DebertaForSequenceClassification:()=>Ae,DebertaForTokenClassification:()=>Ie,DebertaModel:()=>Ee,DebertaPreTrainedModel:()=>Se,DebertaV2ForMaskedLM:()=>Be,DebertaV2ForQuestionAnswering:()=>Re,DebertaV2ForSequenceClassification:()=>De,DebertaV2ForTokenClassification:()=>Ne,DebertaV2Model:()=>Oe,DebertaV2PreTrainedModel:()=>Le,DecisionTransformerModel:()=>ka,DecisionTransformerPreTrainedModel:()=>Ta,DeiTForImageClassification:()=>Ks,DeiTModel:()=>Qs,DeiTPreTrainedModel:()=>Xs,DepthAnythingForDepthEstimation:()=>mo,DepthAnythingPreTrainedModel:()=>ho,DepthProForDepthEstimation:()=>bo,DepthProPreTrainedModel:()=>yo,DetrForObjectDetection:()=>Os,DetrForSegmentation:()=>Bs,DetrModel:()=>Ls,DetrObjectDetectionOutput:()=
>Ds,DetrPreTrainedModel:()=>zs,DetrSegmentationOutput:()=>Ns,Dinov2ForImageClassification:()=>Bo,Dinov2Model:()=>Oo,Dinov2PreTrainedModel:()=>Lo,DistilBertForMaskedLM:()=>We,DistilBertForQuestionAnswering:()=>qe,DistilBertForSequenceClassification:()=>Ge,DistilBertForTokenClassification:()=>Ue,DistilBertModel:()=>Ve,DistilBertPreTrainedModel:()=>je,DonutSwinModel:()=>Po,DonutSwinPreTrainedModel:()=>Co,EfficientNetForImageClassification:()=>aa,EfficientNetModel:()=>ia,EfficientNetPreTrainedModel:()=>oa,ElectraForMaskedLM:()=>ye,ElectraForQuestionAnswering:()=>xe,ElectraForSequenceClassification:()=>be,ElectraForTokenClassification:()=>ve,ElectraModel:()=>we,ElectraPreTrainedModel:()=>_e,EsmForMaskedLM:()=>Qe,EsmForSequenceClassification:()=>Ke,EsmForTokenClassification:()=>Ye,EsmModel:()=>Xe,EsmPreTrainedModel:()=>He,FalconForCausalLM:()=>qi,FalconModel:()=>Ui,FalconPreTrainedModel:()=>Gi,FastViTForImageClassification:()=>gs,FastViTModel:()=>fs,FastViTPreTrainedModel:()=>ms,Florence2ForConditionalGeneration:()=>bn,Florence2PreTrainedModel:()=>yn,GLPNForDepthEstimation:()=>$o,GLPNModel:()=>ko,GLPNPreTrainedModel:()=>To,GPT2LMHeadModel:()=>Un,GPT2Model:()=>Gn,GPT2PreTrainedModel:()=>Vn,GPTBigCodeForCausalLM:()=>or,GPTBigCodeModel:()=>sr,GPTBigCodePreTrainedModel:()=>rr,GPTJForCausalLM:()=>nr,GPTJModel:()=>tr,GPTJPreTrainedModel:()=>er,GPTNeoForCausalLM:()=>Kn,GPTNeoModel:()=>Qn,GPTNeoPreTrainedModel:()=>Xn,GPTNeoXForCausalLM:()=>Zn,GPTNeoXModel:()=>Jn,GPTNeoXPreTrainedModel:()=>Yn,Gemma2ForCausalLM:()=>Sr,Gemma2Model:()=>Pr,Gemma2PreTrainedModel:()=>Cr,GemmaForCausalLM:()=>$r,GemmaModel:()=>kr,GemmaPreTrainedModel:()=>Tr,GraniteForCausalLM:()=>br,GraniteModel:()=>yr,GranitePreTrainedModel:()=>wr,GroupViTModel:()=>hs,GroupViTPreTrainedModel:()=>ps,HieraForImageClassification:()=>Zs,HieraModel:()=>Js,HieraPreTrainedModel:()=>Ys,HubertForCTC:()=>xi,HubertForSequenceClassification:()=>Mi,HubertModel:()=>vi,HubertPreTrainedModel:()=>bi,Idefics3ForConditionalGeneration:()=>x
n,Idefics3PreTrainedModel:()=>vn,ImageMattingOutput:()=>rd,JAISLMHeadModel:()=>Hn,JAISModel:()=>Wn,JAISPreTrainedModel:()=>qn,JinaCLIPModel:()=>On,JinaCLIPPreTrainedModel:()=>Ln,JinaCLIPTextModel:()=>Bn,JinaCLIPVisionModel:()=>Dn,LlamaForCausalLM:()=>cr,LlamaModel:()=>ur,LlamaPreTrainedModel:()=>dr,LlavaForConditionalGeneration:()=>gn,LlavaOnevisionForConditionalGeneration:()=>_n,LlavaPreTrainedModel:()=>fn,LongT5ForConditionalGeneration:()=>Tt,LongT5Model:()=>Mt,LongT5PreTrainedModel:()=>xt,M2M100ForConditionalGeneration:()=>Ko,M2M100Model:()=>Qo,M2M100PreTrainedModel:()=>Xo,MBartForCausalLM:()=>Ot,MBartForConditionalGeneration:()=>zt,MBartForSequenceClassification:()=>Lt,MBartModel:()=>It,MBartPreTrainedModel:()=>At,MPNetForMaskedLM:()=>ot,MPNetForQuestionAnswering:()=>lt,MPNetForSequenceClassification:()=>it,MPNetForTokenClassification:()=>at,MPNetModel:()=>st,MPNetPreTrainedModel:()=>rt,MT5ForConditionalGeneration:()=>Ct,MT5Model:()=>$t,MT5PreTrainedModel:()=>kt,MarianMTModel:()=>Ho,MarianModel:()=>Wo,MarianPreTrainedModel:()=>qo,MaskFormerForInstanceSegmentation:()=>Mo,MaskFormerModel:()=>xo,MaskFormerPreTrainedModel:()=>vo,MaskedLMOutput:()=>Zl,MgpstrForSceneTextRecognition:()=>Ea,MgpstrModelOutput:()=>Pa,MgpstrPreTrainedModel:()=>Sa,MistralForCausalLM:()=>Ni,MistralModel:()=>Di,MistralPreTrainedModel:()=>Bi,MobileBertForMaskedLM:()=>et,MobileBertForQuestionAnswering:()=>nt,MobileBertForSequenceClassification:()=>tt,MobileBertModel:()=>Ze,MobileBertPreTrainedModel:()=>Je,MobileLLMForCausalLM:()=>mr,MobileLLMModel:()=>hr,MobileLLMPreTrainedModel:()=>pr,MobileNetV1ForImageClassification:()=>ma,MobileNetV1Model:()=>ha,MobileNetV1PreTrainedModel:()=>pa,MobileNetV2ForImageClassification:()=>_a,MobileNetV2Model:()=>ga,MobileNetV2PreTrainedModel:()=>fa,MobileNetV3ForImageClassification:()=>ba,MobileNetV3Model:()=>ya,MobileNetV3PreTrainedModel:()=>wa,MobileNetV4ForImageClassification:()=>Ma,MobileNetV4Model:()=>xa,MobileNetV4PreTrainedModel:()=>va,MobileViTForImageCla
ssification:()=>vs,MobileViTModel:()=>bs,MobileViTPreTrainedModel:()=>ys,MobileViTV2ForImageClassification:()=>Ts,MobileViTV2Model:()=>Ms,MobileViTV2PreTrainedModel:()=>xs,ModelOutput:()=>Q,Moondream1ForConditionalGeneration:()=>wn,MptForCausalLM:()=>Qr,MptModel:()=>Xr,MptPreTrainedModel:()=>Hr,MultiModalityCausalLM:()=>Ca,MultiModalityPreTrainedModel:()=>$a,MusicgenForCausalLM:()=>ua,MusicgenForConditionalGeneration:()=>ca,MusicgenModel:()=>da,MusicgenPreTrainedModel:()=>la,NomicBertModel:()=>se,NomicBertPreTrainedModel:()=>re,OPTForCausalLM:()=>Jr,OPTModel:()=>Yr,OPTPreTrainedModel:()=>Kr,OlmoForCausalLM:()=>_r,OlmoModel:()=>gr,OlmoPreTrainedModel:()=>fr,OpenELMForCausalLM:()=>Ar,OpenELMModel:()=>Fr,OpenELMPreTrainedModel:()=>Er,OwlViTForObjectDetection:()=>Cs,OwlViTModel:()=>$s,OwlViTPreTrainedModel:()=>ks,Owlv2ForObjectDetection:()=>Es,Owlv2Model:()=>Ss,Owlv2PreTrainedModel:()=>Ps,PatchTSMixerForPrediction:()=>Oa,PatchTSMixerModel:()=>La,PatchTSMixerPreTrainedModel:()=>za,PatchTSTForPrediction:()=>Ia,PatchTSTModel:()=>Aa,PatchTSTPreTrainedModel:()=>Fa,Phi3ForCausalLM:()=>Gr,Phi3Model:()=>Vr,Phi3PreTrainedModel:()=>jr,PhiForCausalLM:()=>Rr,PhiModel:()=>Nr,PhiPreTrainedModel:()=>Dr,PreTrainedModel:()=>X,PretrainedMixin:()=>Ba,PvtForImageClassification:()=>is,PvtModel:()=>os,PvtPreTrainedModel:()=>ss,PyAnnoteForAudioFrameClassification:()=>si,PyAnnoteModel:()=>ri,PyAnnotePreTrainedModel:()=>ni,QuestionAnsweringModelOutput:()=>ed,Qwen2ForCausalLM:()=>Lr,Qwen2Model:()=>zr,Qwen2PreTrainedModel:()=>Ir,Qwen2VLForConditionalGeneration:()=>Br,Qwen2VLPreTrainedModel:()=>Or,RTDetrForObjectDetection:()=>Vs,RTDetrModel:()=>js,RTDetrObjectDetectionOutput:()=>Gs,RTDetrPreTrainedModel:()=>Rs,ResNetForImageClassification:()=>no,ResNetModel:()=>to,ResNetPreTrainedModel:()=>eo,RoFormerForMaskedLM:()=>ae,RoFormerForQuestionAnswering:()=>ue,RoFormerForSequenceClassification:()=>le,RoFormerForTokenClassification:()=>de,RoFormerModel:()=>ie,RoFormerPreTrainedModel:()=>oe,RobertaForMask
edLM:()=>qt,RobertaForQuestionAnswering:()=>Xt,RobertaForSequenceClassification:()=>Wt,RobertaForTokenClassification:()=>Ht,RobertaModel:()=>Ut,RobertaPreTrainedModel:()=>Gt,SamImageSegmentationOutput:()=>Uo,SamModel:()=>Go,SamPreTrainedModel:()=>Vo,SapiensForDepthEstimation:()=>_o,SapiensForNormalEstimation:()=>wo,SapiensForSemanticSegmentation:()=>go,SapiensPreTrainedModel:()=>fo,SegformerForImageClassification:()=>ea,SegformerForSemanticSegmentation:()=>ta,SegformerModel:()=>Zi,SegformerPreTrainedModel:()=>Ji,Seq2SeqLMOutput:()=>Ql,SequenceClassifierOutput:()=>Kl,SiglipModel:()=>En,SiglipPreTrainedModel:()=>Sn,SiglipTextModel:()=>Fn,SiglipVisionModel:()=>An,SpeechT5ForSpeechToText:()=>Ai,SpeechT5ForTextToSpeech:()=>Ii,SpeechT5HifiGan:()=>zi,SpeechT5Model:()=>Fi,SpeechT5PreTrainedModel:()=>Ei,SqueezeBertForMaskedLM:()=>ct,SqueezeBertForQuestionAnswering:()=>ht,SqueezeBertForSequenceClassification:()=>pt,SqueezeBertModel:()=>ut,SqueezeBertPreTrainedModel:()=>dt,StableLmForCausalLM:()=>sa,StableLmModel:()=>ra,StableLmPreTrainedModel:()=>na,Starcoder2ForCausalLM:()=>Vi,Starcoder2Model:()=>ji,Starcoder2PreTrainedModel:()=>Ri,Swin2SRForImageSuperResolution:()=>lo,Swin2SRModel:()=>ao,Swin2SRPreTrainedModel:()=>io,SwinForImageClassification:()=>oo,SwinModel:()=>so,SwinPreTrainedModel:()=>ro,T5ForConditionalGeneration:()=>vt,T5Model:()=>bt,T5PreTrainedModel:()=>yt,TableTransformerForObjectDetection:()=>Ws,TableTransformerModel:()=>qs,TableTransformerObjectDetectionOutput:()=>Hs,TableTransformerPreTrainedModel:()=>Us,TokenClassifierOutput:()=>Jl,TrOCRForCausalLM:()=>Oi,TrOCRPreTrainedModel:()=>Li,UniSpeechForCTC:()=>di,UniSpeechForSequenceClassification:()=>ui,UniSpeechModel:()=>li,UniSpeechPreTrainedModel:()=>ai,UniSpeechSatForAudioFrameClassification:()=>fi,UniSpeechSatForCTC:()=>hi,UniSpeechSatForSequenceClassification:()=>mi,UniSpeechSatModel:()=>pi,UniSpeechSatPreTrainedModel:()=>ci,ViTForImageClassification:()=>ts,ViTMAEModel:()=>ls,ViTMAEPreTrainedModel:()=>as,ViTMS
NForImageClassification:()=>cs,ViTMSNModel:()=>us,ViTMSNPreTrainedModel:()=>ds,ViTModel:()=>es,ViTPreTrainedModel:()=>Zr,VisionEncoderDecoderModel:()=>mn,VitMatteForImageMatting:()=>ws,VitMattePreTrainedModel:()=>_s,VitPoseForPoseEstimation:()=>rs,VitPosePreTrainedModel:()=>ns,VitsModel:()=>Yi,VitsModelOutput:()=>sd,VitsPreTrainedModel:()=>Ki,Wav2Vec2BertForCTC:()=>wi,Wav2Vec2BertForSequenceClassification:()=>yi,Wav2Vec2BertModel:()=>_i,Wav2Vec2BertPreTrainedModel:()=>gi,Wav2Vec2ForAudioFrameClassification:()=>ti,Wav2Vec2ForCTC:()=>Zo,Wav2Vec2ForSequenceClassification:()=>ei,Wav2Vec2Model:()=>Jo,Wav2Vec2PreTrainedModel:()=>Yo,WavLMForAudioFrameClassification:()=>Si,WavLMForCTC:()=>$i,WavLMForSequenceClassification:()=>Ci,WavLMForXVector:()=>Pi,WavLMModel:()=>ki,WavLMPreTrainedModel:()=>Ti,WeSpeakerResNetModel:()=>ii,WeSpeakerResNetPreTrainedModel:()=>oi,WhisperForConditionalGeneration:()=>hn,WhisperModel:()=>pn,WhisperPreTrainedModel:()=>cn,XLMForQuestionAnswering:()=>en,XLMForSequenceClassification:()=>Jt,XLMForTokenClassification:()=>Zt,XLMModel:()=>Kt,XLMPreTrainedModel:()=>Qt,XLMRobertaForMaskedLM:()=>rn,XLMRobertaForQuestionAnswering:()=>an,XLMRobertaForSequenceClassification:()=>sn,XLMRobertaForTokenClassification:()=>on,XLMRobertaModel:()=>nn,XLMRobertaPreTrainedModel:()=>tn,XLMWithLMHeadModel:()=>Yt,XVectorOutput:()=>Yl,YolosForObjectDetection:()=>Ro,YolosModel:()=>No,YolosObjectDetectionOutput:()=>jo,YolosPreTrainedModel:()=>Do});var r=n(/*! ./configs.js */"./src/configs.js"),s=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),d=n(/*! ./utils/constants.js */"./src/utils/constants.js"),u=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),c=n(/*! 
/**
 * Create one ONNX inference session for every entry of `t`.
 *
 * For each model file this resolves the device, dtype, kv-cache dtype, session options and
 * optional external-data files, downloads the model buffer through the hub helper, and hands
 * everything to `createInferenceSession`.
 *
 * @param {string} e Model id or path forwarded to the hub file loader.
 * @param {Record<string, string>} t Maps a session name to the base name of its `.onnx` file.
 * @param {Object} n Loading options. Reads `config`, `device`, `dtype`, `subfolder`,
 *   `session_options` and `use_external_data_format`; the object is also forwarded to the hub helper.
 * @returns {Promise<Record<string, any>>} Session name -> created inference session.
 * @throws {Error} For an unknown dtype, fp16 requested on a WebGPU device without fp16 support,
 *   an invalid `kv_cache_dtype`, or external data requested when running under Node.js.
 */
async function A(e, t, n) {
    /** Resolve the model buffer plus session options/config for a single model file. */
    const resolveSessionInputs = async (fileName) => {
        const customConfig = n.config?.["transformers.js_config"] ?? {};

        // Device: either a plain string or a per-file mapping.
        let requestedDevice = n.device ?? customConfig.device;
        if (requestedDevice && typeof requestedDevice !== "string") {
            if (requestedDevice.hasOwnProperty(fileName)) {
                requestedDevice = requestedDevice[fileName];
            } else {
                console.warn(`device not specified for "${fileName}". Using the default device.`);
                requestedDevice = null;
            }
        }
        const device = requestedDevice ?? (_.apis.IS_NODE_ENV ? "cpu" : "wasm");
        const executionProviders = (0, s.deviceToExecutionProviders)(device);

        // Dtype: either a plain string or a per-file mapping.
        let dtype = n.dtype ?? customConfig.dtype;
        if (typeof dtype !== "string") {
            if (dtype && dtype.hasOwnProperty(fileName)) {
                dtype = dtype[fileName];
            } else {
                dtype = o.DEFAULT_DEVICE_DTYPE_MAPPING[device] ?? o.DATA_TYPES.fp32;
                console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${device}).`);
            }
        }
        if (dtype === o.DATA_TYPES.auto) {
            // Prefer the dtype recorded in the model's own config, if it names a concrete dtype.
            let configDtype = customConfig.dtype;
            if (typeof configDtype !== "string") {
                // `?.` matters: the config may not define `dtype` at all when the caller passes "auto".
                configDtype = configDtype?.[fileName];
            }
            dtype = configDtype && configDtype !== o.DATA_TYPES.auto && o.DATA_TYPES.hasOwnProperty(configDtype)
                ? configDtype
                : o.DEFAULT_DEVICE_DTYPE_MAPPING[device] ?? o.DATA_TYPES.fp32;
        }

        if (!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(dtype)) {
            throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);
        }
        if (dtype === o.DATA_TYPES.fp16 && device === "webgpu" && !(await (0, o.isWebGpuFp16Supported)())) {
            throw new Error(`The device (${device}) does not support fp16.`);
        }

        // kv-cache dtype: a plain string, or a mapping keyed by the selected dtype.
        const kvCacheConfig = customConfig.kv_cache_dtype;
        let kvCacheDtype;
        if (kvCacheConfig) {
            kvCacheDtype = typeof kvCacheConfig === "string" ? kvCacheConfig : kvCacheConfig[dtype] ?? "float32";
        }
        if (kvCacheDtype && !["float32", "float16"].includes(kvCacheDtype)) {
            throw new Error(`Invalid kv_cache_dtype: ${kvCacheDtype}. Should be one of: float32, float16`);
        }

        const sessionConfig = { dtype, kv_cache_dtype: kvCacheDtype };
        const suffix = o.DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
        const modelFileName = `${n.subfolder ?? ""}/${fileName}${suffix}.onnx`;

        // Copy so the caller's session options are never modified.
        const sessionOptions = { ...n.session_options };
        sessionOptions.executionProviders ??= executionProviders;

        const freeDimensionOverrides = customConfig.free_dimension_overrides;
        if (freeDimensionOverrides) {
            sessionOptions.freeDimensionOverrides ??= freeDimensionOverrides;
        } else if (device.startsWith("webnn") && !sessionOptions.freeDimensionOverrides) {
            console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');
        }

        // Start the model download now; it is awaited only at the very end so that it
        // overlaps with the external-data downloads below.
        const bufferPromise = (0, l.getModelFile)(e, modelFileName, true, n);

        const useExternalData = n.use_external_data_format ?? customConfig.use_external_data_format;
        let externalDataPromises = [];
        const wantsExternalData = useExternalData && (
            useExternalData === true ||
            (typeof useExternalData === "object" && useExternalData.hasOwnProperty(fileName) && useExternalData[fileName] === true)
        );
        if (wantsExternalData) {
            if (_.apis.IS_NODE_ENV) {
                throw new Error("External data format is not yet supported in Node.js");
            }
            const path = `${fileName}${suffix}.onnx_data`;
            const fullPath = `${n.subfolder ?? ""}/${path}`;
            // An async IIFE (rather than `new Promise(async ...)`) so that a failed download
            // rejects this promise instead of leaving it pending forever.
            externalDataPromises.push((async () => ({
                path,
                data: await (0, l.getModelFile)(e, fullPath, true, n),
            }))());
        } else if (sessionOptions.externalData !== undefined) {
            externalDataPromises = sessionOptions.externalData.map(async (entry) => {
                // String data is treated as a file name to fetch; anything else is passed through.
                if (typeof entry.data === "string") {
                    const data = await (0, l.getModelFile)(e, entry.data, true, n);
                    return { ...entry, data };
                }
                return entry;
            });
        }
        if (externalDataPromises.length > 0) {
            sessionOptions.externalData = await Promise.all(externalDataPromises);
        }

        if (device === "webgpu") {
            const shapes = (0, r.getKeyValueShapes)(n.config, { prefix: "present" });
            if (Object.keys(shapes).length > 0 && !(0, s.isONNXProxy)()) {
                // Request every `present*` output as a GPU buffer.
                const preferredOutputLocation = {};
                for (const key in shapes) {
                    preferredOutputLocation[key] = "gpu-buffer";
                }
                sessionOptions.preferredOutputLocation = preferredOutputLocation;
            }
        }

        return { buffer: await bufferPromise, session_options: sessionOptions, session_config: sessionConfig };
    };

    const entries = await Promise.all(
        Object.keys(t).map(async (sessionName) => {
            const { buffer, session_options, session_config } = await resolveSessionInputs(t[sessionName]);
            const session = await (0, s.createInferenceSession)(buffer, session_options, session_config);
            return [sessionName, session];
        }),
    );
    return Object.fromEntries(entries);
}
/**
 * Convert token ids into an int64 tensor.
 *
 * @param {any} e An existing `p.Tensor` (returned untouched), a flat array of ids
 *   (becomes shape `[1, length]`) or an array of equally long rows (becomes `[rows, cols]`).
 * @returns {any} A `p.Tensor` of type "int64".
 * @throws {Error} If `e` is empty or its rows have different lengths.
 */
function O(e) {
    if (e instanceof p.Tensor) {
        return e;
    }
    if (e.length === 0) {
        throw Error("items must be non-empty");
    }

    // Every element is converted with BigInt() because int64 data is stored in a BigInt64Array.
    const toInt64 = (values) => BigInt64Array.from(values.map((value) => BigInt(value)));

    if (!Array.isArray(e[0])) {
        // Flat input: treat it as a single-row batch.
        return new p.Tensor("int64", toInt64(e), [1, e.length]);
    }

    const width = e[0].length;
    for (const row of e) {
        if (row.length !== width) {
            throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");
        }
    }
    return new p.Tensor("int64", toInt64(e.flat()), [e.length, width]);
}
/**
 * Overwrite the embedding at every image-token position with the next image feature, in order.
 *
 * The embeddings are modified in place through `.data.set(...)`; the same `inputs_embeds` and
 * `attention_mask` objects that were passed in are returned.
 *
 * @param {Object} args
 * @param {number|bigint} args.image_token_id Token id marking an image slot (compared loosely,
 *   so a Number id matches BigInt tokens).
 * @param {any} args.inputs_embeds Indexable as `[batch][position]`, each item exposing `.data`.
 * @param {any} args.image_features Indexable by feature number, with `dims[0]` = feature count.
 * @param {any} args.input_ids Object whose `tolist()` yields one token array per sequence.
 * @param {any} args.attention_mask Returned unchanged.
 * @returns {{inputs_embeds: any, attention_mask: any}}
 * @throws {Error} If the number of image tokens differs from the number of image features.
 */
function j({ image_token_id, inputs_embeds, image_features, input_ids, attention_mask }) {
    // Collect, per sequence, the positions that hold the image token.
    const positionsPerSequence = [];
    let tokenCount = 0;
    for (const sequence of input_ids.tolist()) {
        const positions = [];
        sequence.forEach((token, index) => {
            // Loose equality on purpose: tokens may be BigInt while the id is a Number.
            if (token == image_token_id) {
                positions.push(index);
            }
        });
        tokenCount += positions.length;
        positionsPerSequence.push(positions);
    }

    const featureCount = image_features.dims[0];
    if (tokenCount !== featureCount) {
        throw new Error(`Image features and image tokens do not match: tokens: ${tokenCount}, features ${featureCount}`);
    }

    // Features are consumed in order across the whole batch.
    let nextFeature = 0;
    positionsPerSequence.forEach((positions, batchIndex) => {
        const sequenceEmbeds = inputs_embeds[batchIndex];
        for (const position of positions) {
            sequenceEmbeds[position].data.set(image_features[nextFeature++].data);
        }
    });

    return { inputs_embeds, attention_mask };
}
/**
 * Compute, row by row, the running sum of an int64 mask taken *before* each element, writing 1
 * wherever the mask itself is 0.
 *
 * @param {{dims: number[], data: BigInt64Array}} e Two-dimensional mask; `dims` is read as
 *   `[rows, columns]` and `data` is indexed row-major.
 * @returns {{data: BigInt64Array, dims: number[]}} New data buffer plus the input's `dims`
 *   (the same array object, not a copy).
 */
function G(e) {
    const [rowCount, rowLength] = e.dims;
    const mask = e.data;
    const result = new BigInt64Array(mask.length);

    for (let row = 0; row < rowCount; ++row) {
        // The running total restarts for every row.
        let runningTotal = 0n;
        const rowEnd = (row + 1) * rowLength;
        for (let index = row * rowLength; index < rowEnd; ++index) {
            const value = mask[index];
            if (value === 0n) {
                // Masked-out slots get the filler value 1.
                result[index] = 1n;
                continue;
            }
            result[index] = runningTotal;
            runningTotal += value;
        }
    }

    return { data: result, dims: e.dims };
}
Array(e+t).fill(!0).fill(!1,0,t),[s,e+t]),n.images_emb_mask=new p.Tensor("bool",new Array(e).fill(!!r),[s,1,e])}return n}class X extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t,n){super(),this.config=e,this.sessions=t,this.configs=n;const r=F.get(this.constructor),s=S.get(r);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,s){case T:this.can_generate=!0,this._forward=R,this._prepare_inputs_for_generation=U;break;case x:case M:case C:this.can_generate=!0,this._forward=D,this._prepare_inputs_for_generation=q;break;case v:this._forward=D;break;case $:this.can_generate=!0,this._forward=V,this._prepare_inputs_for_generation=W;break;case P:this.can_generate=!0,this._prepare_inputs_for_generation=H;break;default:this._forward=N}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:u=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:u,dtype:c,use_external_data_format:p,session_options:h};const f=F.get(this),g=S.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===T)_=await Promise.all([A(e,{model:m.model_file_name??"model"},m),I(e,{generation_config:"generation_config.json"},m)]);else if(g===x||g===M)_=await Promise.all([A(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),I(e,{generation_config:"generation_config.json"},m)]);else if(g===k)_=await 
Promise.all([A(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===v)_=await Promise.all([A(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===$){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([A(e,t,m),I(e,{generation_config:"generation_config.json"},m)])}else if(g===C)_=await Promise.all([A(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),I(e,{generation_config:"generation_config.json"},m)]);else if(g===P)_=await Promise.all([A(e,{prepare_inputs_embeds:"prepare_inputs_embeds",model:"language_model",lm_head:"lm_head",gen_head:"gen_head",gen_img_embeds:"gen_img_embeds",image_decode:"image_decode"},m),I(e,{generation_config:"generation_config.json"},m)]);else{if(g!==b){const e=f??n?.model_type;"custom"!==e&&console.warn(`Model type for '${e}' not found, assuming encoder-only architecture. 
Please report this at ${d.GITHUB_ISSUE_URL}.`)}_=await Promise.all([A(e,{model:m.model_file_name??"model"},m)])}return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}get generation_config(){return this.configs?.generation_config??null}_get_logits_warper(e){const t=new u.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new u.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new u.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new u.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new u.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new u.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new u.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new u.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new u.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new u.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new u.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new u.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new u.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new u.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=c.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const s=new n(r);return 
Object.assign(s,this.generation_config??{}),e&&Object.assign(s,e),t&&Object.assign(s,(0,a.pick)(t,Object.getOwnPropertyNames(s))),s}_get_stopping_criteria(e,t=null){const n=new f.StoppingCriteriaList;return null!==e.max_length&&n.push(new f.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new f.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ha,Ya,Wa,ja],t=F.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let s=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(s+=` Please use the following class instead: ${[...n].join(", ")}`),Error(s)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new p.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,p.cat)([n.attention_mask,(0,p.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,a.pick)(n,this.forward_params),s=this.main_input_name;if(s in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[s]=e;return{inputs_tensor:r[s],model_inputs:r,model_input_name:s}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...s}=t,o=await this._prepare_inputs_embeds(t);t={...s,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:s}=await N(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)s=(0,p.cat)([s,(0,p.full_like)(s,0)],0),"attention_mask"in t&&(t.attention_mask=(0,p.cat)([t.attention_mask,(0,p.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=O(t.decoder_input_ids).dims[0];if(e!==s.dims[0]){if(1!==s.dims[0])throw new Error(`The encoder outputs have a different batch size (${s.dims[0]}) than the decoder inputs (${e}).`);s=(0,p.cat)(Array.from({length:e},(()=>s)),0)}}return t.encoder_outputs=s,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:s,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(!(i instanceof p.Tensor)){if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(r??=s,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);i=r}else i=Array.from({length:e},(()=>[r]));i=O(i)}return n.decoder_attention_mask=(0,p.ones_like)(i),{input_ids:i,model_inputs:a}}async 
generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:s=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const d=this.config.is_encoder_decoder;let u;d&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),d?({input_ids:u,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):u=a[l];let c=u.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=c+t.max_new_tokens);const h=this._get_logits_processor(t,c,n),m=this._get_stopping_criteria(t,r),f=a[l].dims.at(0),_=g.LogitsSampler.getSampler(t),w=new Array(f).fill(0),y=u.tolist();let b;s&&s.put(y);let v={};for(;;){if(a=this.prepare_inputs_for_generation(y,a,t),b=await this.forward(a),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(b);for(const t in e)t in v||(v[t]=[]),v[t].push(e[t])}const e=h(y,b.logits.slice(null,-1,null)),n=[];for(let t=0;t<e.dims.at(0);++t){const r=e[t],s=await _(r);for(const[e,r]of s){const s=BigInt(e);w[t]+=r,y[t].push(s),n.push([s]);break}}s&&s.put(n);if(m(y).every((e=>e)))break;a=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:b,model_inputs:a,is_encoder_decoder:d})}s&&s.end();const x=this.getPastKeyValues(b,a.past_key_values,!0),M=new p.Tensor("int64",y.flat(),[y.length,y[0].length]);if(t.return_dict_in_generate)return{sequences:M,past_key_values:x,...v};for(const e of Object.values(b))"gpu-buffer"===e.location&&e.dispose();return M}getPastKeyValues(e,t,n=!1){const r=Object.create(null);for(const s in e)if(s.startsWith("present")){const 
o=s.replace("present","past_key_values"),i=s.includes("encoder");if(r[o]=i&&t?t[o]:e[s],t&&(!i||n)){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.sessions.decoder_model_merged??this.sessions.model,n=t?.config?.kv_cache_dtype??"float32",s="float16"===n?new Uint16Array:[],o=(e[this.main_input_name]??e.attention_mask)?.dims?.[0]??1,i=(0,r.getKeyValueShapes)(this.config,{batch_size:o});for(const t in i)e[t]=new p.Tensor(n,s,i[t])}}async encode_image({pixel_values:e}){const t=(await z(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await z(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class Q{}class K extends Q{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class Y extends X{}class J extends Y{}class Z extends Y{async _call(e){return new Zl(await super._call(e))}}class ee extends Y{async _call(e){return new Kl(await super._call(e))}}class te extends Y{async _call(e){return new Jl(await super._call(e))}}class ne extends Y{async _call(e){return new ed(await super._call(e))}}class re extends X{}class se extends re{}class oe extends X{}class ie extends oe{}class ae extends oe{async _call(e){return new Zl(await super._call(e))}}class le extends oe{async _call(e){return new Kl(await super._call(e))}}class de extends oe{async _call(e){return new Jl(await super._call(e))}}class ue extends oe{async _call(e){return 
new ed(await super._call(e))}}class ce extends X{}class pe extends ce{}class he extends ce{async _call(e){return new Zl(await super._call(e))}}class me extends ce{async _call(e){return new Kl(await super._call(e))}}class fe extends ce{async _call(e){return new Jl(await super._call(e))}}class ge extends ce{async _call(e){return new ed(await super._call(e))}}class _e extends X{}class we extends _e{}class ye extends _e{async _call(e){return new Zl(await super._call(e))}}class be extends _e{async _call(e){return new Kl(await super._call(e))}}class ve extends _e{async _call(e){return new Jl(await super._call(e))}}class xe extends _e{async _call(e){return new ed(await super._call(e))}}class Me extends X{}class Te extends Me{}class ke extends Me{async _call(e){return new Zl(await super._call(e))}}class $e extends Me{async _call(e){return new Kl(await super._call(e))}}class Ce extends Me{async _call(e){return new Jl(await super._call(e))}}class Pe extends Me{async _call(e){return new ed(await super._call(e))}}class Se extends X{}class Ee extends Se{}class Fe extends Se{async _call(e){return new Zl(await super._call(e))}}class Ae extends Se{async _call(e){return new Kl(await super._call(e))}}class Ie extends Se{async _call(e){return new Jl(await super._call(e))}}class ze extends Se{async _call(e){return new ed(await super._call(e))}}class Le extends X{}class Oe extends Le{}class Be extends Le{async _call(e){return new Zl(await super._call(e))}}class De extends Le{async _call(e){return new Kl(await super._call(e))}}class Ne extends Le{async _call(e){return new Jl(await super._call(e))}}class Re extends Le{async _call(e){return new ed(await super._call(e))}}class je extends X{}class Ve extends je{}class Ge extends je{async _call(e){return new Kl(await super._call(e))}}class Ue extends je{async _call(e){return new Jl(await super._call(e))}}class qe extends je{async _call(e){return new ed(await super._call(e))}}class We extends je{async _call(e){return new Zl(await 
// SqueezeBert model family. Public names come from the module's export map:
// dt = SqueezeBertPreTrainedModel, ut = SqueezeBertModel, ct = SqueezeBertForMaskedLM,
// pt = SqueezeBertForSequenceClassification, ht = SqueezeBertForQuestionAnswering.

/** SqueezeBertPreTrainedModel: shared base; adds nothing to `X`. */
class dt extends X{}

/** SqueezeBertModel: bare model, returns whatever the inherited `_call` returns. */
class ut extends dt{}

/** SqueezeBertForMaskedLM: wraps the inherited `_call` result in `Zl`. */
// NOTE(review): `Zl` is presumably the masked-LM output class; its export is not visible here.
class ct extends dt{async _call(e){return new Zl(await super._call(e))}}

/** SqueezeBertForSequenceClassification: wraps the result in `Kl` (exported as SequenceClassifierOutput). */
class pt extends dt{async _call(e){return new Kl(await super._call(e))}}

/** SqueezeBertForQuestionAnswering: wraps the inherited `_call` result in `ed`. */
// NOTE(review): `ed` is presumably the question-answering output class; confirm against its definition.
class ht extends dt{async _call(e){return new ed(await super._call(e))}}
// Roberta model family. Public names come from the module's export map:
// Gt = RobertaPreTrainedModel, Ut = RobertaModel, Wt = RobertaForSequenceClassification,
// Ht = RobertaForTokenClassification, Xt = RobertaForQuestionAnswering.

/** RobertaPreTrainedModel: shared base; adds nothing to `X`. */
class Gt extends X{}

/** RobertaModel: bare model, returns whatever the inherited `_call` returns. */
class Ut extends Gt{}

/** Wraps the inherited `_call` result in `Zl`. */
// NOTE(review): the export-map entry for `qt` is cut off at the chunk edge ("...edLM");
// presumably RobertaForMaskedLM with `Zl` as the masked-LM output class — confirm.
class qt extends Gt{async _call(e){return new Zl(await super._call(e))}}

/** RobertaForSequenceClassification: wraps the result in `Kl` (exported as SequenceClassifierOutput). */
class Wt extends Gt{async _call(e){return new Kl(await super._call(e))}}

/** RobertaForTokenClassification: wraps the result in `Jl` (exported as TokenClassifierOutput). */
class Ht extends Gt{async _call(e){return new Jl(await super._call(e))}}

/** RobertaForQuestionAnswering: wraps the inherited `_call` result in `ed`. */
// NOTE(review): `ed` is presumably the question-answering output class; confirm against its definition.
class Xt extends Gt{async _call(e){return new ed(await super._call(e))}}
If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...s}){t=this._prepare_generation_config(t,s);const o=s.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...s});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let s=this.config.median_filter_width;void 0===s&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),s=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,p.cat)(o.map((e=>e[t])),2))),l=(0,p.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[d,u]=(0,p.std_mean)(l,-2,0,!0),c=l.clone();for(let e=0;e<c.dims[0];++e){const t=c[e];for(let n=0;n<t.dims[0];++n){const r=t[n],o=d[e][n][0].data,i=u[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,m.medianFilter)(t,s))}}}const h=[(0,p.mean)(c,1)],f=e.sequences.dims,g=new p.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=h[e].neg().squeeze_(0),[n,s]=(0,m.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(s[e]*r);g[e].data.set(l,1)}return g}}class mn extends X{main_input_name="pixel_values";forward_params=["pixel_values","decoder_input_ids","encoder_hidden_states","past_key_values"]}class fn extends X{forward_params=["input_ids","attention_mask","pixel_values","position_ids","past_key_values"]}class gn extends fn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const s=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==s)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:r};const l=[],d=[];for(let n=0;n<o.length;++n){const 
s=o[n],i=e[n],a=t[n],u=r[n];l.push((0,p.cat)([i.slice([0,s]),a,i.slice([s+1,i.dims[0]])],0)),d.push((0,p.cat)([u.slice([0,s]),(0,p.ones)([a.dims[0]]),u.slice([s+1,u.dims[0]])],0))}return{inputs_embeds:(0,p.stack)(l,0),attention_mask:(0,p.stack)(d,0)}}}class _n extends gn{}class wn extends gn{}class yn extends X{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds"}class bn extends yn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,p.cat)([t,e],1),attention_mask:(0,p.cat)([(0,p.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let s,o;return e&&(s=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),s&&o?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:s,image_features:o,input_ids:e,attention_mask:r})):n=s||o,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:s,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await N(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const d={inputs_embeds:l,attention_mask:s,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await R(this,d,!0)}}class vn extends 
X{forward_params=["input_ids","attention_mask","pixel_values","pixel_attention_mask","position_ids","past_key_values"]}class xn extends vn{async encode_image({pixel_values:e,pixel_attention_mask:t}){return(await z(this.sessions.vision_encoder,{pixel_values:e,pixel_attention_mask:t})).image_features}_merge_input_ids_with_image_features(e){const t=e.image_features.dims.at(-1),n=e.image_features.view(-1,t);return j({image_token_id:this.config.image_token_id,...e,image_features:n})}}class Mn extends X{}class Tn extends Mn{}class kn extends Mn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class $n extends Mn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Cn extends Mn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Pn extends Mn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Sn extends X{}class En extends Sn{}class Fn extends Sn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class An extends Mn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class In extends X{}class zn extends In{}class Ln extends X{}class On extends Ln{async forward(e){const t=!e.input_ids,n=!e.pixel_values;if(t&&n)throw new Error("Either `input_ids` or `pixel_values` should be provided.");if(t&&(e.input_ids=(0,p.ones)([e.pixel_values.dims[0],1])),n){const{image_size:t}=this.config.vision_config;e.pixel_values=(0,p.full)([0,3,t,t],0)}const{text_embeddings:r,image_embeddings:s,l2norm_text_embeddings:o,l2norm_image_embeddings:i}=await super.forward(e),a={};return t||(a.text_embeddings=r,a.l2norm_text_embeddings=o),n||(a.image_embeddings=s,a.l2norm_image_embeddings=i),a}}class Bn extends Ln{static async from_pretrained(e,t={}){return 
t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Dn extends Ln{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Nn extends X{}class Rn extends Nn{}class jn extends Nn{}class Vn extends X{}class Gn extends Vn{}class Un extends Vn{}class qn extends X{}class Wn extends qn{}class Hn extends qn{}class Xn extends X{}class Qn extends Xn{}class Kn extends Xn{}class Yn extends X{}class Jn extends Yn{}class Zn extends Yn{}class er extends X{}class tr extends er{}class nr extends er{}class rr extends X{}class sr extends rr{}class or extends rr{}class ir extends X{}class ar extends ir{}class lr extends ir{}class dr extends X{}class ur extends dr{}class cr extends dr{}class pr extends X{}class hr extends pr{}class mr extends pr{}class fr extends X{}class gr extends fr{}class _r extends fr{}class wr extends X{}class yr extends wr{}class br extends wr{}class vr extends X{}class xr extends vr{}class Mr extends vr{}class Tr extends X{}class kr extends Tr{}class $r extends Tr{}class Cr extends X{}class Pr extends Cr{}class Sr extends Cr{}class Er extends X{}class Fr extends Er{}class Ar extends Er{}class Ir extends X{}class zr extends Ir{}class Lr extends Ir{}class Or extends X{forward_params=["input_ids","attention_mask","position_ids","past_key_values","pixel_values","image_grid_thw"]}class Br extends Or{get_rope_index(e,t,n,r){const{vision_config:s,image_token_id:o,video_token_id:i,vision_start_token_id:a}=this.config,l=s.spatial_merge_size??2,d=[];if(t||n){let s=e.tolist();r||(r=(0,p.ones_like)(e));const u=r.tolist(),c=Array.from({length:3},(t=>Array.from({length:e.dims[0]},(t=>Array.from({length:e.dims[1]},(e=>1)))))),h=t?t.tolist():[],f=n?n.tolist():[];let g=0,_=0;for(let e=0;e<s.length;++e){const t=s[e].filter(((t,n)=>1==u[e][n])),n=t.reduce(((e,t,n)=>(t==a&&e.push(n),e)),[]).map((e=>t[e+1])),r=n.filter((e=>e==o)).length,p=n.filter((e=>e==i)).length;let w=[],y=0,b=r,v=p;for(let 
e=0;e<n.length;++e){const e=t.findIndex(((e,t)=>t>y&&e==o)),n=t.findIndex(((e,t)=>t>y&&e==i)),r=b>0&&-1!==e?e:t.length+1,s=v>0&&-1!==n?n:t.length+1;let a,d,u,c;r<s?([d,u,c]=h[g],++g,--b,a=r):([d,u,c]=f[_],++_,--v,a=s);const[p,x,M]=[Number(d),Math.floor(Number(u)/l),Math.floor(Number(c)/l)],T=a-y,k=w.length>0?(0,m.max)(w.at(-1))[0]+1:0;w.push(Array.from({length:3*T},((e,t)=>k+t%T)));const $=T+k,C=p*x*M,P=Array.from({length:C},((e,t)=>$+Math.floor(t/(x*M)))),S=Array.from({length:C},((e,t)=>$+Math.floor(t/M)%x)),E=Array.from({length:C},((e,t)=>$+t%M));w.push([P,S,E].flat()),y=a+C}if(y<t.length){const e=w.length>0?(0,m.max)(w.at(-1))[0]+1:0,n=t.length-y;w.push(Array.from({length:3*n},((t,r)=>e+r%n)))}const x=w.reduce(((e,t)=>e+t.length),0),M=new Array(x);let T=0;for(let e=0;e<3;++e)for(let t=0;t<w.length;++t){const n=w[t],r=n.length/3;for(let t=e*r;t<(e+1)*r;++t)M[T++]=n[t]}let k=0;const $=u[e];for(let t=0;t<$.length;++t)if(1==$[t]){for(let n=0;n<3;++n)c[n][e][t]=M[n*x/3+k];++k}const C=(0,m.max)(M)[0];d.push(C+1-s[e].length)}return[new p.Tensor("int64",c.flat(1/0),[3,e.dims[0],e.dims[1]]),new p.Tensor("int64",d,[d.length,1])]}if(r){const{data:e,dims:t}=G(r),n=BigInt64Array.from({length:3*e.length},((t,n)=>e[n%e.length])),s=Array.from({length:t[0]},((n,r)=>(0,m.max)(e.subarray(t[1]*r,t[1]*(r+1)))[0]+1+t[1]));return[new p.Tensor("int64",n,[3,...t]),new p.Tensor("int64",s,[s.length,1])]}{const[t,n]=e.dims,r=BigInt64Array.from({length:3*t*n},((e,r)=>BigInt(Math.floor(r%n/t))));return[new p.Tensor("int64",r,[3,...e.dims]),(0,p.zeros)([t,1])]}}async encode_image({pixel_values:e,image_grid_thw:t}){return(await z(this.sessions.vision_encoder,{pixel_values:e,grid_thw:t})).image_features}_merge_input_ids_with_image_features(e){return j({image_token_id:this.config.image_token_id,...e})}prepare_inputs_for_generation(e,t,n){if(t.attention_mask&&!t.position_ids)if(t.past_key_values){t.pixel_values=null;const 
e=BigInt(Object.values(t.past_key_values)[0].dims.at(-2)),n=t.rope_deltas.map((t=>e+t));t.position_ids=(0,p.stack)([n,n,n],0)}else[t.position_ids,t.rope_deltas]=this.get_rope_index(t.input_ids,t.image_grid_thw,t.video_grid_thw,t.attention_mask);return t}}class Dr extends X{}class Nr extends Dr{}class Rr extends Dr{}class jr extends X{}class Vr extends jr{}class Gr extends jr{}class Ur extends X{}class qr extends Ur{}class Wr extends Ur{}class Hr extends X{}class Xr extends Hr{}class Qr extends Hr{}class Kr extends X{}class Yr extends Kr{}class Jr extends Kr{}class Zr extends X{}class es extends Zr{}class ts extends Zr{async _call(e){return new Kl(await super._call(e))}}class ns extends X{}class rs extends ns{}class ss extends X{}class os extends ss{}class is extends ss{async _call(e){return new Kl(await super._call(e))}}class as extends X{}class ls extends as{}class ds extends X{}class us extends ds{}class cs extends ds{async _call(e){return new Kl(await super._call(e))}}class ps extends X{}class hs extends ps{}class ms extends X{}class fs extends ms{}class gs extends ms{async _call(e){return new Kl(await super._call(e))}}class _s extends X{}class ws extends _s{async _call(e){return new rd(await super._call(e))}}class ys extends X{}class bs extends ys{}class vs extends ys{async _call(e){return new Kl(await super._call(e))}}class xs extends X{}class Ms extends xs{}class Ts extends xs{async _call(e){return new Kl(await super._call(e))}}class ks extends X{}class $s extends ks{}class Cs extends ks{}class Ps extends X{}class Ss extends Ps{}class Es extends Ps{}class Fs extends X{}class As extends Fs{}class Is extends Fs{async _call(e){return new Kl(await super._call(e))}}class zs extends X{}class Ls extends zs{}class Os extends zs{async _call(e){return new Ds(await super._call(e))}}class Bs extends zs{async _call(e){return new Ns(await super._call(e))}}class Ds extends Q{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ns extends 
Q{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Rs extends X{}class js extends Rs{}class Vs extends Rs{async _call(e){return new Gs(await super._call(e))}}class Gs extends Q{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Us extends X{}class qs extends Us{}class Ws extends Us{async _call(e){return new Hs(await super._call(e))}}class Hs extends Ds{}class Xs extends X{}class Qs extends Xs{}class Ks extends Xs{async _call(e){return new Kl(await super._call(e))}}class Ys extends X{}class Js extends Ys{}class Zs extends Ys{async _call(e){return new Kl(await super._call(e))}}class eo extends X{}class to extends eo{}class no extends eo{async _call(e){return new Kl(await super._call(e))}}class ro extends X{}class so extends ro{}class oo extends ro{async _call(e){return new Kl(await super._call(e))}}class io extends X{}class ao extends io{}class lo extends io{}class uo extends X{}class co extends uo{}class po extends uo{}class ho extends X{}class mo extends ho{}class fo extends X{}class go extends fo{}class _o extends fo{}class wo extends fo{}class yo extends X{}class bo extends yo{}class vo extends X{}class xo extends vo{}class Mo extends vo{}class To extends X{}class ko extends To{}class $o extends To{}class Co extends X{}class Po extends Co{}class So extends X{}class Eo extends So{}class Fo extends So{async _call(e){return new Kl(await super._call(e))}}class Ao extends X{}class Io extends Ao{}class zo extends Ao{async _call(e){return new Kl(await super._call(e))}}class Lo extends X{}class Oo extends Lo{}class Bo extends Lo{async _call(e){return new Kl(await super._call(e))}}class Do extends X{}class No extends Do{}class Ro extends Do{async _call(e){return new jo(await super._call(e))}}class jo extends Q{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Vo extends X{}class Go extends Vo{async 
get_image_embeddings({pixel_values:e}){return await N(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new p.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await z(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new Uo(await super._call(e))}}class Uo extends Q{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class qo extends X{}class Wo extends qo{}class Ho extends qo{}class Xo extends X{}class Qo extends Xo{}class Ko extends Xo{}class Yo extends X{}class Jo extends Yo{}class Zo extends Yo{async _call(e){return new td(await super._call(e))}}class ei extends Yo{async _call(e){return new Kl(await super._call(e))}}class ti extends Yo{async _call(e){return new Jl(await super._call(e))}}class ni extends X{}class ri extends ni{}class si extends ni{async _call(e){return new Jl(await super._call(e))}}class oi extends X{}class ii extends oi{}class ai extends X{}class li extends ai{}class di extends ai{async _call(e){return new td(await super._call(e))}}class ui extends ai{async _call(e){return new Kl(await super._call(e))}}class ci extends X{}class pi extends ci{}class hi extends ci{async _call(e){return new td(await super._call(e))}}class mi extends ci{async _call(e){return new Kl(await super._call(e))}}class fi extends ci{async _call(e){return new Jl(await super._call(e))}}class gi extends X{}class _i extends gi{}class wi extends gi{async _call(e){return new td(await super._call(e))}}class yi extends gi{async _call(e){return new Kl(await super._call(e))}}class bi 
extends X{}class vi extends Yo{}class xi extends Yo{async _call(e){return new td(await super._call(e))}}class Mi extends Yo{async _call(e){return new Kl(await super._call(e))}}class Ti extends X{}class ki extends Ti{}class $i extends Ti{async _call(e){return new td(await super._call(e))}}class Ci extends Ti{async _call(e){return new Kl(await super._call(e))}}class Pi extends Ti{async _call(e){return new Yl(await super._call(e))}}class Si extends Ti{async _call(e){return new Jl(await super._call(e))}}class Ei extends X{}class Fi extends Ei{}class Ai extends Ei{}class Ii extends Ei{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:s=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await N(this,i),d=a.dims[1]/this.config.reduction_factor,u=Math.floor(d*s),c=Math.floor(d*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=B(!!g);let r;r=g?g.output_sequence_out:new p.Tensor("float32",new Float32Array(h),[1,1,h]);let s={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(s,f),g=await z(this.sessions.decoder_model_merged,s),f=this.getPastKeyValues(g,f);const{prob:o,spectrum:i}=g;if(m.push(i),_>=c&&(Array.from(o.data).filter((e=>e>=n)).length>0||_>=u))break}const w=(0,p.cat)(m),{waveform:y}=await z(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class zi extends X{main_input_name="spectrogram"}class Li extends X{}class Oi extends Li{}class Bi extends X{}class Di extends Bi{}class Ni extends Bi{}class Ri extends X{}class ji extends Ri{}class Vi extends Ri{}class Gi extends X{}class Ui extends Gi{}class qi extends Gi{}class Wi extends X{}class Hi extends Wi{}class Xi extends Wi{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Qi extends Wi{static async from_pretrained(e,t={}){return 
t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Ki extends X{}class Yi extends Ki{async _call(e){return new sd(await super._call(e))}}class Ji extends X{}class Zi extends Ji{}class ea extends Ji{}class ta extends Ji{}class na extends X{}class ra extends na{}class sa extends na{}class oa extends X{}class ia extends oa{}class aa extends oa{async _call(e){return new Kl(await super._call(e))}}class la extends X{}class da extends la{}class ua extends la{}class ca extends X{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,s=n-r;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%r;i>0&&i<=s&&(e.data[o++]=e.data[t])}const i=Math.floor(t/r),a=o/(i*r);return new p.Tensor(e.type,e.data.slice(0,o),[i,r,a])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await z(this.sessions.encodec_decode,{audio_codes:n});return r}}class pa extends X{}class ha extends pa{}class ma extends pa{async _call(e){return new Kl(await super._call(e))}}class fa extends X{}class ga extends fa{}class _a extends fa{async _call(e){return new Kl(await super._call(e))}}class wa extends X{}class ya extends wa{}class ba extends wa{async _call(e){return new Kl(await super._call(e))}}class va extends X{}class xa extends va{}class Ma extends va{async _call(e){return new Kl(await super._call(e))}}class Ta extends X{}class ka extends Ta{}class $a extends 
X{}class Ca extends $a{forward_params=["input_ids","pixel_values","images_seq_mask","images_emb_mask","attention_mask","position_ids","past_key_values"];constructor(...e){super(...e),this._generation_mode="text"}async forward(e){const t=this._generation_mode??"text";let n;if("text"!==t&&e.past_key_values){const t=this.sessions.gen_img_embeds,r=(0,a.pick)({image_ids:e.input_ids},t.inputNames);n=await z(t,r)}else{const t=this.sessions.prepare_inputs_embeds,r=(0,a.pick)(e,t.inputNames);n=await z(t,r)}const r={...e,...n},s=await R(this,r),o=this.sessions["text"===t?"lm_head":"gen_head"];if(!o)throw new Error(`Unable to find "${o}" generation head`);const i=await z(o,(0,a.pick)(s,o.inputNames));return{...n,...s,...i}}async generate(e){return this._generation_mode="text",super.generate(e)}async generate_images(e){this._generation_mode="image";const t=(e.inputs??e[this.main_input_name]).dims[1],n=(await super.generate(e)).slice(null,[t,null]),r=this.sessions.image_decode,{decoded_image:s}=await z(r,{generated_tokens:n}),o=s.add_(1).mul_(127.5).clamp_(0,255).to("uint8"),i=[];for(const e of o){const t=h.RawImage.fromTensor(e);i.push(t)}return i}}class Pa extends Q{constructor({char_logits:e,bpe_logits:t,wp_logits:n}){super(),this.char_logits=e,this.bpe_logits=t,this.wp_logits=n}get logits(){return[this.char_logits,this.bpe_logits,this.wp_logits]}}class Sa extends X{}class Ea extends Sa{async _call(e){return new Pa(await super._call(e))}}class Fa extends X{}class Aa extends Fa{}class Ia extends Fa{}class za extends X{}class La extends za{}class Oa extends za{}class Ba{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:d=null,dtype:u=null,use_external_data_format:c=null,session_options:p={}}={}){const 
h={progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:d,dtype:u,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await X.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Da=new Map([["bert",["BertModel",J]],["nomic_bert",["NomicBertModel",se]],["roformer",["RoFormerModel",ie]],["electra",["ElectraModel",we]],["esm",["EsmModel",Xe]],["convbert",["ConvBertModel",pe]],["camembert",["CamembertModel",Te]],["deberta",["DebertaModel",Ee]],["deberta-v2",["DebertaV2Model",Oe]],["mpnet",["MPNetModel",st]],["albert",["AlbertModel",ft]],["distilbert",["DistilBertModel",Ve]],["roberta",["RobertaModel",Ut]],["xlm",["XLMModel",Kt]],["xlm-roberta",["XLMRobertaModel",nn]],["clap",["ClapModel",Hi]],["clip",["CLIPModel",Tn]],["clipseg",["CLIPSegModel",Rn]],["chinese_clip",["ChineseCLIPModel",zn]],["siglip",["SiglipModel",En]],["jina_clip",["JinaCLIPModel",On]],["mobilebert",["MobileBertModel",Ze]],["squeezebert",["SqueezeBertModel",ut]],["wav2vec2",["Wav2Vec2Model",Jo]],["wav2vec2-bert",["Wav2Vec2BertModel",_i]],["unispeech",["UniSpeechModel",li]],["unispeech-sat",["UniSpeechSatModel",pi]],["hubert",["HubertModel",vi]],["wavlm",["WavLMModel",ki]],["audio-spectrogram-transformer",["ASTModel",dn]],["vits",["VitsModel",Yi]],["pyannote",["PyAnnoteModel",ri]],["wespeaker-resnet",["WeSpeakerResNetModel",ii]],["detr",["DetrModel",Ls]],["rt_detr",["RTDetrModel",js]],["table-transformer",["TableTransformerModel",qs]],["vit",["ViTModel",es]],["pvt",["PvtModel",os]],["vit_msn",["V
iTMSNModel",us]],["vit_mae",["ViTMAEModel",ls]],["groupvit",["GroupViTModel",hs]],["fastvit",["FastViTModel",fs]],["mobilevit",["MobileViTModel",bs]],["mobilevitv2",["MobileViTV2Model",Ms]],["owlvit",["OwlViTModel",$s]],["owlv2",["Owlv2Model",Ss]],["beit",["BeitModel",As]],["deit",["DeiTModel",Qs]],["hiera",["HieraModel",Js]],["convnext",["ConvNextModel",Eo]],["convnextv2",["ConvNextV2Model",Io]],["dinov2",["Dinov2Model",Oo]],["resnet",["ResNetModel",to]],["swin",["SwinModel",so]],["swin2sr",["Swin2SRModel",ao]],["donut-swin",["DonutSwinModel",Po]],["yolos",["YolosModel",No]],["dpt",["DPTModel",co]],["glpn",["GLPNModel",ko]],["hifigan",["SpeechT5HifiGan",zi]],["efficientnet",["EfficientNetModel",ia]],["decision_transformer",["DecisionTransformerModel",ka]],["patchtst",["PatchTSTForPrediction",Aa]],["patchtsmixer",["PatchTSMixerForPrediction",La]],["mobilenet_v1",["MobileNetV1Model",ha]],["mobilenet_v2",["MobileNetV2Model",ga]],["mobilenet_v3",["MobileNetV3Model",ya]],["mobilenet_v4",["MobileNetV4Model",xa]],["maskformer",["MaskFormerModel",xo]],["mgp-str",["MgpstrForSceneTextRecognition",Ea]]]),Na=new Map([["t5",["T5Model",bt]],["longt5",["LongT5Model",Mt]],["mt5",["MT5Model",$t]],["bart",["BartModel",St]],["mbart",["MBartModel",It]],["marian",["MarianModel",Wo]],["whisper",["WhisperModel",pn]],["m2m_100",["M2M100Model",Qo]],["blenderbot",["BlenderbotModel",Dt]],["blenderbot-small",["BlenderbotSmallModel",jt]]]),Ra=new 
Map([["bloom",["BloomModel",qr]],["jais",["JAISModel",Wn]],["gpt2",["GPT2Model",Gn]],["gptj",["GPTJModel",tr]],["gpt_bigcode",["GPTBigCodeModel",sr]],["gpt_neo",["GPTNeoModel",Qn]],["gpt_neox",["GPTNeoXModel",Jn]],["codegen",["CodeGenModel",ar]],["llama",["LlamaModel",ur]],["olmo",["OlmoModel",gr]],["mobilellm",["MobileLLMModel",hr]],["granite",["GraniteModel",yr]],["cohere",["CohereModel",xr]],["gemma",["GemmaModel",kr]],["gemma2",["Gemma2Model",Pr]],["openelm",["OpenELMModel",Fr]],["qwen2",["Qwen2Model",zr]],["phi",["PhiModel",Nr]],["phi3",["Phi3Model",Vr]],["mpt",["MptModel",Xr]],["opt",["OPTModel",Yr]],["mistral",["MistralModel",Di]],["starcoder2",["Starcoder2Model",ji]],["falcon",["FalconModel",Ui]],["stablelm",["StableLmModel",ra]]]),ja=new Map([["speecht5",["SpeechT5ForSpeechToText",Ai]],["whisper",["WhisperForConditionalGeneration",hn]]]),Va=new Map([["speecht5",["SpeechT5ForTextToSpeech",Ii]]]),Ga=new Map([["vits",["VitsModel",Yi]],["musicgen",["MusicgenForConditionalGeneration",ca]]]),Ua=new Map([["bert",["BertForSequenceClassification",ee]],["roformer",["RoFormerForSequenceClassification",le]],["electra",["ElectraForSequenceClassification",be]],["esm",["EsmForSequenceClassification",Ke]],["convbert",["ConvBertForSequenceClassification",me]],["camembert",["CamembertForSequenceClassification",$e]],["deberta",["DebertaForSequenceClassification",Ae]],["deberta-v2",["DebertaV2ForSequenceClassification",De]],["mpnet",["MPNetForSequenceClassification",it]],["albert",["AlbertForSequenceClassification",gt]],["distilbert",["DistilBertForSequenceClassification",Ge]],["roberta",["RobertaForSequenceClassification",Wt]],["xlm",["XLMForSequenceClassification",Jt]],["xlm-roberta",["XLMRobertaForSequenceClassification",sn]],["bart",["BartForSequenceClassification",Ft]],["mbart",["MBartForSequenceClassification",Lt]],["mobilebert",["MobileBertForSequenceClassification",tt]],["squeezebert",["SqueezeBertForSequenceClassification",pt]]]),qa=new 
Map([["bert",["BertForTokenClassification",te]],["roformer",["RoFormerForTokenClassification",de]],["electra",["ElectraForTokenClassification",ve]],["esm",["EsmForTokenClassification",Ye]],["convbert",["ConvBertForTokenClassification",fe]],["camembert",["CamembertForTokenClassification",Ce]],["deberta",["DebertaForTokenClassification",Ie]],["deberta-v2",["DebertaV2ForTokenClassification",Ne]],["mpnet",["MPNetForTokenClassification",at]],["distilbert",["DistilBertForTokenClassification",Ue]],["roberta",["RobertaForTokenClassification",Ht]],["xlm",["XLMForTokenClassification",Zt]],["xlm-roberta",["XLMRobertaForTokenClassification",on]]]),Wa=new Map([["t5",["T5ForConditionalGeneration",vt]],["longt5",["LongT5ForConditionalGeneration",Tt]],["mt5",["MT5ForConditionalGeneration",Ct]],["bart",["BartForConditionalGeneration",Et]],["mbart",["MBartForConditionalGeneration",zt]],["marian",["MarianMTModel",Ho]],["m2m_100",["M2M100ForConditionalGeneration",Ko]],["blenderbot",["BlenderbotForConditionalGeneration",Nt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Vt]]]),Ha=new 
Map([["bloom",["BloomForCausalLM",Wr]],["gpt2",["GPT2LMHeadModel",Un]],["jais",["JAISLMHeadModel",Hn]],["gptj",["GPTJForCausalLM",nr]],["gpt_bigcode",["GPTBigCodeForCausalLM",or]],["gpt_neo",["GPTNeoForCausalLM",Kn]],["gpt_neox",["GPTNeoXForCausalLM",Zn]],["codegen",["CodeGenForCausalLM",lr]],["llama",["LlamaForCausalLM",cr]],["olmo",["OlmoForCausalLM",_r]],["mobilellm",["MobileLLMForCausalLM",mr]],["granite",["GraniteForCausalLM",br]],["cohere",["CohereForCausalLM",Mr]],["gemma",["GemmaForCausalLM",$r]],["gemma2",["Gemma2ForCausalLM",Sr]],["openelm",["OpenELMForCausalLM",Ar]],["qwen2",["Qwen2ForCausalLM",Lr]],["phi",["PhiForCausalLM",Rr]],["phi3",["Phi3ForCausalLM",Gr]],["mpt",["MptForCausalLM",Qr]],["opt",["OPTForCausalLM",Jr]],["mbart",["MBartForCausalLM",Ot]],["mistral",["MistralForCausalLM",Ni]],["starcoder2",["Starcoder2ForCausalLM",Vi]],["falcon",["FalconForCausalLM",qi]],["trocr",["TrOCRForCausalLM",Oi]],["stablelm",["StableLmForCausalLM",sa]]]),Xa=new Map([["multi_modality",["MultiModalityCausalLM",Ca]]]),Qa=new Map([["bert",["BertForMaskedLM",Z]],["roformer",["RoFormerForMaskedLM",ae]],["electra",["ElectraForMaskedLM",ye]],["esm",["EsmForMaskedLM",Qe]],["convbert",["ConvBertForMaskedLM",he]],["camembert",["CamembertForMaskedLM",ke]],["deberta",["DebertaForMaskedLM",Fe]],["deberta-v2",["DebertaV2ForMaskedLM",Be]],["mpnet",["MPNetForMaskedLM",ot]],["albert",["AlbertForMaskedLM",wt]],["distilbert",["DistilBertForMaskedLM",We]],["roberta",["RobertaForMaskedLM",qt]],["xlm",["XLMWithLMHeadModel",Yt]],["xlm-roberta",["XLMRobertaForMaskedLM",rn]],["mobilebert",["MobileBertForMaskedLM",et]],["squeezebert",["SqueezeBertForMaskedLM",ct]]]),Ka=new 
Map([["bert",["BertForQuestionAnswering",ne]],["roformer",["RoFormerForQuestionAnswering",ue]],["electra",["ElectraForQuestionAnswering",xe]],["convbert",["ConvBertForQuestionAnswering",ge]],["camembert",["CamembertForQuestionAnswering",Pe]],["deberta",["DebertaForQuestionAnswering",ze]],["deberta-v2",["DebertaV2ForQuestionAnswering",Re]],["mpnet",["MPNetForQuestionAnswering",lt]],["albert",["AlbertForQuestionAnswering",_t]],["distilbert",["DistilBertForQuestionAnswering",qe]],["roberta",["RobertaForQuestionAnswering",Xt]],["xlm",["XLMForQuestionAnswering",en]],["xlm-roberta",["XLMRobertaForQuestionAnswering",an]],["mobilebert",["MobileBertForQuestionAnswering",nt]],["squeezebert",["SqueezeBertForQuestionAnswering",ht]]]),Ya=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",mn]],["idefics3",["Idefics3ForConditionalGeneration",xn]]]),Ja=new Map([["llava",["LlavaForConditionalGeneration",gn]],["llava_onevision",["LlavaOnevisionForConditionalGeneration",_n]],["moondream1",["Moondream1ForConditionalGeneration",wn]],["florence2",["Florence2ForConditionalGeneration",bn]],["qwen2-vl",["Qwen2VLForConditionalGeneration",Br]],["idefics3",["Idefics3ForConditionalGeneration",xn]]]),Za=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",mn]]]),el=new 
Map([["vit",["ViTForImageClassification",ts]],["pvt",["PvtForImageClassification",is]],["vit_msn",["ViTMSNForImageClassification",cs]],["fastvit",["FastViTForImageClassification",gs]],["mobilevit",["MobileViTForImageClassification",vs]],["mobilevitv2",["MobileViTV2ForImageClassification",Ts]],["beit",["BeitForImageClassification",Is]],["deit",["DeiTForImageClassification",Ks]],["hiera",["HieraForImageClassification",Zs]],["convnext",["ConvNextForImageClassification",Fo]],["convnextv2",["ConvNextV2ForImageClassification",zo]],["dinov2",["Dinov2ForImageClassification",Bo]],["resnet",["ResNetForImageClassification",no]],["swin",["SwinForImageClassification",oo]],["segformer",["SegformerForImageClassification",ea]],["efficientnet",["EfficientNetForImageClassification",aa]],["mobilenet_v1",["MobileNetV1ForImageClassification",ma]],["mobilenet_v2",["MobileNetV2ForImageClassification",_a]],["mobilenet_v3",["MobileNetV3ForImageClassification",ba]],["mobilenet_v4",["MobileNetV4ForImageClassification",Ma]]]),tl=new Map([["detr",["DetrForObjectDetection",Os]],["rt_detr",["RTDetrForObjectDetection",Vs]],["table-transformer",["TableTransformerForObjectDetection",Ws]],["yolos",["YolosForObjectDetection",Ro]]]),nl=new Map([["owlvit",["OwlViTForObjectDetection",Cs]],["owlv2",["Owlv2ForObjectDetection",Es]]]),rl=new Map([["detr",["DetrForSegmentation",Bs]],["clipseg",["CLIPSegForImageSegmentation",jn]]]),sl=new Map([["segformer",["SegformerForSemanticSegmentation",ta]],["sapiens",["SapiensForSemanticSegmentation",go]]]),ol=new Map([["detr",["DetrForSegmentation",Bs]],["maskformer",["MaskFormerForInstanceSegmentation",Mo]]]),il=new Map([["sam",["SamModel",Go]]]),al=new Map([["wav2vec2",["Wav2Vec2ForCTC",Zo]],["wav2vec2-bert",["Wav2Vec2BertForCTC",wi]],["unispeech",["UniSpeechForCTC",di]],["unispeech-sat",["UniSpeechSatForCTC",hi]],["wavlm",["WavLMForCTC",$i]],["hubert",["HubertForCTC",xi]]]),ll=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",ei]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",yi]],["unispeech",["UniSpeechForSequenceClassification",ui]],["unispeech-sat",["UniSpeechSatForSequenceClassification",mi]],["wavlm",["WavLMForSequenceClassification",Ci]],["hubert",["HubertForSequenceClassification",Mi]],["audio-spectrogram-transformer",["ASTForAudioClassification",un]]]),dl=new Map([["wavlm",["WavLMForXVector",Pi]]]),ul=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",fi]],["wavlm",["WavLMForAudioFrameClassification",Si]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ti]],["pyannote",["PyAnnoteForAudioFrameClassification",si]]]),cl=new Map([["vitmatte",["VitMatteForImageMatting",ws]]]),pl=new Map([["patchtst",["PatchTSTForPrediction",Ia]],["patchtsmixer",["PatchTSMixerForPrediction",Oa]]]),hl=new Map([["swin2sr",["Swin2SRForImageSuperResolution",lo]]]),ml=new Map([["dpt",["DPTForDepthEstimation",po]],["depth_anything",["DepthAnythingForDepthEstimation",mo]],["glpn",["GLPNForDepthEstimation",$o]],["sapiens",["SapiensForDepthEstimation",_o]],["depth_pro",["DepthProForDepthEstimation",bo]]]),fl=new Map([["sapiens",["SapiensForNormalEstimation",wo]]]),gl=new Map([["vitpose",["VitPoseForPoseEstimation",rs]]]),_l=new Map([["clip",["CLIPVisionModelWithProjection",Pn]],["siglip",["SiglipVisionModel",An]],["jina_clip",["JinaCLIPVisionModel",Dn]]]),wl=[[Da,b],[Na,v],[Ra,T],[Ua,b],[qa,b],[Wa,x],[ja,x],[Ha,T],[Xa,P],[Qa,b],[Ka,b],[Ya,M],[Ja,$],[el,b],[rl,b],[ol,b],[sl,b],[cl,b],[pl,b],[hl,b],[ml,b],[fl,b],[gl,b],[tl,b],[nl,b],[il,k],[al,b],[ll,b],[Va,x],[Ga,b],[dl,b],[ul,b],[_l,b]];for(const[e,t]of wl)for(const[n,r]of e.values())S.set(n,t),F.set(r,n),E.set(n,r);const yl=[["MusicgenForConditionalGeneration",ca,C],["CLIPTextModelWithProjection",$n,b],["SiglipTextModel",Fn,b],["JinaCLIPTextModel",Bn,b],["ClapTextModelWithProjection",Xi,b],["ClapAudioModelWithProjection",Qi,b]];for(const[e,t,n]of 
yl)S.set(e,n),F.set(t,e),E.set(e,t);class bl extends Ba{static MODEL_CLASS_MAPPINGS=wl.map((e=>e[0]));static BASE_IF_FAIL=!0}class vl extends Ba{static MODEL_CLASS_MAPPINGS=[Ua]}class xl extends Ba{static MODEL_CLASS_MAPPINGS=[qa]}class Ml extends Ba{static MODEL_CLASS_MAPPINGS=[Wa]}class Tl extends Ba{static MODEL_CLASS_MAPPINGS=[ja]}class kl extends Ba{static MODEL_CLASS_MAPPINGS=[Va]}class $l extends Ba{static MODEL_CLASS_MAPPINGS=[Ga]}class Cl extends Ba{static MODEL_CLASS_MAPPINGS=[Ha]}class Pl extends Ba{static MODEL_CLASS_MAPPINGS=[Qa]}class Sl extends Ba{static MODEL_CLASS_MAPPINGS=[Ka]}class El extends Ba{static MODEL_CLASS_MAPPINGS=[Ya]}class Fl extends Ba{static MODEL_CLASS_MAPPINGS=[el]}class Al extends Ba{static MODEL_CLASS_MAPPINGS=[rl]}class Il extends Ba{static MODEL_CLASS_MAPPINGS=[sl]}class zl extends Ba{static MODEL_CLASS_MAPPINGS=[ol]}class Ll extends Ba{static MODEL_CLASS_MAPPINGS=[tl]}class Ol extends Ba{static MODEL_CLASS_MAPPINGS=[nl]}class Bl extends Ba{static MODEL_CLASS_MAPPINGS=[il]}class Dl extends Ba{static MODEL_CLASS_MAPPINGS=[al]}class Nl extends Ba{static MODEL_CLASS_MAPPINGS=[ll]}class Rl extends Ba{static MODEL_CLASS_MAPPINGS=[dl]}class jl extends Ba{static MODEL_CLASS_MAPPINGS=[ul]}class Vl extends Ba{static MODEL_CLASS_MAPPINGS=[Za]}class Gl extends Ba{static MODEL_CLASS_MAPPINGS=[cl]}class Ul extends Ba{static MODEL_CLASS_MAPPINGS=[hl]}class ql extends Ba{static MODEL_CLASS_MAPPINGS=[ml]}class Wl extends Ba{static MODEL_CLASS_MAPPINGS=[fl]}class Hl extends Ba{static MODEL_CLASS_MAPPINGS=[gl]}class Xl extends Ba{static MODEL_CLASS_MAPPINGS=[_l]}class Ql extends Q{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:s=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=s}}class Kl extends Q{constructor({logits:e}){super(),this.logits=e}}class Yl extends 
Q{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Jl extends Q{constructor({logits:e}){super(),this.logits=e}}class Zl extends Q{constructor({logits:e}){super(),this.logits=e}}class ed extends Q{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class td extends Q{constructor({logits:e}){super(),this.logits=e}}class nd extends Q{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class rd extends Q{constructor({alphas:e}){super(),this.alphas=e}}class sd extends Q{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js":
|
|
185
185
|
/*!******************************************************************************************************!*\
|
|
186
186
|
!*** ./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js ***!
|
|
187
187
|
\******************************************************************************************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>o});var r=n(/*! ../../base/feature_extraction_utils.js */"./src/base/feature_extraction_utils.js"),s=(n(/*! ../../utils/tensor.js */"./src/utils/tensor.js"),n(/*! ../../utils/audio.js */"./src/utils/audio.js"));class o extends r.FeatureExtractor{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,s.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,s.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async _extract_fbank_features(e,t){return(0,s.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){(0,r.validate_audio_inputs)(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}},"./src/models/auto/feature_extraction_auto.js":
|
|
@@ -235,10 +235,16 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
235
235
|
\******************************************************/(e,t,n)=>{n.r(t),n.d(t,{Florence2Processor:()=>i});var r=n(/*! ../../base/processing_utils.js */"./src/base/processing_utils.js"),s=n(/*! ../auto/image_processing_auto.js */"./src/models/auto/image_processing_auto.js"),o=n(/*! ../../tokenizers.js */"./src/tokenizers.js");class i extends r.Processor{static tokenizer_class=o.AutoTokenizer;static image_processor_class=s.AutoImageProcessor;constructor(e,t){super(e,t);const{tasks_answer_post_processing_type:n,task_prompts_without_inputs:r,task_prompts_with_input:s}=this.image_processor.config;this.tasks_answer_post_processing_type=new Map(Object.entries(n??{})),this.task_prompts_without_inputs=new Map(Object.entries(r??{})),this.task_prompts_with_input=new Map(Object.entries(s??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let s;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":s=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const o="ocr"===r?"quad_boxes":"bboxes",i=e.matchAll(this.regexes[o]),a=[],l=[];for(const[e,t,...r]of i)a.push(t?t.trim():a.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));s={labels:a,[o]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:s}}async _call(e,t=null,n={}){if(!e&&!t)throw new Error("Either text or images must 
be provided");return{...await this.image_processor(e,n),...t?this.tokenizer(t,n):{}}}}},"./src/models/glpn/image_processing_glpn.js":
|
|
236
236
|
/*!**************************************************!*\
|
|
237
237
|
!*** ./src/models/glpn/image_processing_glpn.js ***!
|
|
238
|
-
\**************************************************/(e,t,n)=>{n.r(t),n.d(t,{GLPNFeatureExtractor:()=>s});var r=n(/*! ../../base/image_processors_utils.js */"./src/base/image_processors_utils.js");class s extends r.ImageProcessor{}},"./src/models/
|
|
238
|
+
\**************************************************/(e,t,n)=>{n.r(t),n.d(t,{GLPNFeatureExtractor:()=>s});var r=n(/*! ../../base/image_processors_utils.js */"./src/base/image_processors_utils.js");class s extends r.ImageProcessor{}},"./src/models/idefics3/image_processing_idefics3.js":
|
|
239
|
+
/*!**********************************************************!*\
|
|
240
|
+
!*** ./src/models/idefics3/image_processing_idefics3.js ***!
|
|
241
|
+
\**********************************************************/(e,t,n)=>{n.r(t),n.d(t,{Idefics3ImageProcessor:()=>o});var r=n(/*! ../../base/image_processors_utils.js */"./src/base/image_processors_utils.js"),s=n(/*! ../../utils/tensor.js */"./src/utils/tensor.js");class o extends r.ImageProcessor{constructor(e){super(e),this.do_image_splitting=e.do_image_splitting??!0,this.max_image_size=e.max_image_size}get_resize_for_vision_encoder(e,t){let[n,r]=e.dims.slice(-2);const s=r/n;return r>=n?(r=Math.ceil(r/t)*t,n=Math.floor(r/s),n=Math.ceil(n/t)*t):(n=Math.ceil(n/t)*t,r=Math.floor(n*s),r=Math.ceil(r/t)*t),{height:n,width:r}}async _call(e,{do_image_splitting:t=null,return_row_col_info:n=!1}={}){let r;if(Array.isArray(e)){if(0===e.length||!e[0])throw new Error("No images provided.");r=Array.isArray(e[0])?e:[e]}else r=[[e]];let o=[],i=[],a=[];const l=[],d=[];for(const e of r){let n=await Promise.all(e.map((e=>this.preprocess(e))));l.push(...n.map((e=>e.original_size))),d.push(...n.map((e=>e.reshaped_input_size))),n.forEach((e=>e.pixel_values.unsqueeze_(0)));const{longest_edge:r}=this.max_image_size;let u;if(t??this.do_image_splitting){let e=new Array(n.length),t=new Array(n.length);u=await Promise.all(n.map((async(n,o)=>{const i=this.get_resize_for_vision_encoder(n.pixel_values,r),a=await(0,s.interpolate_4d)(n.pixel_values,{size:[i.height,i.width]}),{frames:l,num_splits_h:d,num_splits_w:u}=await this.split_image(a,this.max_image_size);return e[o]=d,t[o]=u,(0,s.cat)(l,0)}))),i.push(e),a.push(t)}else{const e=[r,r];u=await Promise.all(n.map((t=>(0,s.interpolate_4d)(t.pixel_values,{size:e})))),i.push(new Array(n.length).fill(0)),a.push(new Array(n.length).fill(0))}o.push((0,s.cat)(u,0))}const u=o.length,[c,p,h,m]=o[0].dims;let f,g;if(1===u)f=o[0].unsqueeze_(0),g=(0,s.full)([u,c,h,m],!0);else{const e=Math.max(...o.map((e=>e.dims.at(0))));g=(0,s.full)([u,e,h,m],!0);const t=g.data,n=e*h*m;for(let r=0;r<u;++r){const 
i=o[r].dims[0];if(i<e){o[r]=(0,s.cat)([o[r],(0,s.full)([e-i,p,h,m],0)],0);const a=r*n+i*h*m,l=(r+1)*n;t.fill(!1,a,l)}}f=(0,s.stack)(o,0)}return{pixel_values:f,pixel_attention_mask:g,original_sizes:l,reshaped_input_sizes:d,...n?{rows:i,cols:a}:{}}}async split_image(e,{longest_edge:t}){const n=t,r=t,o=[],[i,a]=e.dims.slice(-2);let l=0,d=0;if(i>n||a>r){l=Math.ceil(i/n),d=Math.ceil(a/r);const t=Math.ceil(i/l),u=Math.ceil(a/d);for(let n=0;n<l;n++)for(let r=0;r<d;r++){const s=r*u,l=n*t,d=Math.min(s+u,a),c=Math.min(l+t,i);o.push(e.slice(null,null,[l,c],[s,d]))}const c=n,p=r;i===c&&a===p||(e=await(0,s.interpolate_4d)(e,{size:[c,p]}))}return o.push(e),{frames:o,num_splits_h:l,num_splits_w:d}}}},"./src/models/idefics3/processing_idefics3.js":
|
|
242
|
+
/*!****************************************************!*\
|
|
243
|
+
!*** ./src/models/idefics3/processing_idefics3.js ***!
|
|
244
|
+
\****************************************************/(e,t,n)=>{n.r(t),n.d(t,{Idefics3Processor:()=>l});var r=n(/*! ../../base/processing_utils.js */"./src/base/processing_utils.js"),s=n(/*! ../auto/image_processing_auto.js */"./src/models/auto/image_processing_auto.js"),o=n(/*! ../../tokenizers.js */"./src/tokenizers.js"),i=(n(/*! ../../utils/image.js */"./src/utils/image.js"),n(/*! ../../utils/core.js */"./src/utils/core.js"));function a(e,t,n,r,s,o){return 0===e&&0===t?function(e,t,n,r){return`${t}${r}`+n.repeat(e)+`${t}`}(n,r,s,o):function(e,t,n,r,s,o){let i="";for(let o=0;o<t;++o){for(let t=0;t<n;++t)i+=r+`<row_${o+1}_col_${t+1}>`+s.repeat(e);i+="\n"}return i+=`\n${r}${o}`+s.repeat(e)+`${r}`,i}(n,e,t,r,s,o)}class l extends r.Processor{static image_processor_class=s.AutoImageProcessor;static tokenizer_class=o.AutoTokenizer;static uses_processor_config=!0;fake_image_token="<fake_token_around_image>";image_token="<image>";global_img_token="<global-img>";async _call(e,t=null,n={}){let r;n.return_row_col_info??=!0,t&&(r=await this.image_processor(t,n)),Array.isArray(e)||(e=[e]);const s=r.rows??[new Array(e.length).fill(0)],o=r.cols??[new Array(e.length).fill(0)],l=this.config.image_seq_len,d=[],u=[];for(let t=0;t<e.length;++t){const n=e[t],r=s[t],c=o[t];d.push((0,i.count)(n,this.image_token));const p=r.map(((e,t)=>a(e,c[t],l,this.fake_image_token,this.image_token,this.global_img_token))),h=n.split(this.image_token);if(0===h.length)throw new Error("The image token should be present in the text.");let m=h[0];for(let e=0;e<p.length;++e)m+=p[e]+h[e+1];u.push(m)}return{...this.tokenizer(u),...r}}}},"./src/models/image_processors.js":
|
|
239
245
|
/*!****************************************!*\
|
|
240
246
|
!*** ./src/models/image_processors.js ***!
|
|
241
|
-
\****************************************/(e,t,n)=>{n.r(t),n.d(t,{BeitFeatureExtractor:()=>r.BeitFeatureExtractor,BitImageProcessor:()=>s.BitImageProcessor,CLIPFeatureExtractor:()=>i.CLIPFeatureExtractor,CLIPImageProcessor:()=>i.CLIPImageProcessor,ChineseCLIPFeatureExtractor:()=>o.ChineseCLIPFeatureExtractor,ConvNextFeatureExtractor:()=>a.ConvNextFeatureExtractor,ConvNextImageProcessor:()=>a.ConvNextImageProcessor,DPTFeatureExtractor:()=>c.DPTFeatureExtractor,DPTImageProcessor:()=>c.DPTImageProcessor,DeiTFeatureExtractor:()=>l.DeiTFeatureExtractor,DeiTImageProcessor:()=>l.DeiTImageProcessor,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrImageProcessor:()=>d.DetrImageProcessor,DonutFeatureExtractor:()=>u.DonutFeatureExtractor,DonutImageProcessor:()=>u.DonutImageProcessor,EfficientNetImageProcessor:()=>p.EfficientNetImageProcessor,GLPNFeatureExtractor:()=>h.GLPNFeatureExtractor,JinaCLIPImageProcessor:()=>
|
|
247
|
+
\****************************************/(e,t,n)=>{n.r(t),n.d(t,{BeitFeatureExtractor:()=>r.BeitFeatureExtractor,BitImageProcessor:()=>s.BitImageProcessor,CLIPFeatureExtractor:()=>i.CLIPFeatureExtractor,CLIPImageProcessor:()=>i.CLIPImageProcessor,ChineseCLIPFeatureExtractor:()=>o.ChineseCLIPFeatureExtractor,ConvNextFeatureExtractor:()=>a.ConvNextFeatureExtractor,ConvNextImageProcessor:()=>a.ConvNextImageProcessor,DPTFeatureExtractor:()=>c.DPTFeatureExtractor,DPTImageProcessor:()=>c.DPTImageProcessor,DeiTFeatureExtractor:()=>l.DeiTFeatureExtractor,DeiTImageProcessor:()=>l.DeiTImageProcessor,DetrFeatureExtractor:()=>d.DetrFeatureExtractor,DetrImageProcessor:()=>d.DetrImageProcessor,DonutFeatureExtractor:()=>u.DonutFeatureExtractor,DonutImageProcessor:()=>u.DonutImageProcessor,EfficientNetImageProcessor:()=>p.EfficientNetImageProcessor,GLPNFeatureExtractor:()=>h.GLPNFeatureExtractor,Idefics3ImageProcessor:()=>m.Idefics3ImageProcessor,JinaCLIPImageProcessor:()=>g.JinaCLIPImageProcessor,LlavaOnevisionImageProcessor:()=>_.LlavaOnevisionImageProcessor,Mask2FormerImageProcessor:()=>w.Mask2FormerImageProcessor,MaskFormerFeatureExtractor:()=>y.MaskFormerFeatureExtractor,MaskFormerImageProcessor:()=>y.MaskFormerImageProcessor,MobileNetV1FeatureExtractor:()=>b.MobileNetV1FeatureExtractor,MobileNetV1ImageProcessor:()=>b.MobileNetV1ImageProcessor,MobileNetV2FeatureExtractor:()=>v.MobileNetV2FeatureExtractor,MobileNetV2ImageProcessor:()=>v.MobileNetV2ImageProcessor,MobileNetV3FeatureExtractor:()=>x.MobileNetV3FeatureExtractor,MobileNetV3ImageProcessor:()=>x.MobileNetV3ImageProcessor,MobileNetV4FeatureExtractor:()=>M.MobileNetV4FeatureExtractor,MobileNetV4ImageProcessor:()=>M.MobileNetV4ImageProcessor,MobileViTFeatureExtractor:()=>T.MobileViTFeatureExtractor,MobileViTImageProcessor:()=>T.MobileViTImageProcessor,NougatImageProcessor:()=>k.NougatImageProcessor,OwlViTFeatureExtractor:()=>C.OwlViTFeatureExtractor,OwlViTImageProcessor:()=>C.OwlViTImageProcessor,Owlv2ImageProcessor:()=>
$.Owlv2ImageProcessor,PvtImageProcessor:()=>P.PvtImageProcessor,Qwen2VLImageProcessor:()=>S.Qwen2VLImageProcessor,RTDetrImageProcessor:()=>E.RTDetrImageProcessor,SamImageProcessor:()=>F.SamImageProcessor,SegformerFeatureExtractor:()=>A.SegformerFeatureExtractor,SegformerImageProcessor:()=>A.SegformerImageProcessor,SiglipImageProcessor:()=>I.SiglipImageProcessor,Swin2SRImageProcessor:()=>z.Swin2SRImageProcessor,VLMImageProcessor:()=>f.VLMImageProcessor,ViTFeatureExtractor:()=>L.ViTFeatureExtractor,ViTImageProcessor:()=>L.ViTImageProcessor,VitMatteImageProcessor:()=>O.VitMatteImageProcessor,VitPoseImageProcessor:()=>B.VitPoseImageProcessor,YolosFeatureExtractor:()=>D.YolosFeatureExtractor,YolosImageProcessor:()=>D.YolosImageProcessor});var r=n(/*! ./beit/image_processing_beit.js */"./src/models/beit/image_processing_beit.js"),s=n(/*! ./bit/image_processing_bit.js */"./src/models/bit/image_processing_bit.js"),o=n(/*! ./chinese_clip/image_processing_chinese_clip.js */"./src/models/chinese_clip/image_processing_chinese_clip.js"),i=n(/*! ./clip/image_processing_clip.js */"./src/models/clip/image_processing_clip.js"),a=n(/*! ./convnext/image_processing_convnext.js */"./src/models/convnext/image_processing_convnext.js"),l=n(/*! ./deit/image_processing_deit.js */"./src/models/deit/image_processing_deit.js"),d=n(/*! ./detr/image_processing_detr.js */"./src/models/detr/image_processing_detr.js"),u=n(/*! ./donut/image_processing_donut.js */"./src/models/donut/image_processing_donut.js"),c=n(/*! ./dpt/image_processing_dpt.js */"./src/models/dpt/image_processing_dpt.js"),p=n(/*! ./efficientnet/image_processing_efficientnet.js */"./src/models/efficientnet/image_processing_efficientnet.js"),h=n(/*! ./glpn/image_processing_glpn.js */"./src/models/glpn/image_processing_glpn.js"),m=n(/*! ./idefics3/image_processing_idefics3.js */"./src/models/idefics3/image_processing_idefics3.js"),f=n(/*! ./janus/image_processing_janus.js */"./src/models/janus/image_processing_janus.js"),g=n(/*! 
./jina_clip/image_processing_jina_clip.js */"./src/models/jina_clip/image_processing_jina_clip.js"),_=n(/*! ./llava_onevision/image_processing_llava_onevision.js */"./src/models/llava_onevision/image_processing_llava_onevision.js"),w=n(/*! ./mask2former/image_processing_mask2former.js */"./src/models/mask2former/image_processing_mask2former.js"),y=n(/*! ./maskformer/image_processing_maskformer.js */"./src/models/maskformer/image_processing_maskformer.js"),b=n(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */"./src/models/mobilenet_v1/image_processing_mobilenet_v1.js"),v=n(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */"./src/models/mobilenet_v2/image_processing_mobilenet_v2.js"),x=n(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */"./src/models/mobilenet_v3/image_processing_mobilenet_v3.js"),M=n(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */"./src/models/mobilenet_v4/image_processing_mobilenet_v4.js"),T=n(/*! ./mobilevit/image_processing_mobilevit.js */"./src/models/mobilevit/image_processing_mobilevit.js"),k=n(/*! ./nougat/image_processing_nougat.js */"./src/models/nougat/image_processing_nougat.js"),$=n(/*! ./owlv2/image_processing_owlv2.js */"./src/models/owlv2/image_processing_owlv2.js"),C=n(/*! ./owlvit/image_processing_owlvit.js */"./src/models/owlvit/image_processing_owlvit.js"),P=n(/*! ./pvt/image_processing_pvt.js */"./src/models/pvt/image_processing_pvt.js"),S=n(/*! ./qwen2_vl/image_processing_qwen2_vl.js */"./src/models/qwen2_vl/image_processing_qwen2_vl.js"),E=n(/*! ./rt_detr/image_processing_rt_detr.js */"./src/models/rt_detr/image_processing_rt_detr.js"),F=n(/*! ./sam/image_processing_sam.js */"./src/models/sam/image_processing_sam.js"),A=n(/*! ./segformer/image_processing_segformer.js */"./src/models/segformer/image_processing_segformer.js"),I=n(/*! ./siglip/image_processing_siglip.js */"./src/models/siglip/image_processing_siglip.js"),z=n(/*! 
./swin2sr/image_processing_swin2sr.js */"./src/models/swin2sr/image_processing_swin2sr.js"),L=n(/*! ./vit/image_processing_vit.js */"./src/models/vit/image_processing_vit.js"),O=n(/*! ./vitmatte/image_processing_vitmatte.js */"./src/models/vitmatte/image_processing_vitmatte.js"),B=n(/*! ./vitpose/image_processing_vitpose.js */"./src/models/vitpose/image_processing_vitpose.js"),D=n(/*! ./yolos/image_processing_yolos.js */"./src/models/yolos/image_processing_yolos.js")},"./src/models/janus/image_processing_janus.js":
|
|
242
248
|
/*!****************************************************!*\
|
|
243
249
|
!*** ./src/models/janus/image_processing_janus.js ***!
|
|
244
250
|
\****************************************************/(e,t,n)=>{n.r(t),n.d(t,{VLMImageProcessor:()=>s});var r=n(/*! ../../base/image_processors_utils.js */"./src/base/image_processors_utils.js");class s extends r.ImageProcessor{constructor(e){super({do_pad:!0,pad_size:{width:e.image_size,height:e.image_size},...e}),this.constant_values=this.config.background_color.map((e=>e*this.rescale_factor))}pad_image(e,t,n,r){return super.pad_image(e,t,n,{constant_values:this.constant_values,center:!0,...r})}}},"./src/models/janus/processing_janus.js":
|
|
@@ -292,7 +298,7 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
292
298
|
\************************************************/(e,t,n)=>{n.r(t),n.d(t,{OwlViTProcessor:()=>i});var r=n(/*! ../../base/processing_utils.js */"./src/base/processing_utils.js"),s=n(/*! ../auto/image_processing_auto.js */"./src/models/auto/image_processing_auto.js"),o=n(/*! ../../tokenizers.js */"./src/tokenizers.js");class i extends r.Processor{static tokenizer_class=o.AutoTokenizer;static image_processor_class=s.AutoImageProcessor}},"./src/models/processors.js":
|
|
293
299
|
/*!**********************************!*\
|
|
294
300
|
!*** ./src/models/processors.js ***!
|
|
295
|
-
\**********************************/(e,t,n)=>{n.r(t),n.d(t,{Florence2Processor:()=>r.Florence2Processor,JinaCLIPProcessor:()=>
|
|
301
|
+
\**********************************/(e,t,n)=>{n.r(t),n.d(t,{Florence2Processor:()=>r.Florence2Processor,Idefics3Processor:()=>o.Idefics3Processor,JinaCLIPProcessor:()=>a.JinaCLIPProcessor,MgpstrProcessor:()=>s.MgpstrProcessor,OwlViTProcessor:()=>l.OwlViTProcessor,PyAnnoteProcessor:()=>d.PyAnnoteProcessor,Qwen2VLProcessor:()=>u.Qwen2VLProcessor,SamProcessor:()=>c.SamProcessor,SpeechT5Processor:()=>p.SpeechT5Processor,VLChatProcessor:()=>i.VLChatProcessor,Wav2Vec2ProcessorWithLM:()=>h.Wav2Vec2ProcessorWithLM,WhisperProcessor:()=>m.WhisperProcessor});var r=n(/*! ./florence2/processing_florence2.js */"./src/models/florence2/processing_florence2.js"),s=n(/*! ./mgp_str/processing_mgp_str.js */"./src/models/mgp_str/processing_mgp_str.js"),o=n(/*! ./idefics3/processing_idefics3.js */"./src/models/idefics3/processing_idefics3.js"),i=n(/*! ./janus/processing_janus.js */"./src/models/janus/processing_janus.js"),a=n(/*! ./jina_clip/processing_jina_clip.js */"./src/models/jina_clip/processing_jina_clip.js"),l=n(/*! ./owlvit/processing_owlvit.js */"./src/models/owlvit/processing_owlvit.js"),d=n(/*! ./pyannote/processing_pyannote.js */"./src/models/pyannote/processing_pyannote.js"),u=n(/*! ./qwen2_vl/processing_qwen2_vl.js */"./src/models/qwen2_vl/processing_qwen2_vl.js"),c=n(/*! ./sam/processing_sam.js */"./src/models/sam/processing_sam.js"),p=n(/*! ./speecht5/processing_speecht5.js */"./src/models/speecht5/processing_speecht5.js"),h=n(/*! ./wav2vec2/processing_wav2vec2.js */"./src/models/wav2vec2/processing_wav2vec2.js"),m=n(/*! ./whisper/processing_whisper.js */"./src/models/whisper/processing_whisper.js")},"./src/models/pvt/image_processing_pvt.js":
|
|
296
302
|
/*!************************************************!*\
|
|
297
303
|
!*** ./src/models/pvt/image_processing_pvt.js ***!
|
|
298
304
|
\************************************************/(e,t,n)=>{n.r(t),n.d(t,{PvtImageProcessor:()=>s});var r=n(/*! ../../base/image_processors_utils.js */"./src/base/image_processors_utils.js");class s extends r.ImageProcessor{}},"./src/models/pyannote/feature_extraction_pyannote.js":
|
|
@@ -385,7 +391,7 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
385
391
|
\********************************/(e,t,n)=>{n.r(t),n.d(t,{CHAT_TEMPLATE_NAME:()=>l,CONFIG_NAME:()=>s,FEATURE_EXTRACTOR_NAME:()=>o,GENERATION_CONFIG_NAME:()=>d,GITHUB_ISSUE_URL:()=>r,IMAGE_PROCESSOR_NAME:()=>i,PROCESSOR_NAME:()=>a});const r="https://github.com/huggingface/transformers.js/issues/new/choose",s="config.json",o="preprocessor_config.json",i=o,a="processor_config.json",l="chat_template.json",d="generation_config.json"},"./src/utils/core.js":
|
|
386
392
|
/*!***************************!*\
|
|
387
393
|
!*** ./src/utils/core.js ***!
|
|
388
|
-
\***************************/(e,t,n)=>{function r(e,t){e&&e(t)}function s(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function o(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function i(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function a(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){return null==e||-1===e}function d(e){const t=[];let n=e;for(;Array.isArray(n);)t.push(n.length),n=n[0];return t}function u(e,t,n=void 0){const r=e[t];if(void 0!==r)return delete e[t],r;if(void 0===n)throw Error(`Key ${t} does not exist in object.`);return n}function c(...e){return Array.prototype.concat.apply([],e)}function p(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function h(e,t){return Math.abs((e+t)%(2*t)-t)}function m(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}function f(e){let t=0;for(const n of e)++t;return t}n.r(t),n.d(t,{calculateDimensions:()=>d,calculateReflectOffset:()=>h,dispatchCallback:()=>r,escapeRegExp:()=>o,isIntegralNumber:()=>a,isNullishDimension:()=>l,isTypedArray:()=>i,len:()=>f,mergeArrays:()=>c,pick:()=>m,pop:()=>u,product:()=>p,reverseDictionary:()=>s})},"./src/utils/data-structures.js":
|
|
394
|
+
\***************************/(e,t,n)=>{function r(e,t){e&&e(t)}function s(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function o(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function i(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function a(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){return null==e||-1===e}function d(e){const t=[];let n=e;for(;Array.isArray(n);)t.push(n.length),n=n[0];return t}function u(e,t,n=void 0){const r=e[t];if(void 0!==r)return delete e[t],r;if(void 0===n)throw Error(`Key ${t} does not exist in object.`);return n}function c(...e){return Array.prototype.concat.apply([],e)}function p(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function h(e,t){return Math.abs((e+t)%(2*t)-t)}function m(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}function f(e){let t=0;for(const n of e)++t;return t}function g(e,t){let n=0;for(const r of e)r===t&&++n;return n}n.r(t),n.d(t,{calculateDimensions:()=>d,calculateReflectOffset:()=>h,count:()=>g,dispatchCallback:()=>r,escapeRegExp:()=>o,isIntegralNumber:()=>a,isNullishDimension:()=>l,isTypedArray:()=>i,len:()=>f,mergeArrays:()=>c,pick:()=>m,pop:()=>u,product:()=>p,reverseDictionary:()=>s})},"./src/utils/data-structures.js":
|
|
389
395
|
/*!**************************************!*\
|
|
390
396
|
!*** ./src/utils/data-structures.js ***!
|
|
391
397
|
\**************************************/(e,t,n)=>{n.r(t),n.d(t,{CharTrie:()=>s,PriorityQueue:()=>r,TokenLattice:()=>i});class r{constructor(e=(e,t)=>e>t,t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const n=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=n}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class s{constructor(){this.root=o.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const n of e){let e=t.children.get(n);void 0===e&&(e=o.default(),t.children.set(n,e)),t=e}t.isLeaf=!0}*commonPrefixSearch(e){let t=this.root;if(void 0===t)return;let n="";for(const r of e){if(n+=r,t=t.children.get(r),void 0===t)return;t.isLeaf&&(yield n)}}}class o{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new o(!1,new Map)}}class 
i{constructor(e,t,n){this.chars=Array.from(e),this.len=this.chars.length,this.bosTokenId=t,this.eosTokenId=n,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const r=new a(this.bosTokenId,0,0,0,0),s=new a(this.eosTokenId,1,this.len,0,0);this.nodes.push(r.clone()),this.nodes.push(s.clone()),this.beginNodes[this.len].push(s),this.endNodes[0].push(r)}insert(e,t,n,r){const s=this.nodes.length,o=new a(r,s,e,t,n);this.beginNodes[e].push(o),this.endNodes[e+t].push(o),this.nodes.push(o)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let n=0,r=null;for(let s of this.endNodes[t]){const t=s.backtraceScore+e.score;(null===r||t>n)&&(r=s.clone(),n=t)}if(null===r)return[];e.prev=r,e.backtraceScore=n}++t}const n=[],r=this.beginNodes[e][0].prev;if(null===r)return[];let s=r.clone();for(;null!==s.prev;){n.push(s.clone());const e=s.clone();s=e.prev.clone()}return n.reverse(),n}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map((e=>this.piece(e)))}tokenIds(){return this.viterbi().map((e=>e.tokenId))}}class a{constructor(e,t,n,r,s){this.tokenId=e,this.nodeId=t,this.pos=n,this.length=r,this.score=s,this.prev=null,this.backtraceScore=0}clone(){const e=new a(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
|
|
@@ -394,7 +400,7 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
394
400
|
\******************************/(e,t,n)=>{n.r(t),n.d(t,{DEVICE_TYPES:()=>r});const r=Object.freeze({auto:"auto",gpu:"gpu",cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:"webnn","webnn-npu":"webnn-npu","webnn-gpu":"webnn-gpu","webnn-cpu":"webnn-cpu"})},"./src/utils/dtypes.js":
|
|
395
401
|
/*!*****************************!*\
|
|
396
402
|
!*** ./src/utils/dtypes.js ***!
|
|
397
|
-
\*****************************/(e,t,n)=>{n.r(t),n.d(t,{DATA_TYPES:()=>i,DEFAULT_DEVICE_DTYPE_MAPPING:()=>a,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>o});var r=n(/*! ../env.js */"./src/env.js"),s=n(/*! ./devices.js */"./src/utils/devices.js");const o=function(){let e;return async function(){if(void 0===e)if(r.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),i=Object.freeze({fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),a=Object.freeze({[s.DEVICE_TYPES.wasm]:i.q8}),l=Object.freeze({[i.fp32]:"",[i.fp16]:"_fp16",[i.int8]:"_int8",[i.uint8]:"_uint8",[i.q8]:"_quantized",[i.q4]:"_q4",[i.q4f16]:"_q4f16",[i.bnb4]:"_bnb4"})},"./src/utils/generic.js":
|
|
403
|
+
\*****************************/(e,t,n)=>{n.r(t),n.d(t,{DATA_TYPES:()=>i,DEFAULT_DEVICE_DTYPE_MAPPING:()=>a,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>o});var r=n(/*! ../env.js */"./src/env.js"),s=n(/*! ./devices.js */"./src/utils/devices.js");const o=function(){let e;return async function(){if(void 0===e)if(r.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),i=Object.freeze({auto:"auto",fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),a=Object.freeze({[s.DEVICE_TYPES.wasm]:i.q8}),l=Object.freeze({[i.fp32]:"",[i.fp16]:"_fp16",[i.int8]:"_int8",[i.uint8]:"_uint8",[i.q8]:"_quantized",[i.q4]:"_q4",[i.q4f16]:"_q4f16",[i.bnb4]:"_bnb4"})},"./src/utils/generic.js":
|
|
398
404
|
/*!******************************!*\
|
|
399
405
|
!*** ./src/utils/generic.js ***!
|
|
400
406
|
\******************************/(e,t,n)=>{n.r(t),n.d(t,{Callable:()=>r});const r=class{constructor(){let e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}_call(...e){throw Error("Must implement _call method in subclass")}}},"./src/utils/hub.js":
|
|
@@ -403,14 +409,14 @@ var r,s,o,i,a,l,d,u,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,C,P,S,E,F,A,I,z,L,O,B=Object
|
|
|
403
409
|
\**************************/(e,t,n)=>{n.r(t),n.d(t,{getFile:()=>u,getModelFile:()=>h,getModelJSON:()=>m});var r=n(/*! fs */"?7a2c"),s=n(/*! path */"?a42a"),o=n(/*! ../env.js */"./src/env.js"),i=n(/*! ./core.js */"./src/utils/core.js");const a={txt:"text/plain",html:"text/html",css:"text/css",js:"text/javascript",json:"application/json",png:"image/png",jpg:"image/jpeg",jpeg:"image/jpeg",gif:"image/gif"};class l{constructor(e){if(this.filePath=e,this.headers=new Headers,this.exists=r.existsSync(e),this.exists){this.status=200,this.statusText="OK";let t=r.statSync(e);this.headers.set("content-length",t.size.toString()),this.updateContentType();let n=this;this.body=new ReadableStream({start(e){n.arrayBuffer().then((t=>{e.enqueue(new Uint8Array(t)),e.close()}))}})}else this.status=404,this.statusText="Not Found",this.body=null}updateContentType(){const e=this.filePath.toString().split(".").pop().toLowerCase();this.headers.set("content-type",a[e]??"application/octet-stream")}clone(){let e=new l(this.filePath);return e.exists=this.exists,e.status=this.status,e.statusText=this.statusText,e.headers=new Headers(this.headers),e}async arrayBuffer(){return(await r.promises.readFile(this.filePath)).buffer}async blob(){const e=await r.promises.readFile(this.filePath);return new Blob([e],{type:this.headers.get("content-type")})}async text(){return await r.promises.readFile(this.filePath,"utf8")}async json(){return JSON.parse(await this.text())}}function d(e,t=null,n=null){let r;try{r=new URL(e)}catch(e){return!1}return!(t&&!t.includes(r.protocol))&&!(n&&!n.includes(r.hostname))}async function u(e){if(o.env.useFS&&!d(e,["http:","https:","blob:"]))return new l(e);if("undefined"!=typeof process&&"node"===process?.release?.name){const t=!!process.env?.TESTING_REMOTELY,n=o.env.version,r=new Headers;r.set("User-Agent",`transformers.js/${n}; is_ci/${t};`);if(d(e,["http:","https:"],["huggingface.co","hf.co"])){const 
e=process.env?.HF_TOKEN??process.env?.HF_ACCESS_TOKEN;e&&r.set("Authorization",`Bearer ${e}`)}return fetch(e,{headers:r})}return fetch(e)}const c={400:"Bad request error occurred while trying to load file",401:"Unauthorized access to file",403:"Forbidden access to file",404:"Could not locate file",408:"Request timeout error occurred while trying to load file",500:"Internal server error error occurred while trying to load file",502:"Bad gateway error occurred while trying to load file",503:"Service unavailable error occurred while trying to load file",504:"Gateway timeout error occurred while trying to load file"};class p{constructor(e){this.path=e}async match(e){let t=s.join(this.path,e),n=new l(t);return n.exists?n:void 0}async put(e,t){const n=Buffer.from(await t.arrayBuffer());let o=s.join(this.path,e);try{await r.promises.mkdir(s.dirname(o),{recursive:!0}),await r.promises.writeFile(o,n)}catch(e){console.warn("An error occurred while writing the file to cache:",e)}}}async function h(e,t,n=!0,r={}){if(!o.env.allowLocalModels){if(r.local_files_only)throw Error("Invalid configuration detected: local models are disabled (`env.allowLocalModels=false`) but you have requested to only use local models (`local_files_only=true`).");if(!o.env.allowRemoteModels)throw Error("Invalid configuration detected: both local and remote models are disabled. 
Fix by setting `env.allowLocalModels` or `env.allowRemoteModels` to `true`.")}let s;if((0,i.dispatchCallback)(r.progress_callback,{status:"initiate",name:e,file:t}),!s&&o.env.useBrowserCache){if("undefined"==typeof caches)throw Error("Browser cache is not available in this environment.");try{s=await caches.open("transformers-cache")}catch(e){console.warn("An error occurred while opening the browser cache:",e)}}if(!s&&o.env.useFSCache&&(s=new p(r.cache_dir??o.env.cacheDir)),!s&&o.env.useCustomCache){if(!o.env.customCache)throw Error("`env.useCustomCache=true`, but `env.customCache` is not defined.");if(!o.env.customCache.match||!o.env.customCache.put)throw new Error("`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache");s=o.env.customCache}const a=r.revision??"main";let l,h,m=f(e,t),g=f(o.env.localModelPath,m),_=f(o.env.remoteHost,o.env.remotePathTemplate.replaceAll("{model}",e).replaceAll("{revision}",encodeURIComponent(a)),t),w="main"===a?m:f(e,a,t),y=s instanceof p?w:_,b=!1;s&&(h=await async function(e,...t){for(let n of t)try{let t=await e.match(n);if(t)return t}catch(e){continue}}(s,g,y));const v=void 0!==h;if(void 0===h){if(o.env.allowLocalModels){if(d(m,["http:","https:"])){if(r.local_files_only)throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${m}.`);if(!o.env.allowRemoteModels)throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${m}.`)}else try{h=await u(g),l=g}catch(e){console.warn(`Unable to load from local path "${g}": "${e}"`)}}if(void 0===h||404===h.status){if(r.local_files_only||!o.env.allowRemoteModels){if(n)throw Error(`\`local_files_only=true\` or \`env.allowRemoteModels=false\` and file was not found locally at "${g}".`);return null}if(h=await u(_),200!==h.status)return function(e,t,n){if(!n)return null;const r=c[e]??`Error (${e}) 
occurred while trying to load file`;throw Error(`${r}: "${t}".`)}(h.status,_,n);l=y}b=s&&"undefined"!=typeof Response&&h instanceof Response&&200===h.status}(0,i.dispatchCallback)(r.progress_callback,{status:"download",name:e,file:t});const x={status:"progress",name:e,file:t};let M;return r.progress_callback?v&&"undefined"!=typeof navigator&&/firefox/i.test(navigator.userAgent)?(M=new Uint8Array(await h.arrayBuffer()),(0,i.dispatchCallback)(r.progress_callback,{...x,progress:100,loaded:M.length,total:M.length})):M=await async function(e,t){const n=e.headers.get("Content-Length");null===n&&console.warn("Unable to determine content-length from response headers. Will expand buffer when needed.");let r=parseInt(n??"0"),s=new Uint8Array(r),o=0;const i=e.body.getReader();async function a(){const{done:e,value:n}=await i.read();if(e)return;let l=o+n.length;if(l>r){r=l;let e=new Uint8Array(r);e.set(s),s=e}s.set(n,o),o=l;return t({progress:o/r*100,loaded:o,total:r}),a()}return await a(),s}(h,(e=>{(0,i.dispatchCallback)(r.progress_callback,{...x,...e})})):M=new Uint8Array(await h.arrayBuffer()),b&&l&&void 0===await s.match(l)&&await s.put(l,new Response(M,{headers:h.headers})).catch((e=>{console.warn(`Unable to add response to browser cache: ${e}.`)})),(0,i.dispatchCallback)(r.progress_callback,{status:"done",name:e,file:t}),M}async function m(e,t,n=!0,r={}){let s=await h(e,t,n,r);if(null===s)return{};let o=new TextDecoder("utf-8").decode(s);return JSON.parse(o)}function f(...e){return(e=e.map(((t,n)=>(n&&(t=t.replace(new RegExp("^/"),"")),n!==e.length-1&&(t=t.replace(new RegExp("/$"),"")),t)))).join("/")}},"./src/utils/image.js":
|
|
404
410
|
/*!****************************!*\
|
|
405
411
|
!*** ./src/utils/image.js ***!
|
|
406
|
-
\****************************/(e,t,n)=>{n.r(t),n.d(t,{RawImage:()=>f});var r=n(/*! ./core.js */"./src/utils/core.js"),s=n(/*! ./hub.js */"./src/utils/hub.js"),o=n(/*! ../env.js */"./src/env.js"),i=n(/*! ./tensor.js */"./src/utils/tensor.js"),a=n(/*! sharp */"?2b25");
|
|
412
|
+
\****************************/(e,t,n)=>{n.r(t),n.d(t,{RawImage:()=>m,load_image:()=>f});var r=n(/*! ./core.js */"./src/utils/core.js"),s=n(/*! ./hub.js */"./src/utils/hub.js"),o=n(/*! ../env.js */"./src/env.js"),i=n(/*! ./tensor.js */"./src/utils/tensor.js"),a=n(/*! sharp */"?2b25");let l,d,u;const c=o.apis.IS_BROWSER_ENV||o.apis.IS_WEBWORKER_ENV;if(c)l=(e,t)=>{if(!self.OffscreenCanvas)throw new Error("OffscreenCanvas not supported by this browser.");return new self.OffscreenCanvas(e,t)},u=self.createImageBitmap,d=self.ImageData;else{if(!a)throw new Error("Unable to load image processing library.");u=async e=>{const t=(await e.metadata()).channels,{data:n,info:r}=await e.rotate().raw().toBuffer({resolveWithObject:!0}),s=new m(new Uint8ClampedArray(n),r.width,r.height,r.channels);return void 0!==t&&t!==r.channels&&s.convert(t),s}}const p={0:"nearest",1:"lanczos",2:"bilinear",3:"bicubic",4:"box",5:"hamming"},h=new Map([["png","image/png"],["jpg","image/jpeg"],["jpeg","image/jpeg"],["gif","image/gif"]]);class m{constructor(e,t,n,r){this.data=e,this.width=t,this.height=n,this.channels=r}get size(){return[this.width,this.height]}static async read(e){if(e instanceof m)return e;if("string"==typeof e||e instanceof URL)return await this.fromURL(e);throw new Error("Unsupported input type: "+typeof e)}static fromCanvas(e){if(!c)throw new Error("fromCanvas() is only supported in browser environments.");const t=e.getContext("2d").getImageData(0,0,e.width,e.height).data;return new m(t,e.width,e.height,4)}static async fromURL(e){const t=await(0,s.getFile)(e);if(200!==t.status)throw new Error(`Unable to read image from "${e}" (${t.status} ${t.statusText})`);const n=await t.blob();return this.fromBlob(n)}static async fromBlob(e){if(c){const t=await u(e),n=l(t.width,t.height).getContext("2d");return n.drawImage(t,0,0),new this(n.getImageData(0,0,t.width,t.height).data,t.width,t.height,4)}{const t=a(await e.arrayBuffer());return await u(t)}}static 
fromTensor(e,t="CHW"){if(3!==e.dims.length)throw new Error(`Tensor should have 3 dimensions, but has ${e.dims.length} dimensions.`);if("CHW"===t)e=e.transpose(1,2,0);else if("HWC"!==t)throw new Error(`Unsupported channel format: ${t}`);if(!(e.data instanceof Uint8ClampedArray||e.data instanceof Uint8Array))throw new Error(`Unsupported tensor type: ${e.type}`);switch(e.dims[2]){case 1:case 2:case 3:case 4:return new m(e.data,e.dims[1],e.dims[0],e.dims[2]);default:throw new Error(`Unsupported number of channels: ${e.dims[2]}`)}}grayscale(){if(1===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*1);switch(this.channels){case 3:case 4:for(let t=0,n=0;t<this.data.length;t+=this.channels){const r=this.data[t],s=this.data[t+1],o=this.data[t+2];e[n++]=Math.round(.2989*r+.587*s+.114*o)}break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,1)}rgb(){if(3===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*3);switch(this.channels){case 1:for(let t=0,n=0;t<this.data.length;++t)e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=this.data[t];break;case 4:for(let t=0,n=0;t<this.data.length;t+=4)e[n++]=this.data[t],e[n++]=this.data[t+1],e[n++]=this.data[t+2];break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,3)}rgba(){if(4===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*4);switch(this.channels){case 1:for(let t=0,n=0;t<this.data.length;++t)e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=255;break;case 3:for(let t=0,n=0;t<this.data.length;t+=3)e[n++]=this.data[t],e[n++]=this.data[t+1],e[n++]=this.data[t+2],e[n++]=255;break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,4)}async 
resize(e,t,{resample:n=2}={}){if(this.width===e&&this.height===t)return this;let s=p[n]??n;const o=(0,r.isNullishDimension)(e),i=(0,r.isNullishDimension)(t);if(o&&i)return this;if(o?e=t/this.height*this.width:i&&(t=e/this.width*this.height),c){const n=this.channels,r=this.toCanvas(),s=l(e,t).getContext("2d");s.drawImage(r,0,0,e,t);return new m(s.getImageData(0,0,e,t).data,e,t,4).convert(n)}{let n=this.toSharp();switch(s){case"box":case"hamming":"box"!==s&&"hamming"!==s||(console.warn(`Resampling method ${s} is not yet supported. Using bilinear instead.`),s="bilinear");case"nearest":case"bilinear":case"bicubic":n=n.affine([e/this.width,0,0,t/this.height],{interpolator:s});break;case"lanczos":n=n.resize({width:e,height:t,fit:"fill",kernel:"lanczos3"});break;default:throw new Error(`Resampling method ${s} is not supported.`)}return await u(n)}}async pad([e,t,n,r]){if(e=Math.max(e,0),t=Math.max(t,0),n=Math.max(n,0),r=Math.max(r,0),0===e&&0===t&&0===n&&0===r)return this;if(c){const s=this.channels,o=this.toCanvas(),i=this.width+e+t,a=this.height+n+r,d=l(i,a).getContext("2d");d.drawImage(o,0,0,this.width,this.height,e,n,this.width,this.height);return new m(d.getImageData(0,0,i,a).data,i,a,4).convert(s)}{const s=this.toSharp().extend({left:e,right:t,top:n,bottom:r});return await u(s)}}async crop([e,t,n,r]){if(e=Math.max(e,0),t=Math.max(t,0),n=Math.min(n,this.width-1),r=Math.min(r,this.height-1),0===e&&0===t&&n===this.width-1&&r===this.height-1)return this;const s=n-e+1,o=r-t+1;if(c){const n=this.channels,r=this.toCanvas(),i=l(s,o).getContext("2d");i.drawImage(r,e,t,s,o,0,0,s,o);return new m(i.getImageData(0,0,s,o).data,s,o,4).convert(n)}{const n=this.toSharp().extract({left:e,top:t,width:s,height:o});return await u(n)}}async center_crop(e,t){if(this.width===e&&this.height===t)return this;const n=(this.width-e)/2,r=(this.height-t)/2;if(c){const s=this.channels,o=this.toCanvas(),i=l(e,t).getContext("2d");let 
a=0,d=0,u=0,c=0;n>=0?a=n:u=-n,r>=0?d=r:c=-r,i.drawImage(o,a,d,e,t,u,c,e,t);return new m(i.getImageData(0,0,e,t).data,e,t,4).convert(s)}{let s=this.toSharp();if(n>=0&&r>=0)s=s.extract({left:Math.floor(n),top:Math.floor(r),width:e,height:t});else if(n<=0&&r<=0){const o=Math.floor(-r),i=Math.floor(-n);s=s.extend({top:o,left:i,right:e-this.width-i,bottom:t-this.height-o})}else{let o=[0,0],i=0;r<0?(o[0]=Math.floor(-r),o[1]=t-this.height-o[0]):i=Math.floor(r);let a=[0,0],l=0;n<0?(a[0]=Math.floor(-n),a[1]=e-this.width-a[0]):l=Math.floor(n),s=s.extend({top:o[0],bottom:o[1],left:a[0],right:a[1]}).extract({left:l,top:i,width:e,height:t})}return await u(s)}}async toBlob(e="image/png",t=1){if(!c)throw new Error("toBlob() is only supported in browser environments.");const n=this.toCanvas();return await n.convertToBlob({type:e,quality:t})}toTensor(e="CHW"){let t=new i.Tensor("uint8",new Uint8Array(this.data),[this.height,this.width,this.channels]);if("HWC"===e);else{if("CHW"!==e)throw new Error(`Unsupported channel format: ${e}`);t=t.permute(2,0,1)}return t}toCanvas(){if(!c)throw new Error("toCanvas() is only supported in browser environments.");const e=this.clone().rgba(),t=l(e.width,e.height),n=new d(e.data,e.width,e.height);return t.getContext("2d").putImageData(n,0,0),t}split(){const{data:e,width:t,height:n,channels:r}=this,s=e.constructor,o=e.length/r,i=Array.from({length:r},(()=>new s(o)));for(let t=0;t<o;++t){const n=r*t;for(let s=0;s<r;++s)i[s][t]=e[n+s]}return i.map((e=>new m(e,t,n,1)))}_update(e,t,n,r=null){return this.data=e,this.width=t,this.height=n,null!==r&&(this.channels=r),this}clone(){return new m(this.data.slice(),this.width,this.height,this.channels)}convert(e){if(this.channels===e)return this;switch(e){case 1:this.grayscale();break;case 3:this.rgb();break;case 4:this.rgba();break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this}async save(e){if(!c){if(o.env.useFS){const t=this.toSharp();return 
await t.toFile(e)}throw new Error("Unable to save the image because filesystem is disabled in this environment.")}{if(o.apis.IS_WEBWORKER_ENV)throw new Error("Unable to save an image from a Web Worker.");const t=e.split(".").pop().toLowerCase(),n=h.get(t)??"image/png",r=await this.toBlob(n),s=URL.createObjectURL(r),i=document.createElement("a");i.href=s,i.download=e,i.click(),i.remove()}}toSharp(){if(c)throw new Error("toSharp() is only supported in server-side environments.");return a(this.data,{raw:{width:this.width,height:this.height,channels:this.channels}})}}const f=m.read.bind(m)},"./src/utils/maths.js":
|
|
407
413
|
/*!****************************!*\
|
|
408
414
|
!*** ./src/utils/maths.js ***!
|
|
409
415
|
\****************************/(e,t,n)=>{function r(e,[t,n,r],[s,o],i="bilinear",a=!1){const l=o/r,d=s/n,u=new e.constructor(s*o*t),c=n*r,p=s*o;for(let i=0;i<s;++i)for(let s=0;s<o;++s){const a=i*o+s,h=(s+.5)/l-.5,m=(i+.5)/d-.5;let f=Math.floor(h),g=Math.floor(m);const _=Math.min(f+1,r-1),w=Math.min(g+1,n-1);f=Math.max(f,0),g=Math.max(g,0);const y=h-f,b=m-g,v=(1-y)*(1-b),x=y*(1-b),M=(1-y)*b,T=y*b,k=g*r,$=w*r,C=k+f,P=k+_,S=$+f,E=$+_;for(let n=0;n<t;++n){const t=n*c;u[n*p+a]=v*e[t+C]+x*e[t+P]+M*e[t+S]+T*e[t+E]}}return u}function s(e,t,n){const r=new Array(n.length),s=new Array(n.length);for(let e=n.length-1,o=1;e>=0;--e)s[e]=o,r[e]=t[n[e]],o*=r[e];const o=n.map(((e,t)=>s[n.indexOf(t)])),i=new e.constructor(e.length);for(let n=0;n<e.length;++n){let r=0;for(let e=t.length-1,s=n;e>=0;--e)r+=s%t[e]*o[e],s=Math.floor(s/t[e]);i[r]=e[n]}return[i,r]}function o(e){const t=c(e)[0],n=e.map((e=>Math.exp(e-t))),r=n.reduce(((e,t)=>e+t),0);return n.map((e=>e/r))}function i(e){const t=c(e)[0];let n=0;for(let r=0;r<e.length;++r)n+=Math.exp(e[r]-t);const r=Math.log(n);return e.map((e=>e-t-r))}function a(e,t){let n=0;for(let r=0;r<e.length;++r)n+=e[r]*t[r];return n}function l(e,t){return a(e,t)/(d(e)*d(t))}function d(e){return Math.sqrt(e.reduce(((e,t)=>e+t*t),0))}function u(e){if(0===e.length)throw Error("Array must not be empty");let t=e[0],n=0;for(let r=1;r<e.length;++r)e[r]<t&&(t=e[r],n=r);return[t,n]}function c(e){if(0===e.length)throw Error("Array must not be empty");let t=e[0],n=0;for(let r=1;r<e.length;++r)e[r]>t&&(t=e[r],n=r);return[Number(t),n]}function p(e){return e>0&&!(e&e-1)}n.r(t),n.d(t,{FFT:()=>f,bankers_round:()=>w,cos_sim:()=>l,dot:()=>a,dynamic_time_warping:()=>y,interpolate_data:()=>r,log_softmax:()=>i,magnitude:()=>d,max:()=>c,medianFilter:()=>g,min:()=>u,permute_data:()=>s,round:()=>_,softmax:()=>o});class h{constructor(e){if(this.size=0|e,this.size<=1||!p(this.size))throw new Error("FFT size must be a power of two larger than 1");this._csize=e<<1,this.table=new 
Float64Array(2*this.size);for(let e=0;e<this.table.length;e+=2){const t=Math.PI*e/this.size;this.table[e]=Math.cos(t),this.table[e+1]=-Math.sin(t)}let t=0;for(let e=1;this.size>e;e<<=1)++t;this._width=t%2==0?t-1:t,this._bitrev=new Int32Array(1<<this._width);for(let e=0;e<this._bitrev.length;++e){this._bitrev[e]=0;for(let t=0;t<this._width;t+=2){const n=this._width-t-2;this._bitrev[e]|=(e>>>t&3)<<n}}}createComplexArray(){return new Float64Array(this._csize)}fromComplexArray(e,t){const n=t||new Array(e.length>>>1);for(let t=0;t<e.length;t+=2)n[t>>>1]=e[t];return n}toComplexArray(e,t){const n=t||this.createComplexArray();for(let t=0;t<n.length;t+=2)n[t]=e[t>>>1],n[t+1]=0;return n}transform(e,t){if(e===t)throw new Error("Input and output buffers must be different");this._transform4(e,t,1)}realTransform(e,t){if(e===t)throw new Error("Input and output buffers must be different");this._realTransform4(e,t,1)}inverseTransform(e,t){if(e===t)throw new Error("Input and output buffers must be different");this._transform4(e,t,-1);for(let t=0;t<e.length;++t)e[t]/=this.size}_transform4(e,t,n){const r=this._csize;let s,o,i=1<<this._width,a=r/i<<1;const l=this._bitrev;if(4===a)for(s=0,o=0;s<r;s+=a,++o){const n=l[o];this._singleTransform2(t,e,s,n,i)}else for(s=0,o=0;s<r;s+=a,++o){const r=l[o];this._singleTransform4(t,e,s,r,i,n)}const d=this.table;for(i>>=2;i>=2;i>>=2){a=r/i<<1;const t=a>>>2;for(s=0;s<r;s+=a){const r=s+t-1;for(let o=s,a=0;o<r;o+=2,a+=i){const r=o,s=r+t,i=s+t,l=i+t,u=e[r],c=e[r+1],p=e[s],h=e[s+1],m=e[i],f=e[i+1],g=e[l],_=e[l+1],w=d[a],y=n*d[a+1],b=p*w-h*y,v=p*y+h*w,x=d[2*a],M=n*d[2*a+1],T=m*x-f*M,k=m*M+f*x,$=d[3*a],C=n*d[3*a+1],P=g*$-_*C,S=g*C+_*$,E=u+T,F=c+k,A=u-T,I=c-k,z=b+P,L=v+S,O=n*(b-P),B=n*(v-S);e[r]=E+z,e[r+1]=F+L,e[s]=A+B,e[s+1]=I-O,e[i]=E-z,e[i+1]=F-L,e[l]=A-B,e[l+1]=I+O}}}}_singleTransform2(e,t,n,r,s){const o=e[r],i=e[r+1],a=e[r+s],l=e[r+s+1];t[n]=o+a,t[n+1]=i+l,t[n+2]=o-a,t[n+3]=i-l}_singleTransform4(e,t,n,r,s,o){const 
i=2*s,a=3*s,l=e[r],d=e[r+1],u=e[r+s],c=e[r+s+1],p=e[r+i],h=e[r+i+1],m=e[r+a],f=e[r+a+1],g=l+p,_=d+h,w=l-p,y=d-h,b=u+m,v=c+f,x=o*(u-m),M=o*(c-f);t[n]=g+b,t[n+1]=_+v,t[n+2]=w+M,t[n+3]=y-x,t[n+4]=g-b,t[n+5]=_-v,t[n+6]=w-M,t[n+7]=y+x}_realTransform4(e,t,n){const r=this._csize;let s,o,i=1<<this._width,a=r/i<<1;const l=this._bitrev;if(4===a)for(s=0,o=0;s<r;s+=a,++o){const n=l[o];this._singleRealTransform2(t,e,s,n>>>1,i>>>1)}else for(s=0,o=0;s<r;s+=a,++o){const r=l[o];this._singleRealTransform4(t,e,s,r>>>1,i>>>1,n)}const d=this.table;for(i>>=2;i>=2;i>>=2){a=r/i<<1;const t=a>>>1,o=t>>>1,l=o>>>1;for(s=0;s<r;s+=a)for(let r=0,a=0;r<=l;r+=2,a+=i){const i=s+r,u=i+o,c=u+o,p=c+o,h=e[i],m=e[i+1],f=e[u],g=e[u+1],_=e[c],w=e[c+1],y=e[p],b=e[p+1],v=h,x=m,M=d[a],T=n*d[a+1],k=f*M-g*T,$=f*T+g*M,C=d[2*a],P=n*d[2*a+1],S=_*C-w*P,E=_*P+w*C,F=d[3*a],A=n*d[3*a+1],I=y*F-b*A,z=y*A+b*F,L=v+S,O=x+E,B=v-S,D=x-E,N=k+I,R=$+z,j=n*(k-I),V=n*($-z);if(e[i]=L+N,e[i+1]=O+R,e[u]=B+V,e[u+1]=D-j,0===r){e[c]=L-N,e[c+1]=O-R;continue}if(r===l)continue;const G=s+o-r,U=s+t-r;e[G]=B-n*V,e[G+1]=-D-n*j,e[U]=L-n*N,e[U+1]=n*R-O}}const u=r>>>1;for(let t=2;t<u;t+=2)e[r-t]=e[t],e[r-t+1]=-e[t+1]}_singleRealTransform2(e,t,n,r,s){const o=e[r],i=e[r+s];t[n]=o+i,t[n+1]=0,t[n+2]=o-i,t[n+3]=0}_singleRealTransform4(e,t,n,r,s,o){const i=2*s,a=3*s,l=e[r],d=e[r+s],u=e[r+i],c=e[r+a],p=l+u,h=l-u,m=d+c,f=o*(d-c);t[n]=p+m,t[n+1]=0,t[n+2]=h,t[n+3]=-f,t[n+4]=p-m,t[n+5]=0,t[n+6]=h,t[n+7]=f}}class m{constructor(e){const t=2*(e-1),n=2*(2*e-1),r=2**Math.ceil(Math.log2(n));this.bufferSize=r,this._a=t;const s=new Float64Array(n),o=new Float64Array(r);this._chirpBuffer=new Float64Array(r),this._buffer1=new Float64Array(r),this._buffer2=new Float64Array(r),this._outBuffer1=new Float64Array(r),this._outBuffer2=new Float64Array(r);const i=-2*Math.PI/e,a=Math.cos(i),l=Math.sin(i);for(let t=0;t<n>>1;++t){const 
n=(t+1-e)**2/2,r=Math.sqrt(a**2+l**2)**n,i=n*Math.atan2(l,a),d=2*t;s[d]=r*Math.cos(i),s[d+1]=r*Math.sin(i),o[d]=s[d],o[d+1]=-s[d+1]}this._slicedChirpBuffer=s.subarray(t,n),this._f=new h(r>>1),this._f.transform(this._chirpBuffer,o)}_transform(e,t,n){const r=this._buffer1,s=this._buffer2,o=this._outBuffer1,i=this._outBuffer2,a=this._chirpBuffer,l=this._slicedChirpBuffer,d=this._a;if(n)for(let e=0;e<l.length;e+=2){const n=e+1,s=t[e>>1];r[e]=s*l[e],r[n]=s*l[n]}else for(let e=0;e<l.length;e+=2){const n=e+1;r[e]=t[e]*l[e]-t[n]*l[n],r[n]=t[e]*l[n]+t[n]*l[e]}this._f.transform(o,r);for(let e=0;e<a.length;e+=2){const t=e+1;s[e]=o[e]*a[e]-o[t]*a[t],s[t]=o[e]*a[t]+o[t]*a[e]}this._f.inverseTransform(i,s);for(let t=0;t<i.length;t+=2){const n=i[t+d],r=i[t+d+1],s=l[t],o=l[t+1];e[t]=n*s-r*o,e[t+1]=n*o+r*s}}transform(e,t){this._transform(e,t,!1)}realTransform(e,t){this._transform(e,t,!0)}}class f{constructor(e){this.fft_length=e,this.isPowerOfTwo=p(e),this.isPowerOfTwo?(this.fft=new h(e),this.outputBufferSize=2*e):(this.fft=new m(e),this.outputBufferSize=this.fft.bufferSize)}realTransform(e,t){this.fft.realTransform(e,t)}transform(e,t){this.fft.transform(e,t)}}function g(e,t){if(t%2==0||t<=0)throw new Error("Window size must be a positive odd number");const n=new e.constructor(e.length),r=new e.constructor(t),s=Math.floor(t/2);for(let t=0;t<e.length;++t){let o=0;for(let n=-s;n<=s;++n){let s=t+n;s<0?s=Math.abs(s):s>=e.length&&(s=2*(e.length-1)-s),r[o++]=e[s]}r.sort(),n[t]=r[s]}return n}function _(e,t){const n=Math.pow(10,t);return Math.round(e*n)/n}function w(e){const t=Math.round(e);return Math.abs(e)%1==.5?t%2==0?t:t-1:t}function y(e){const t=e.length,n=e[0].length,r=[t+1,n+1],s=Array.from({length:r[0]},(()=>Array(r[1]).fill(1/0)));s[0][0]=0;const o=Array.from({length:r[0]},(()=>Array(r[1]).fill(-1)));for(let t=1;t<r[1];++t)for(let n=1;n<r[0];++n){const r=s[n-1][t-1],i=s[n-1][t],a=s[n][t-1];let 
l,d;r<i&&r<a?(l=r,d=0):i<r&&i<a?(l=i,d=1):(l=a,d=2),s[n][t]=e[n-1][t-1]+l,o[n][t]=d}for(let e=0;e<r[1];++e)o[0][e]=2;for(let e=0;e<r[0];++e)o[e][0]=1;let i=t,a=n,l=[],d=[];for(;i>0||a>0;)switch(l.push(i-1),d.push(a-1),o[i][a]){case 0:--i,--a;break;case 1:--i;break;case 2:--a;break;default:throw new Error(`Internal error in dynamic time warping. Unexpected trace[${i}, ${a}]. Please file a bug report.`)}return l.reverse(),d.reverse(),[l,d]}},"./src/utils/tensor.js":
|
|
410
416
|
/*!*****************************!*\
|
|
411
417
|
!*** ./src/utils/tensor.js ***!
|
|
412
|
-
\*****************************/(e,t,n)=>{n.r(t),n.d(t,{Tensor:()=>a,cat:()=>y,full:()=>T,full_like:()=>k,interpolate:()=>d,interpolate_4d:()=>u,layer_norm:()=>f,matmul:()=>c,mean:()=>x,mean_pooling:()=>m,ones:()=>$,ones_like:()=>C,permute:()=>l,quantize_embeddings:()=>E,rfft:()=>p,stack:()=>b,std_mean:()=>v,topk:()=>h,zeros:()=>P,zeros_like:()=>S});var r=n(/*! ./maths.js */"./src/utils/maths.js"),s=n(/*! ../backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ../ops/registry.js */"./src/ops/registry.js");const i=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});class a{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return(0,s.isONNXTensor)(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new s.Tensor(e[0],e[1],e[2]),new Proxy(this,{get:(e,t)=>{if("string"==typeof t){let n=Number(t);if(Number.isInteger(n))return e._getitem(n)}return e[t]},set:(e,t,n)=>e[t]=n})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...t]=this.dims;if(t.length>0){const n=t.reduce(((e,t)=>e*t));for(let r=0;r<e;++r)yield this._subarray(r,n,t)}else yield*this.data}_getitem(e){const[t,...n]=this.dims;if(e=w(e,t),n.length>0){const t=n.reduce(((e,t)=>e*t));return this._subarray(e,t,n)}return new a(this.type,[this.data[e]],n)}indexOf(e){const t=this.data;for(let n=0;n<t.length;++n)if(t[n]==e)return n;return-1}_subarray(e,t,n){const r=e*t,s=(e+1)*t,o="subarray"in this.data?this.data.subarray(r,s):this.data.slice(r,s);return new a(this.type,o,n)}item(){const e=this.data;if(1!==e.length)throw new Error(`a Tensor with ${e.length} elements cannot be converted to 
Scalar`);return e[0]}tolist(){return function(e,t){const n=e.length,r=t.reduce(((e,t)=>e*t));if(n!==r)throw Error(`cannot reshape array of size ${n} into shape (${t})`);let s=e;for(let e=t.length-1;e>=0;e--)s=s.reduce(((n,r)=>{let s=n[n.length-1];return s.length<t[e]?s.push(r):n.push([r]),n}),[[]]);return s[0]}(this.data,this.dims)}sigmoid(){return this.clone().sigmoid_()}sigmoid_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=1/(1+Math.exp(-e[t]));return this}map(e){return this.clone().map_(e)}map_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]=e(t[n],n,t);return this}mul(e){return this.clone().mul_(e)}mul_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]*=e;return this}div(e){return this.clone().div_(e)}div_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]/=e;return this}add(e){return this.clone().add_(e)}add_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]+=e;return this}sub(e){return this.clone().sub_(e)}sub_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]-=e;return this}clone(){return new a(this.type,this.data.slice(),this.dims.slice())}slice(...e){const t=[],n=[];for(let r=0;r<this.dims.length;++r){let s=e[r];if(null==s)n.push([0,this.dims[r]]),t.push(this.dims[r]);else if("number"==typeof s)s=w(s,this.dims[r],r),n.push([s,s+1]);else{if(!Array.isArray(s)||2!==s.length)throw new Error(`Invalid slice: ${s}`);{let[e,o]=s;if(e=null===e?0:w(e,this.dims[r],r,!1),o=null===o?this.dims[r]:w(o,this.dims[r],r,!1),e>o)throw new Error(`Invalid slice: ${s}`);const i=[Math.max(e,0),Math.min(o,this.dims[r])];n.push(i),t.push(i[1]-i[0])}}}const r=n.map((([e,t])=>t-e)),s=r.reduce(((e,t)=>e*t)),o=this.data,i=new o.constructor(s),l=this.stride();for(let e=0;e<s;++e){let t=0;for(let s=r.length-1,o=e;s>=0;--s){const e=r[s];t+=(o%e+n[s][0])*l[s],o=Math.floor(o/e)}i[e]=o[t]}return new a(this.type,i,t)}permute(...e){return l(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,t=!1){return 
this.norm(1,e,t)}norm(e="fro",t=null,n=!1){if("fro"===e)e=2;else if("string"==typeof e)throw Error(`Unsupported norm: ${e}`);const r=this.data;if(null===t){let t=r.reduce(((t,n)=>t+n**e),0)**(1/e);return new a(this.type,[t],[])}t=w(t,this.dims.length);const s=this.dims.slice();s[t]=1;const o=new r.constructor(r.length/this.dims[t]);for(let n=0;n<r.length;++n){let i=0;for(let e=this.dims.length-1,r=n,o=1;e>=0;--e){const n=this.dims[e];if(e!==t){i+=r%n*o,o*=s[e]}r=Math.floor(r/n)}o[i]+=r[n]**e}if(1!==e)for(let t=0;t<o.length;++t)o[t]=o[t]**(1/e);return n||s.splice(t,1),new a(this.type,o,s)}normalize_(e=2,t=1){t=w(t,this.dims.length);const n=this.norm(e,t,!0),r=this.data,s=n.data;for(let e=0;e<r.length;++e){let n=0;for(let r=this.dims.length-1,s=e,o=1;r>=0;--r){const e=this.dims[r];if(r!==t){n+=s%e*o,o*=this.dims[r]}s=Math.floor(s/e)}r[e]/=s[n]}return this}normalize(e=2,t=1){return this.clone().normalize_(e,t)}stride(){return function(e){const t=new Array(e.length);for(let n=e.length-1,r=1;n>=0;--n)t[n]=r,r*=e[n];return t}(this.dims)}squeeze(e=null){return new a(this.type,this.data,g(this.dims,e))}squeeze_(e=null){return this.dims=g(this.dims,e),this}unsqueeze(e=null){return new a(this.type,this.data,_(this.dims,e))}unsqueeze_(e=null){return this.dims=_(this.dims,e),this}flatten_(e=0,t=-1){t=(t+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),r=this.dims.slice(e,t+1),s=this.dims.slice(t+1);return this.dims=[...n,r.reduce(((e,t)=>e*t),1),...s],this}flatten(e=0,t=-1){return this.clone().flatten_(e,t)}view(...e){let t=-1;for(let n=0;n<e.length;++n)if(-1===e[n]){if(-1!==t)throw new Error("Only one dimension can be inferred");t=n}const n=this.data;if(-1!==t){const r=e.reduce(((e,n,r)=>r!==t?e*n:e),1);e[t]=n.length/r}return new a(this.type,n,e)}neg_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=-e[t];return this}neg(){return this.clone().neg_()}clamp_(e,t){const n=this.data;for(let r=0;r<n.length;++r)n[r]=Math.min(Math.max(n[r],e),t);return 
this}clamp(e,t){return this.clone().clamp_(e,t)}round_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=Math.round(e[t]);return this}round(){return this.clone().round_()}mean(e=null,t=!1){return x(this,e,t)}to(e){if(this.type===e)return this;if(!i.hasOwnProperty(e))throw new Error(`Unsupported type: ${e}`);return new a(e,i[e].from(this.data),this.dims)}}function l(e,t){const[n,s]=(0,r.permute_data)(e.data,e.dims,t);return new a(e.type,n,s)}function d(e,[t,n],s="bilinear",o=!1){const i=e.dims.at(-3)??1,l=e.dims.at(-2),d=e.dims.at(-1);let u=(0,r.interpolate_data)(e.data,[i,l,d],[t,n],s,o);return new a(e.type,u,[i,t,n])}async function u(e,{size:t=null,mode:n="bilinear"}={}){if(4!==e.dims.length)throw new Error("`interpolate_4d` currently only supports 4D input.");if(!t)throw new Error("`interpolate_4d` requires a `size` argument.");let r,s;if(2===t.length)r=[...e.dims.slice(0,2),...t];else if(3===t.length)r=[e.dims[0],...t];else{if(4!==t.length)throw new Error("`size` must be of length 2, 3, or 4.");r=t}if("bilinear"===n)s=await o.TensorOpRegistry.bilinear_interpolate_4d;else{if("bicubic"!==n)throw new Error(`Unsupported mode: ${n}`);s=await o.TensorOpRegistry.bicubic_interpolate_4d}const i=new a("int64",new BigInt64Array(r.map(BigInt)),[r.length]);return await s({x:e,s:i})}async function c(e,t){const n=await o.TensorOpRegistry.matmul;return await n({a:e,b:t})}async function p(e,t){const n=await o.TensorOpRegistry.rfft;return await n({x:e,a:t})}async function h(e,t){const n=await o.TensorOpRegistry.top_k;return t=null===t?e.dims.at(-1):Math.min(t,e.dims.at(-1)),await n({x:e,k:new a("int64",[BigInt(t)],[1])})}function m(e,t){const n=e.data,r=t.data,s=[e.dims[0],e.dims[2]],o=new n.constructor(s[0]*s[1]),[i,l,d]=e.dims;let u=0;for(let e=0;e<i;++e){const t=e*d*l;for(let s=0;s<d;++s){let i=0,a=0;const c=e*l,p=t+s;for(let e=0;e<l;++e){const t=Number(r[c+e]);a+=t,i+=n[p+e*d]*t}const h=i/a;o[u++]=h}}return new a(e.type,o,s)}function 
f(e,t,{eps:n=1e-5}={}){if(2!==e.dims.length)throw new Error("`layer_norm` currently only supports 2D input.");const[r,s]=e.dims;if(1!==t.length&&t[0]!==s)throw new Error("`normalized_shape` must be a 1D array with shape `[input.dims[1]]`.");const[o,i]=v(e,1,0,!0),l=o.data,d=i.data,u=e.data,c=new u.constructor(u.length);for(let e=0;e<r;++e){const t=e*s;for(let r=0;r<s;++r){const s=t+r;c[s]=(u[s]-d[e])/(l[e]+n)}}return new a(e.type,c,e.dims)}function g(e,t){return e=e.slice(),null===t?e=e.filter((e=>1!==e)):"number"==typeof t?1===e[t]&&e.splice(t,1):Array.isArray(t)&&(e=e.filter(((e,n)=>1!==e||!t.includes(n)))),e}function _(e,t){return t=w(t,e.length+1),(e=e.slice()).splice(t,0,1),e}function w(e,t,n=null,r=!0){if(r&&(e<-t||e>=t))throw new Error(`IndexError: index ${e} is out of bounds for dimension${null===n?"":" "+n} with size ${t}`);return e<0&&(e=(e%t+t)%t),e}function y(e,t=0){t=w(t,e[0].dims.length);const n=e[0].dims.slice();n[t]=e.reduce(((e,n)=>e+n.dims[t]),0);const r=n.reduce(((e,t)=>e*t),1),s=new e[0].data.constructor(r),o=e[0].type;if(0===t){let t=0;for(const n of e){const e=n.data;s.set(e,t),t+=e.length}}else{let r=0;for(let o=0;o<e.length;++o){const{data:i,dims:a}=e[o];for(let e=0;e<i.length;++e){let o=0;for(let s=a.length-1,i=e,l=1;s>=0;--s){const e=a[s];let d=i%e;s===t&&(d+=r),o+=d*l,l*=n[s],i=Math.floor(i/e)}s[o]=i[e]}r+=a[t]}}return new a(o,s,n)}function b(e,t=0){return y(e.map((e=>e.unsqueeze(t))),t)}function v(e,t=null,n=1,r=!1){const s=e.data,o=e.dims;if(null===t){const t=s.reduce(((e,t)=>e+t),0)/s.length,r=Math.sqrt(s.reduce(((e,n)=>e+(n-t)**2),0)/(s.length-n)),o=new a(e.type,[t],[]);return[new a(e.type,[r],[]),o]}const i=x(e,t=w(t,o.length),r),l=i.data,d=o.slice();d[t]=1;const u=new s.constructor(s.length/o[t]);for(let e=0;e<s.length;++e){let n=0;for(let r=o.length-1,s=e,i=1;r>=0;--r){const e=o[r];if(r!==t){n+=s%e*i,i*=d[r]}s=Math.floor(s/e)}u[n]+=(s[e]-l[n])**2}for(let e=0;e<u.length;++e)u[e]=Math.sqrt(u[e]/(o[t]-n));r||d.splice(t,1);return[new 
a(e.type,u,d),i]}function x(e,t=null,n=!1){const r=e.data;if(null===t){const t=r.reduce(((e,t)=>e+t),0);return new a(e.type,[t/r.length],[])}const s=e.dims;t=w(t,s.length);const o=s.slice();o[t]=1;const i=new r.constructor(r.length/s[t]);for(let e=0;e<r.length;++e){let n=0;for(let r=s.length-1,i=e,a=1;r>=0;--r){const e=s[r];if(r!==t){n+=i%e*a,a*=o[r]}i=Math.floor(i/e)}i[n]+=r[e]}if(1!==s[t])for(let e=0;e<i.length;++e)i[e]=i[e]/s[t];return n||o.splice(t,1),new a(e.type,i,o)}function M(e,t,n,r){const s=e.reduce(((e,t)=>e*t),1);return new a(n,new r(s).fill(t),e)}function T(e,t){let n,r;if("number"==typeof t)n="float32",r=Float32Array;else{if("bigint"!=typeof t)throw new Error("Unsupported data type: "+typeof t);n="int64",r=BigInt64Array}return M(e,t,n,r)}function k(e,t){return T(e.dims,t)}function $(e){return M(e,1n,"int64",BigInt64Array)}function C(e){return $(e.dims)}function P(e){return M(e,0n,"int64",BigInt64Array)}function S(e){return P(e.dims)}function E(e,t){if(2!==e.dims.length)throw new Error("The tensor must have 2 dimensions");if(e.dims.at(-1)%8!=0)throw new Error("The last dimension of the tensor must be a multiple of 8");if(!["binary","ubinary"].includes(t))throw new Error("The precision must be either 'binary' or 'ubinary'");const n="binary"===t,r=n?"int8":"uint8",s=n?Int8Array:Uint8Array,o=e.data,i=new s(o.length/8);for(let e=0;e<o.length;++e){const t=o[e]>0?1:0,r=Math.floor(e/8),s=e%8;i[r]|=t<<7-s,n&&0===s&&(i[r]-=128)}return new a(r,i,[e.dims[0],e.dims[1]/8])}}},r={};function s(e){var t=r[e];if(void 0!==t)return t.exports;var o=r[e]={exports:{}};return n[e](o,o.exports,s),o.exports}s.m=n,t=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,s.t=function(n,r){if(1&r&&(n=this(n)),8&r)return n;if("object"==typeof n&&n){if(4&r&&n.__esModule)return n;if(16&r&&"function"==typeof n.then)return n}var o=Object.create(null);s.r(o);var i={};e=e||[null,t({}),t([]),t(t)];for(var a=2&r&&n;"object"==typeof 
a&&!~e.indexOf(a);a=t(a))Object.getOwnPropertyNames(a).forEach((e=>i[e]=()=>n[e]));return i.default=()=>n,s.d(o,i),o},s.d=(e,t)=>{for(var n in t)s.o(t,n)&&!s.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},s.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),s.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;if("string"==typeof import.meta.url&&(e=import.meta.url),!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),s.p=e})(),s.b=new URL("./",import.meta.url);var o={};
|
|
418
|
+
\*****************************/(e,t,n)=>{n.r(t),n.d(t,{Tensor:()=>a,cat:()=>y,full:()=>T,full_like:()=>k,interpolate:()=>d,interpolate_4d:()=>u,layer_norm:()=>f,matmul:()=>c,mean:()=>x,mean_pooling:()=>m,ones:()=>$,ones_like:()=>C,permute:()=>l,quantize_embeddings:()=>E,rfft:()=>p,stack:()=>b,std_mean:()=>v,topk:()=>h,zeros:()=>P,zeros_like:()=>S});var r=n(/*! ./maths.js */"./src/utils/maths.js"),s=n(/*! ../backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ../ops/registry.js */"./src/ops/registry.js");const i=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array,uint4:Uint8Array,int4:Int8Array});class a{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return(0,s.isONNXTensor)(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new s.Tensor(e[0],e[1],e[2]),new Proxy(this,{get:(e,t)=>{if("string"==typeof t){let n=Number(t);if(Number.isInteger(n))return e._getitem(n)}return e[t]},set:(e,t,n)=>e[t]=n})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...t]=this.dims;if(t.length>0){const n=t.reduce(((e,t)=>e*t));for(let r=0;r<e;++r)yield this._subarray(r,n,t)}else yield*this.data}_getitem(e){const[t,...n]=this.dims;if(e=w(e,t),n.length>0){const t=n.reduce(((e,t)=>e*t));return this._subarray(e,t,n)}return new a(this.type,[this.data[e]],n)}indexOf(e){const t=this.data;for(let n=0;n<t.length;++n)if(t[n]==e)return n;return-1}_subarray(e,t,n){const r=e*t,s=(e+1)*t,o="subarray"in this.data?this.data.subarray(r,s):this.data.slice(r,s);return new a(this.type,o,n)}item(){const e=this.data;if(1!==e.length)throw new Error(`a Tensor with ${e.length} elements 
cannot be converted to Scalar`);return e[0]}tolist(){return function(e,t){const n=e.length,r=t.reduce(((e,t)=>e*t));if(n!==r)throw Error(`cannot reshape array of size ${n} into shape (${t})`);let s=e;for(let e=t.length-1;e>=0;e--)s=s.reduce(((n,r)=>{let s=n[n.length-1];return s.length<t[e]?s.push(r):n.push([r]),n}),[[]]);return s[0]}(this.data,this.dims)}sigmoid(){return this.clone().sigmoid_()}sigmoid_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=1/(1+Math.exp(-e[t]));return this}map(e){return this.clone().map_(e)}map_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]=e(t[n],n,t);return this}mul(e){return this.clone().mul_(e)}mul_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]*=e;return this}div(e){return this.clone().div_(e)}div_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]/=e;return this}add(e){return this.clone().add_(e)}add_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]+=e;return this}sub(e){return this.clone().sub_(e)}sub_(e){const t=this.data;for(let n=0;n<t.length;++n)t[n]-=e;return this}clone(){return new a(this.type,this.data.slice(),this.dims.slice())}slice(...e){const t=[],n=[];for(let r=0;r<this.dims.length;++r){let s=e[r];if(null==s)n.push([0,this.dims[r]]),t.push(this.dims[r]);else if("number"==typeof s)s=w(s,this.dims[r],r),n.push([s,s+1]);else{if(!Array.isArray(s)||2!==s.length)throw new Error(`Invalid slice: ${s}`);{let[e,o]=s;if(e=null===e?0:w(e,this.dims[r],r,!1),o=null===o?this.dims[r]:w(o,this.dims[r],r,!1),e>o)throw new Error(`Invalid slice: ${s}`);const i=[Math.max(e,0),Math.min(o,this.dims[r])];n.push(i),t.push(i[1]-i[0])}}}const r=n.map((([e,t])=>t-e)),s=r.reduce(((e,t)=>e*t)),o=this.data,i=new o.constructor(s),l=this.stride();for(let e=0;e<s;++e){let t=0;for(let s=r.length-1,o=e;s>=0;--s){const e=r[s];t+=(o%e+n[s][0])*l[s],o=Math.floor(o/e)}i[e]=o[t]}return new a(this.type,i,t)}permute(...e){return l(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,t=!1){return 
this.norm(1,e,t)}norm(e="fro",t=null,n=!1){if("fro"===e)e=2;else if("string"==typeof e)throw Error(`Unsupported norm: ${e}`);const r=this.data;if(null===t){let t=r.reduce(((t,n)=>t+n**e),0)**(1/e);return new a(this.type,[t],[])}t=w(t,this.dims.length);const s=this.dims.slice();s[t]=1;const o=new r.constructor(r.length/this.dims[t]);for(let n=0;n<r.length;++n){let i=0;for(let e=this.dims.length-1,r=n,o=1;e>=0;--e){const n=this.dims[e];if(e!==t){i+=r%n*o,o*=s[e]}r=Math.floor(r/n)}o[i]+=r[n]**e}if(1!==e)for(let t=0;t<o.length;++t)o[t]=o[t]**(1/e);return n||s.splice(t,1),new a(this.type,o,s)}normalize_(e=2,t=1){t=w(t,this.dims.length);const n=this.norm(e,t,!0),r=this.data,s=n.data;for(let e=0;e<r.length;++e){let n=0;for(let r=this.dims.length-1,s=e,o=1;r>=0;--r){const e=this.dims[r];if(r!==t){n+=s%e*o,o*=this.dims[r]}s=Math.floor(s/e)}r[e]/=s[n]}return this}normalize(e=2,t=1){return this.clone().normalize_(e,t)}stride(){return function(e){const t=new Array(e.length);for(let n=e.length-1,r=1;n>=0;--n)t[n]=r,r*=e[n];return t}(this.dims)}squeeze(e=null){return new a(this.type,this.data,g(this.dims,e))}squeeze_(e=null){return this.dims=g(this.dims,e),this}unsqueeze(e=null){return new a(this.type,this.data,_(this.dims,e))}unsqueeze_(e=null){return this.dims=_(this.dims,e),this}flatten_(e=0,t=-1){t=(t+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),r=this.dims.slice(e,t+1),s=this.dims.slice(t+1);return this.dims=[...n,r.reduce(((e,t)=>e*t),1),...s],this}flatten(e=0,t=-1){return this.clone().flatten_(e,t)}view(...e){let t=-1;for(let n=0;n<e.length;++n)if(-1===e[n]){if(-1!==t)throw new Error("Only one dimension can be inferred");t=n}const n=this.data;if(-1!==t){const r=e.reduce(((e,n,r)=>r!==t?e*n:e),1);e[t]=n.length/r}return new a(this.type,n,e)}neg_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=-e[t];return this}neg(){return this.clone().neg_()}clamp_(e,t){const n=this.data;for(let r=0;r<n.length;++r)n[r]=Math.min(Math.max(n[r],e),t);return 
this}clamp(e,t){return this.clone().clamp_(e,t)}round_(){const e=this.data;for(let t=0;t<e.length;++t)e[t]=Math.round(e[t]);return this}round(){return this.clone().round_()}mean(e=null,t=!1){return x(this,e,t)}to(e){if(this.type===e)return this;if(!i.hasOwnProperty(e))throw new Error(`Unsupported type: ${e}`);return new a(e,i[e].from(this.data),this.dims)}}function l(e,t){const[n,s]=(0,r.permute_data)(e.data,e.dims,t);return new a(e.type,n,s)}function d(e,[t,n],s="bilinear",o=!1){const i=e.dims.at(-3)??1,l=e.dims.at(-2),d=e.dims.at(-1);let u=(0,r.interpolate_data)(e.data,[i,l,d],[t,n],s,o);return new a(e.type,u,[i,t,n])}async function u(e,{size:t=null,mode:n="bilinear"}={}){if(4!==e.dims.length)throw new Error("`interpolate_4d` currently only supports 4D input.");if(!t)throw new Error("`interpolate_4d` requires a `size` argument.");let r,s;if(2===t.length)r=[...e.dims.slice(0,2),...t];else if(3===t.length)r=[e.dims[0],...t];else{if(4!==t.length)throw new Error("`size` must be of length 2, 3, or 4.");r=t}if("bilinear"===n)s=await o.TensorOpRegistry.bilinear_interpolate_4d;else{if("bicubic"!==n)throw new Error(`Unsupported mode: ${n}`);s=await o.TensorOpRegistry.bicubic_interpolate_4d}const i=new a("int64",new BigInt64Array(r.map(BigInt)),[r.length]);return await s({x:e,s:i})}async function c(e,t){const n=await o.TensorOpRegistry.matmul;return await n({a:e,b:t})}async function p(e,t){const n=await o.TensorOpRegistry.rfft;return await n({x:e,a:t})}async function h(e,t){const n=await o.TensorOpRegistry.top_k;return t=null===t?e.dims.at(-1):Math.min(t,e.dims.at(-1)),await n({x:e,k:new a("int64",[BigInt(t)],[1])})}function m(e,t){const n=e.data,r=t.data,s=[e.dims[0],e.dims[2]],o=new n.constructor(s[0]*s[1]),[i,l,d]=e.dims;let u=0;for(let e=0;e<i;++e){const t=e*d*l;for(let s=0;s<d;++s){let i=0,a=0;const c=e*l,p=t+s;for(let e=0;e<l;++e){const t=Number(r[c+e]);a+=t,i+=n[p+e*d]*t}const h=i/a;o[u++]=h}}return new a(e.type,o,s)}function 
f(e,t,{eps:n=1e-5}={}){if(2!==e.dims.length)throw new Error("`layer_norm` currently only supports 2D input.");const[r,s]=e.dims;if(1!==t.length&&t[0]!==s)throw new Error("`normalized_shape` must be a 1D array with shape `[input.dims[1]]`.");const[o,i]=v(e,1,0,!0),l=o.data,d=i.data,u=e.data,c=new u.constructor(u.length);for(let e=0;e<r;++e){const t=e*s;for(let r=0;r<s;++r){const s=t+r;c[s]=(u[s]-d[e])/(l[e]+n)}}return new a(e.type,c,e.dims)}function g(e,t){return e=e.slice(),null===t?e=e.filter((e=>1!==e)):"number"==typeof t?1===e[t]&&e.splice(t,1):Array.isArray(t)&&(e=e.filter(((e,n)=>1!==e||!t.includes(n)))),e}function _(e,t){return t=w(t,e.length+1),(e=e.slice()).splice(t,0,1),e}function w(e,t,n=null,r=!0){if(r&&(e<-t||e>=t))throw new Error(`IndexError: index ${e} is out of bounds for dimension${null===n?"":" "+n} with size ${t}`);return e<0&&(e=(e%t+t)%t),e}function y(e,t=0){t=w(t,e[0].dims.length);const n=e[0].dims.slice();n[t]=e.reduce(((e,n)=>e+n.dims[t]),0);const r=n.reduce(((e,t)=>e*t),1),s=new e[0].data.constructor(r),o=e[0].type;if(0===t){let t=0;for(const n of e){const e=n.data;s.set(e,t),t+=e.length}}else{let r=0;for(let o=0;o<e.length;++o){const{data:i,dims:a}=e[o];for(let e=0;e<i.length;++e){let o=0;for(let s=a.length-1,i=e,l=1;s>=0;--s){const e=a[s];let d=i%e;s===t&&(d+=r),o+=d*l,l*=n[s],i=Math.floor(i/e)}s[o]=i[e]}r+=a[t]}}return new a(o,s,n)}function b(e,t=0){return y(e.map((e=>e.unsqueeze(t))),t)}function v(e,t=null,n=1,r=!1){const s=e.data,o=e.dims;if(null===t){const t=s.reduce(((e,t)=>e+t),0)/s.length,r=Math.sqrt(s.reduce(((e,n)=>e+(n-t)**2),0)/(s.length-n)),o=new a(e.type,[t],[]);return[new a(e.type,[r],[]),o]}const i=x(e,t=w(t,o.length),r),l=i.data,d=o.slice();d[t]=1;const u=new s.constructor(s.length/o[t]);for(let e=0;e<s.length;++e){let n=0;for(let r=o.length-1,s=e,i=1;r>=0;--r){const e=o[r];if(r!==t){n+=s%e*i,i*=d[r]}s=Math.floor(s/e)}u[n]+=(s[e]-l[n])**2}for(let e=0;e<u.length;++e)u[e]=Math.sqrt(u[e]/(o[t]-n));r||d.splice(t,1);return[new 
a(e.type,u,d),i]}function x(e,t=null,n=!1){const r=e.data;if(null===t){const t=r.reduce(((e,t)=>e+t),0);return new a(e.type,[t/r.length],[])}const s=e.dims;t=w(t,s.length);const o=s.slice();o[t]=1;const i=new r.constructor(r.length/s[t]);for(let e=0;e<r.length;++e){let n=0;for(let r=s.length-1,i=e,a=1;r>=0;--r){const e=s[r];if(r!==t){n+=i%e*a,a*=o[r]}i=Math.floor(i/e)}i[n]+=r[e]}if(1!==s[t])for(let e=0;e<i.length;++e)i[e]=i[e]/s[t];return n||o.splice(t,1),new a(e.type,i,o)}function M(e,t,n,r){const s=e.reduce(((e,t)=>e*t),1);return new a(n,new r(s).fill(t),e)}function T(e,t){let n,r;if("number"==typeof t)n="float32",r=Float32Array;else if("bigint"==typeof t)n="int64",r=BigInt64Array;else{if("boolean"!=typeof t)throw new Error("Unsupported data type: "+typeof t);n="bool",r=Uint8Array}return M(e,t,n,r)}function k(e,t){return T(e.dims,t)}function $(e){return M(e,1n,"int64",BigInt64Array)}function C(e){return $(e.dims)}function P(e){return M(e,0n,"int64",BigInt64Array)}function S(e){return P(e.dims)}function E(e,t){if(2!==e.dims.length)throw new Error("The tensor must have 2 dimensions");if(e.dims.at(-1)%8!=0)throw new Error("The last dimension of the tensor must be a multiple of 8");if(!["binary","ubinary"].includes(t))throw new Error("The precision must be either 'binary' or 'ubinary'");const n="binary"===t,r=n?"int8":"uint8",s=n?Int8Array:Uint8Array,o=e.data,i=new s(o.length/8);for(let e=0;e<o.length;++e){const t=o[e]>0?1:0,r=Math.floor(e/8),s=e%8;i[r]|=t<<7-s,n&&0===s&&(i[r]-=128)}return new a(r,i,[e.dims[0],e.dims[1]/8])}}},r={};function s(e){var t=r[e];if(void 0!==t)return t.exports;var o=r[e]={exports:{}};return n[e](o,o.exports,s),o.exports}s.m=n,t=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,s.t=function(n,r){if(1&r&&(n=this(n)),8&r)return n;if("object"==typeof n&&n){if(4&r&&n.__esModule)return n;if(16&r&&"function"==typeof n.then)return n}var o=Object.create(null);s.r(o);var i={};e=e||[null,t({}),t([]),t(t)];for(var 
a=2&r&&n;"object"==typeof a&&!~e.indexOf(a);a=t(a))Object.getOwnPropertyNames(a).forEach((e=>i[e]=()=>n[e]));return i.default=()=>n,s.d(o,i),o},s.d=(e,t)=>{for(var n in t)s.o(t,n)&&!s.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},s.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),s.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;if("string"==typeof import.meta.url&&(e=import.meta.url),!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),s.p=e})(),s.b=new URL("./",import.meta.url);var o={};
|
|
413
419
|
/*!*****************************!*\
|
|
414
420
|
!*** ./src/transformers.js ***!
|
|
415
|
-
\*****************************/s.r(o),s.d(o,{ASTFeatureExtractor:()=>g.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>d.AlbertTokenizer,AudioClassificationPipeline:()=>a.AudioClassificationPipeline,AutoConfig:()=>u.AutoConfig,AutoFeatureExtractor:()=>_.AutoFeatureExtractor,AutoImageProcessor:()=>b.AutoImageProcessor,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForPoseEstimation:()=>l.AutoModelForPoseEstimation,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2S
eq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForUniversalSegmentation:()=>l.AutoModelForUniversalSegmentation,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>M.AutoProcessor,AutoTokenizer:()=>d.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>a.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>d.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>T.BaseStreamer,BeitFeatureExtractor:()=>y.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>d.BertTokenizer,BitImageProcessor:()=>y.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallModel,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>d.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>d.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,Bloom
Model:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>d.BloomTokenizer,CLIPFeatureExtractor:()=>y.CLIPFeatureExtractor,CLIPImageProcessor:()=>y.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModel:()=>l.CLIPTextModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>d.CLIPTokenizer,CLIPVisionModel:()=>l.CLIPVisionModel,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>d.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>y.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>g.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,ClassifierFreeGuidanceLogitsProcessor:()=>$.ClassifierFreeGuidanceLogitsProcessor,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>d.CodeGenTokenizer,CodeLlamaTokenizer:()=>d.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>d.CohereTokenizer,Con
vBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTokenClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>d.ConvBertTokenizer,ConvNextFeatureExtractor:()=>y.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>y.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>y.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>y.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>d.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionAnswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>d.DebertaV2Tokenizer,DecisionTransformerModel:()=>l.DecisionTransformerModel,DecisionTransformerPreTrainedModel:()=>l.DecisionTransformerPreTrainedModel,DeiTFeatureExtractor:()=>y.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTFo
rImageClassification,DeiTImageProcessor:()=>y.DeiTImageProcessor,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>a.DepthEstimationPipeline,DepthProForDepthEstimation:()=>l.DepthProForDepthEstimation,DepthProPreTrainedModel:()=>l.DepthProPreTrainedModel,DetrFeatureExtractor:()=>y.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrImageProcessor:()=>y.DetrImageProcessor,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>d.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>a.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>y.DonutFeatureExtractor,DonutImageProcessor:()=>y.DonutImageProcessor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>y.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassific
ation:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>d.ElectraTokenizer,EosTokenCriteria:()=>k.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSequenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>d.EsmTokenizer,FFT:()=>m.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>d.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>a.FeatureExtractionPipeline,FeatureExtractor:()=>f.FeatureExtractor,FillMaskPipeline:()=>a.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>x.Florence2Processor,ForcedBOSTokenLogitsProcessor:()=>$.ForcedBOSTokenLogitsProcessor,ForcedEOSTokenLogitsProcessor:()=>$.ForcedEOSTokenLogitsProcessor,GLPNFeatureExtractor:()=>y.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>d.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNe
oXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>d.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>d.GemmaTokenizer,GraniteForCausalLM:()=>l.GraniteForCausalLM,GraniteModel:()=>l.GraniteModel,GranitePreTrainedModel:()=>l.GranitePreTrainedModel,Grok1Tokenizer:()=>d.Grok1Tokenizer,GroupViTModel:()=>l.GroupViTModel,GroupViTPreTrainedModel:()=>l.GroupViTPreTrainedModel,HerbertTokenizer:()=>d.HerbertTokenizer,HieraForImageClassification:()=>l.HieraForImageClassification,HieraModel:()=>l.HieraModel,HieraPreTrainedModel:()=>l.HieraPreTrainedModel,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,ImageClassificationPipeline:()=>a.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>a.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>g.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageProcessor:()=>w.ImageProcessor,ImageSegmentationPipeline:()=>a.ImageSegmentationPipeline,ImageToImagePipeline:()=>a.ImageToImagePipeline,ImageToTextPipeline:()=>a.ImageToTextPipeline,InterruptableStoppingCriteria:()=>k.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,JinaCLIPImageProcessor:()=>y.JinaCLIPImageProcessor,JinaCLIPModel:()=>l.JinaCLIPModel,JinaCLIPPreTrainedModel:()=>l.JinaCLIPPreTrainedModel,JinaCLIPProcessor:()=>x.JinaCLIPProcessor,JinaCLIPTextModel:()=>l.JinaCLIPTextModel,JinaCLIPVisionModel:()=>l.JinaCLIPVisionModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>
l.LlamaPreTrainedModel,LlamaTokenizer:()=>d.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaOnevisionForConditionalGeneration:()=>l.LlavaOnevisionForConditionalGeneration,LlavaOnevisionImageProcessor:()=>y.LlavaOnevisionImageProcessor,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LogitsProcessor:()=>$.LogitsProcessor,LogitsProcessorList:()=>$.LogitsProcessorList,LogitsWarper:()=>$.LogitsWarper,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>d.M2M100Tokenizer,MBart50Tokenizer:()=>d.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreTrainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>d.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>d.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>d.MarianTokenizer,Mask2FormerImageProcessor:()=>y.Mask2FormerImageProcessor,MaskFormerFeatureExtractor:()=>y.MaskFormerFeatureExtractor,MaskFormerForInstanceSegmentation:()=>l.MaskFormerForInstanceSegmentation,MaskFormerImageProcessor:()=>y.MaskFormerImageProcessor,MaskFormerModel:()=>l.MaskFormerM
odel,MaskFormerPreTrainedModel:()=>l.MaskFormerPreTrainedModel,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>k.MaxLengthCriteria,MgpstrForSceneTextRecognition:()=>l.MgpstrForSceneTextRecognition,MgpstrModelOutput:()=>l.MgpstrModelOutput,MgpstrPreTrainedModel:()=>l.MgpstrPreTrainedModel,MgpstrProcessor:()=>x.MgpstrProcessor,MgpstrTokenizer:()=>d.MgpstrTokenizer,MinLengthLogitsProcessor:()=>$.MinLengthLogitsProcessor,MinNewTokensLengthLogitsProcessor:()=>$.MinNewTokensLengthLogitsProcessor,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>d.MobileBertTokenizer,MobileLLMForCausalLM:()=>l.MobileLLMForCausalLM,MobileLLMModel:()=>l.MobileLLMModel,MobileLLMPreTrainedModel:()=>l.MobileLLMPreTrainedModel,MobileNetV1FeatureExtractor:()=>y.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1ImageProcessor:()=>y.MobileNetV1ImageProcessor,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>y.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2ImageProcessor:()=>y.MobileNetV2ImageProcessor,MobileNetV2Model:()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>y.MobileNetV3FeatureExtractor,MobileNetV3ForImageClassification:()=>l.MobileNetV3ForImageClassification,MobileNetV3ImageProcessor:()=>y.MobileNetV3ImageProcessor,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileN
etV4FeatureExtractor:()=>y.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4ImageProcessor:()=>y.MobileNetV4ImageProcessor,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>y.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>y.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MultiModalityCausalLM:()=>l.MultiModalityCausalLM,MultiModalityPreTrainedModel:()=>l.MultiModalityPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>d.NllbTokenizer,NoBadWordsLogitsProcessor:()=>$.NoBadWordsLogitsProcessor,NoRepeatNGramLogitsProcessor:()=>$.NoRepeatNGramLogitsProcessor,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>y.NougatImageProcessor,NougatTokenizer:()=>d.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>a.ObjectDetectionPipeline,OlmoForCausalLM:()=>l.OlmoForCausalLM,OlmoModel:()=>l.OlmoModel,OlmoPreTrainedModel:()=>l.OlmoPreTrainedModel,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()=>l.OpenELMPreTrainedModel,OwlVi
TFeatureExtractor:()=>y.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTImageProcessor:()=>y.OwlViTImageProcessor,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>x.OwlViTProcessor,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>y.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,PatchTSMixerForPrediction:()=>l.PatchTSMixerForPrediction,PatchTSMixerModel:()=>l.PatchTSMixerModel,PatchTSMixerPreTrainedModel:()=>l.PatchTSMixerPreTrainedModel,PatchTSTForPrediction:()=>l.PatchTSTForPrediction,PatchTSTModel:()=>l.PatchTSTModel,PatchTSTPreTrainedModel:()=>l.PatchTSTPreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>a.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>d.PreTrainedTokenizer,PretrainedConfig:()=>u.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>v.Processor,PvtForImageClassification:()=>l.PvtForImageClassification,PvtImageProcessor:()=>y.PvtImageProcessor,PvtModel:()=>l.PvtModel,PvtPreTrainedModel:()=>l.PvtPreTrainedModel,PyAnnoteFeatureExtractor:()=>g.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>x.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>a.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>d.Qwen2Tokenizer,Qwen2VLForConditionalGeneration:()=>l.Qwen2VLForConditionalGeneration,Qwen2VLImageProcessor:()=>y.Qwen2VLImageProcessor,Qwe
n2VLPreTrainedModel:()=>l.Qwen2VLPreTrainedModel,Qwen2VLProcessor:()=>x.Qwen2VLProcessor,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>y.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>p.RawImage,RepetitionPenaltyLogitsProcessor:()=>$.RepetitionPenaltyLogitsProcessor,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoFormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>d.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>d.RobertaTokenizer,SamImageProcessor:()=>y.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>x.SamProcessor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeatureExtractor:()=>g.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>y.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.Segfor
merForSemanticSegmentation,SegformerImageProcessor:()=>y.SegformerImageProcessor,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>y.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>d.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>g.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>x.SpeechT5Processor,SpeechT5Tokenizer:()=>d.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>d.SqueezeBertTokenizer,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>k.StoppingCriteria,StoppingCriteriaList:()=>k.StoppingCriteriaList,SummarizationPipeline:()=>a.SummarizationPipeline,SuppressTokensAtBeginLogitsProcessor:()=>$.SuppressTokensAtBeginLogitsProcessor,Swin2SRForImageSuperResolution:()=>l.Swin2SRForImageSuperResolution,Swin2SRImageProcessor:()=>y.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,Swi
nModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>d.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,TemperatureLogitsWarper:()=>$.TemperatureLogitsWarper,Tensor:()=>h.Tensor,Text2TextGenerationPipeline:()=>a.Text2TextGenerationPipeline,TextClassificationPipeline:()=>a.TextClassificationPipeline,TextGenerationPipeline:()=>a.TextGenerationPipeline,TextStreamer:()=>T.TextStreamer,TextToAudioPipeline:()=>a.TextToAudioPipeline,TokenClassificationPipeline:()=>a.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>d.TokenizerModel,TopKLogitsWarper:()=>$.TopKLogitsWarper,TopPLogitsWarper:()=>$.TopPLogitsWarper,TrOCRForCausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>a.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,VLChatProcessor:()=>x.VLChatProcessor,VLMImageProcessor:()=>y.VLMImageProcessor,ViTFeatureExtractor:()=>y.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>y.ViTImageProcessor,ViTMAEModel:()=>l.ViTMAEModel,ViTMAEPreTrainedModel:()=>l.ViTMA
EPreTrainedModel,ViTMSNForImageClassification:()=>l.ViTMSNForImageClassification,ViTMSNModel:()=>l.ViTMSNModel,ViTMSNPreTrainedModel:()=>l.ViTMSNPreTrainedModel,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>y.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitPoseForPoseEstimation:()=>l.VitPoseForPoseEstimation,VitPoseImageProcessor:()=>y.VitPoseImageProcessor,VitPosePreTrainedModel:()=>l.VitPosePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>d.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>d.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>g.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>x.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudioFrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>g.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>g.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,Whisp
erModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>x.WhisperProcessor,WhisperTextStreamer:()=>T.WhisperTextStreamer,WhisperTimeStampLogitsProcessor:()=>$.WhisperTimeStampLogitsProcessor,WhisperTokenizer:()=>d.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>d.XLMRobertaTokenizer,XLMTokenizer:()=>d.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>y.YolosFeatureExtractor,YolosForObjectDetection:()=>l.YolosForObjectDetection,YolosImageProcessor:()=>y.YolosImageProcessor,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>a.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>a.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>a.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>a.ZeroShotObjectDetectionPipeline,bankers_round:()=>m.bankers_round,cat:()=>h.cat,cos_sim:()=>m.cos_sim,dot:()=>m.dot,dynamic_time_warping:()=>m.dynamic_time_warping,env:()=>i.env,full:()=>h.full,full_like:()=>h.full_like,getKeyValueShapes:()=>u.getKeyValueShapes,hamming:()=>c.hamming,hanning:()=>c.hanning,interpolate:()=>h.interpolate,interpolate_4d:()=>h.interpolate_4d,interpolate_data:()=>m.interpolate_data,i
s_chinese_char:()=>d.is_chinese_char,layer_norm:()=>h.layer_norm,log_softmax:()=>m.log_softmax,magnitude:()=>m.magnitude,matmul:()=>h.matmul,max:()=>m.max,mean:()=>h.mean,mean_pooling:()=>h.mean_pooling,medianFilter:()=>m.medianFilter,mel_filter_bank:()=>c.mel_filter_bank,min:()=>m.min,ones:()=>h.ones,ones_like:()=>h.ones_like,permute:()=>h.permute,permute_data:()=>m.permute_data,pipeline:()=>a.pipeline,quantize_embeddings:()=>h.quantize_embeddings,read_audio:()=>c.read_audio,rfft:()=>h.rfft,round:()=>m.round,softmax:()=>m.softmax,spectrogram:()=>c.spectrogram,stack:()=>h.stack,std_mean:()=>h.std_mean,topk:()=>h.topk,window_function:()=>c.window_function,zeros:()=>h.zeros,zeros_like:()=>h.zeros_like});var i=s(/*! ./env.js */"./src/env.js"),a=s(/*! ./pipelines.js */"./src/pipelines.js"),l=s(/*! ./models.js */"./src/models.js"),d=s(/*! ./tokenizers.js */"./src/tokenizers.js"),u=s(/*! ./configs.js */"./src/configs.js"),c=s(/*! ./utils/audio.js */"./src/utils/audio.js"),p=s(/*! ./utils/image.js */"./src/utils/image.js"),h=s(/*! ./utils/tensor.js */"./src/utils/tensor.js"),m=s(/*! ./utils/maths.js */"./src/utils/maths.js"),f=s(/*! ./base/feature_extraction_utils.js */"./src/base/feature_extraction_utils.js"),g=s(/*! ./models/feature_extractors.js */"./src/models/feature_extractors.js"),_=s(/*! ./models/auto/feature_extraction_auto.js */"./src/models/auto/feature_extraction_auto.js"),w=s(/*! ./base/image_processors_utils.js */"./src/base/image_processors_utils.js"),y=s(/*! ./models/image_processors.js */"./src/models/image_processors.js"),b=s(/*! ./models/auto/image_processing_auto.js */"./src/models/auto/image_processing_auto.js"),v=s(/*! ./base/processing_utils.js */"./src/base/processing_utils.js"),x=s(/*! ./models/processors.js */"./src/models/processors.js"),M=s(/*! ./models/auto/processing_auto.js */"./src/models/auto/processing_auto.js"),T=s(/*! ./generation/streamers.js */"./src/generation/streamers.js"),k=s(/*! 
./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),$=s(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),C=o.ASTFeatureExtractor,P=o.ASTForAudioClassification,S=o.ASTModel,E=o.ASTPreTrainedModel,F=o.AlbertForMaskedLM,A=o.AlbertForQuestionAnswering,I=o.AlbertForSequenceClassification,z=o.AlbertModel,L=o.AlbertPreTrainedModel,O=o.AlbertTokenizer,B=o.AudioClassificationPipeline,D=o.AutoConfig,N=o.AutoFeatureExtractor,R=o.AutoImageProcessor,j=o.AutoModel,V=o.AutoModelForAudioClassification,G=o.AutoModelForAudioFrameClassification,U=o.AutoModelForCTC,q=o.AutoModelForCausalLM,W=o.AutoModelForDepthEstimation,H=o.AutoModelForDocumentQuestionAnswering,X=o.AutoModelForImageClassification,Q=o.AutoModelForImageFeatureExtraction,K=o.AutoModelForImageMatting,Y=o.AutoModelForImageSegmentation,J=o.AutoModelForImageToImage,Z=o.AutoModelForMaskGeneration,ee=o.AutoModelForMaskedLM,te=o.AutoModelForNormalEstimation,ne=o.AutoModelForObjectDetection,re=o.AutoModelForPoseEstimation,se=o.AutoModelForQuestionAnswering,oe=o.AutoModelForSemanticSegmentation,ie=o.AutoModelForSeq2SeqLM,ae=o.AutoModelForSequenceClassification,le=o.AutoModelForSpeechSeq2Seq,de=o.AutoModelForTextToSpectrogram,ue=o.AutoModelForTextToWaveform,ce=o.AutoModelForTokenClassification,pe=o.AutoModelForUniversalSegmentation,he=o.AutoModelForVision2Seq,me=o.AutoModelForXVector,fe=o.AutoModelForZeroShotObjectDetection,ge=o.AutoProcessor,_e=o.AutoTokenizer,we=o.AutomaticSpeechRecognitionPipeline,ye=o.BartForConditionalGeneration,be=o.BartForSequenceClassification,ve=o.BartModel,xe=o.BartPretrainedModel,Me=o.BartTokenizer,Te=o.BaseModelOutput,ke=o.BaseStreamer,$e=o.BeitFeatureExtractor,Ce=o.BeitForImageClassification,Pe=o.BeitModel,Se=o.BeitPreTrainedModel,Ee=o.BertForMaskedLM,Fe=o.BertForQuestionAnswering,Ae=o.BertForSequenceClassification,Ie=o.BertForTokenClassification,ze=o.BertModel,Le=o.BertPreTrainedModel,Oe=o.BertTokenizer,Be=o.BitImageProcessor,De=o.BlenderbotForCondit
ionalGeneration,Ne=o.BlenderbotModel,Re=o.BlenderbotPreTrainedModel,je=o.BlenderbotSmallForConditionalGeneration,Ve=o.BlenderbotSmallModel,Ge=o.BlenderbotSmallPreTrainedModel,Ue=o.BlenderbotSmallTokenizer,qe=o.BlenderbotTokenizer,We=o.BloomForCausalLM,He=o.BloomModel,Xe=o.BloomPreTrainedModel,Qe=o.BloomTokenizer,Ke=o.CLIPFeatureExtractor,Ye=o.CLIPImageProcessor,Je=o.CLIPModel,Ze=o.CLIPPreTrainedModel,et=o.CLIPSegForImageSegmentation,tt=o.CLIPSegModel,nt=o.CLIPSegPreTrainedModel,rt=o.CLIPTextModel,st=o.CLIPTextModelWithProjection,ot=o.CLIPTokenizer,it=o.CLIPVisionModel,at=o.CLIPVisionModelWithProjection,lt=o.CamembertForMaskedLM,dt=o.CamembertForQuestionAnswering,ut=o.CamembertForSequenceClassification,ct=o.CamembertForTokenClassification,pt=o.CamembertModel,ht=o.CamembertPreTrainedModel,mt=o.CamembertTokenizer,ft=o.CausalLMOutput,gt=o.CausalLMOutputWithPast,_t=o.ChineseCLIPFeatureExtractor,wt=o.ChineseCLIPModel,yt=o.ChineseCLIPPreTrainedModel,bt=o.ClapAudioModelWithProjection,vt=o.ClapFeatureExtractor,xt=o.ClapModel,Mt=o.ClapPreTrainedModel,Tt=o.ClapTextModelWithProjection,kt=o.ClassifierFreeGuidanceLogitsProcessor,$t=o.CodeGenForCausalLM,Ct=o.CodeGenModel,Pt=o.CodeGenPreTrainedModel,St=o.CodeGenTokenizer,Et=o.CodeLlamaTokenizer,Ft=o.CohereForCausalLM,At=o.CohereModel,It=o.CoherePreTrainedModel,zt=o.CohereTokenizer,Lt=o.ConvBertForMaskedLM,Ot=o.ConvBertForQuestionAnswering,Bt=o.ConvBertForSequenceClassification,Dt=o.ConvBertForTokenClassification,Nt=o.ConvBertModel,Rt=o.ConvBertPreTrainedModel,jt=o.ConvBertTokenizer,Vt=o.ConvNextFeatureExtractor,Gt=o.ConvNextForImageClassification,Ut=o.ConvNextImageProcessor,qt=o.ConvNextModel,Wt=o.ConvNextPreTrainedModel,Ht=o.ConvNextV2ForImageClassification,Xt=o.ConvNextV2Model,Qt=o.ConvNextV2PreTrainedModel,Kt=o.DPTFeatureExtractor,Yt=o.DPTForDepthEstimation,Jt=o.DPTImageProcessor,Zt=o.DPTModel,en=o.DPTPreTrainedModel,tn=o.DebertaForMaskedLM,nn=o.DebertaForQuestionAnswering,rn=o.DebertaForSequenceClassification,sn=o.DebertaForTok
enClassification,on=o.DebertaModel,an=o.DebertaPreTrainedModel,ln=o.DebertaTokenizer,dn=o.DebertaV2ForMaskedLM,un=o.DebertaV2ForQuestionAnswering,cn=o.DebertaV2ForSequenceClassification,pn=o.DebertaV2ForTokenClassification,hn=o.DebertaV2Model,mn=o.DebertaV2PreTrainedModel,fn=o.DebertaV2Tokenizer,gn=o.DecisionTransformerModel,_n=o.DecisionTransformerPreTrainedModel,wn=o.DeiTFeatureExtractor,yn=o.DeiTForImageClassification,bn=o.DeiTImageProcessor,vn=o.DeiTModel,xn=o.DeiTPreTrainedModel,Mn=o.DepthAnythingForDepthEstimation,Tn=o.DepthAnythingPreTrainedModel,kn=o.DepthEstimationPipeline,$n=o.DepthProForDepthEstimation,Cn=o.DepthProPreTrainedModel,Pn=o.DetrFeatureExtractor,Sn=o.DetrForObjectDetection,En=o.DetrForSegmentation,Fn=o.DetrImageProcessor,An=o.DetrModel,In=o.DetrObjectDetectionOutput,zn=o.DetrPreTrainedModel,Ln=o.DetrSegmentationOutput,On=o.Dinov2ForImageClassification,Bn=o.Dinov2Model,Dn=o.Dinov2PreTrainedModel,Nn=o.DistilBertForMaskedLM,Rn=o.DistilBertForQuestionAnswering,jn=o.DistilBertForSequenceClassification,Vn=o.DistilBertForTokenClassification,Gn=o.DistilBertModel,Un=o.DistilBertPreTrainedModel,qn=o.DistilBertTokenizer,Wn=o.DocumentQuestionAnsweringPipeline,Hn=o.DonutFeatureExtractor,Xn=o.DonutImageProcessor,Qn=o.DonutSwinModel,Kn=o.DonutSwinPreTrainedModel,Yn=o.EfficientNetForImageClassification,Jn=o.EfficientNetImageProcessor,Zn=o.EfficientNetModel,er=o.EfficientNetPreTrainedModel,tr=o.ElectraForMaskedLM,nr=o.ElectraForQuestionAnswering,rr=o.ElectraForSequenceClassification,sr=o.ElectraForTokenClassification,or=o.ElectraModel,ir=o.ElectraPreTrainedModel,ar=o.ElectraTokenizer,lr=o.EosTokenCriteria,dr=o.EsmForMaskedLM,ur=o.EsmForSequenceClassification,cr=o.EsmForTokenClassification,pr=o.EsmModel,hr=o.EsmPreTrainedModel,mr=o.EsmTokenizer,fr=o.FFT,gr=o.FalconForCausalLM,_r=o.FalconModel,wr=o.FalconPreTrainedModel,yr=o.FalconTokenizer,br=o.FastViTForImageClassification,vr=o.FastViTModel,xr=o.FastViTPreTrainedModel,Mr=o.FeatureExtractionPipeline,Tr=o.Feature
Extractor,kr=o.FillMaskPipeline,$r=o.Florence2ForConditionalGeneration,Cr=o.Florence2PreTrainedModel,Pr=o.Florence2Processor,Sr=o.ForcedBOSTokenLogitsProcessor,Er=o.ForcedEOSTokenLogitsProcessor,Fr=o.GLPNFeatureExtractor,Ar=o.GLPNForDepthEstimation,Ir=o.GLPNModel,zr=o.GLPNPreTrainedModel,Lr=o.GPT2LMHeadModel,Or=o.GPT2Model,Br=o.GPT2PreTrainedModel,Dr=o.GPT2Tokenizer,Nr=o.GPTBigCodeForCausalLM,Rr=o.GPTBigCodeModel,jr=o.GPTBigCodePreTrainedModel,Vr=o.GPTJForCausalLM,Gr=o.GPTJModel,Ur=o.GPTJPreTrainedModel,qr=o.GPTNeoForCausalLM,Wr=o.GPTNeoModel,Hr=o.GPTNeoPreTrainedModel,Xr=o.GPTNeoXForCausalLM,Qr=o.GPTNeoXModel,Kr=o.GPTNeoXPreTrainedModel,Yr=o.GPTNeoXTokenizer,Jr=o.Gemma2ForCausalLM,Zr=o.Gemma2Model,es=o.Gemma2PreTrainedModel,ts=o.GemmaForCausalLM,ns=o.GemmaModel,rs=o.GemmaPreTrainedModel,ss=o.GemmaTokenizer,os=o.GraniteForCausalLM,is=o.GraniteModel,as=o.GranitePreTrainedModel,ls=o.Grok1Tokenizer,ds=o.GroupViTModel,us=o.GroupViTPreTrainedModel,cs=o.HerbertTokenizer,ps=o.HieraForImageClassification,hs=o.HieraModel,ms=o.HieraPreTrainedModel,fs=o.HubertForCTC,gs=o.HubertForSequenceClassification,_s=o.HubertModel,ws=o.HubertPreTrainedModel,ys=o.ImageClassificationPipeline,bs=o.ImageFeatureExtractionPipeline,vs=o.ImageFeatureExtractor,xs=o.ImageMattingOutput,Ms=o.ImageProcessor,Ts=o.ImageSegmentationPipeline,ks=o.ImageToImagePipeline,$s=o.ImageToTextPipeline,Cs=o.InterruptableStoppingCriteria,Ps=o.JAISLMHeadModel,Ss=o.JAISModel,Es=o.JAISPreTrainedModel,Fs=o.JinaCLIPImageProcessor,As=o.JinaCLIPModel,Is=o.JinaCLIPPreTrainedModel,zs=o.JinaCLIPProcessor,Ls=o.JinaCLIPTextModel,Os=o.JinaCLIPVisionModel,Bs=o.LlamaForCausalLM,Ds=o.LlamaModel,Ns=o.LlamaPreTrainedModel,Rs=o.LlamaTokenizer,js=o.LlavaForConditionalGeneration,Vs=o.LlavaOnevisionForConditionalGeneration,Gs=o.LlavaOnevisionImageProcessor,Us=o.LlavaPreTrainedModel,qs=o.LogitsProcessor,Ws=o.LogitsProcessorList,Hs=o.LogitsWarper,Xs=o.LongT5ForConditionalGeneration,Qs=o.LongT5Model,Ks=o.LongT5PreTrainedModel,Ys=o.M2M100ForC
onditionalGeneration,Js=o.M2M100Model,Zs=o.M2M100PreTrainedModel,eo=o.M2M100Tokenizer,to=o.MBart50Tokenizer,no=o.MBartForCausalLM,ro=o.MBartForConditionalGeneration,so=o.MBartForSequenceClassification,oo=o.MBartModel,io=o.MBartPreTrainedModel,ao=o.MBartTokenizer,lo=o.MPNetForMaskedLM,uo=o.MPNetForQuestionAnswering,co=o.MPNetForSequenceClassification,po=o.MPNetForTokenClassification,ho=o.MPNetModel,mo=o.MPNetPreTrainedModel,fo=o.MPNetTokenizer,go=o.MT5ForConditionalGeneration,_o=o.MT5Model,wo=o.MT5PreTrainedModel,yo=o.MarianMTModel,bo=o.MarianModel,vo=o.MarianPreTrainedModel,xo=o.MarianTokenizer,Mo=o.Mask2FormerImageProcessor,To=o.MaskFormerFeatureExtractor,ko=o.MaskFormerForInstanceSegmentation,$o=o.MaskFormerImageProcessor,Co=o.MaskFormerModel,Po=o.MaskFormerPreTrainedModel,So=o.MaskedLMOutput,Eo=o.MaxLengthCriteria,Fo=o.MgpstrForSceneTextRecognition,Ao=o.MgpstrModelOutput,Io=o.MgpstrPreTrainedModel,zo=o.MgpstrProcessor,Lo=o.MgpstrTokenizer,Oo=o.MinLengthLogitsProcessor,Bo=o.MinNewTokensLengthLogitsProcessor,Do=o.MistralForCausalLM,No=o.MistralModel,Ro=o.MistralPreTrainedModel,jo=o.MobileBertForMaskedLM,Vo=o.MobileBertForQuestionAnswering,Go=o.MobileBertForSequenceClassification,Uo=o.MobileBertModel,qo=o.MobileBertPreTrainedModel,Wo=o.MobileBertTokenizer,Ho=o.MobileLLMForCausalLM,Xo=o.MobileLLMModel,Qo=o.MobileLLMPreTrainedModel,Ko=o.MobileNetV1FeatureExtractor,Yo=o.MobileNetV1ForImageClassification,Jo=o.MobileNetV1ImageProcessor,Zo=o.MobileNetV1Model,ei=o.MobileNetV1PreTrainedModel,ti=o.MobileNetV2FeatureExtractor,ni=o.MobileNetV2ForImageClassification,ri=o.MobileNetV2ImageProcessor,si=o.MobileNetV2Model,oi=o.MobileNetV2PreTrainedModel,ii=o.MobileNetV3FeatureExtractor,ai=o.MobileNetV3ForImageClassification,li=o.MobileNetV3ImageProcessor,di=o.MobileNetV3Model,ui=o.MobileNetV3PreTrainedModel,ci=o.MobileNetV4FeatureExtractor,pi=o.MobileNetV4ForImageClassification,hi=o.MobileNetV4ImageProcessor,mi=o.MobileNetV4Model,fi=o.MobileNetV4PreTrainedModel,gi=o.MobileViTFeatur
eExtractor,_i=o.MobileViTForImageClassification,wi=o.MobileViTImageProcessor,yi=o.MobileViTModel,bi=o.MobileViTPreTrainedModel,vi=o.MobileViTV2ForImageClassification,xi=o.MobileViTV2Model,Mi=o.MobileViTV2PreTrainedModel,Ti=o.ModelOutput,ki=o.Moondream1ForConditionalGeneration,$i=o.MptForCausalLM,Ci=o.MptModel,Pi=o.MptPreTrainedModel,Si=o.MultiModalityCausalLM,Ei=o.MultiModalityPreTrainedModel,Fi=o.MusicgenForCausalLM,Ai=o.MusicgenForConditionalGeneration,Ii=o.MusicgenModel,zi=o.MusicgenPreTrainedModel,Li=o.NllbTokenizer,Oi=o.NoBadWordsLogitsProcessor,Bi=o.NoRepeatNGramLogitsProcessor,Di=o.NomicBertModel,Ni=o.NomicBertPreTrainedModel,Ri=o.NougatImageProcessor,ji=o.NougatTokenizer,Vi=o.OPTForCausalLM,Gi=o.OPTModel,Ui=o.OPTPreTrainedModel,qi=o.ObjectDetectionPipeline,Wi=o.OlmoForCausalLM,Hi=o.OlmoModel,Xi=o.OlmoPreTrainedModel,Qi=o.OpenELMForCausalLM,Ki=o.OpenELMModel,Yi=o.OpenELMPreTrainedModel,Ji=o.OwlViTFeatureExtractor,Zi=o.OwlViTForObjectDetection,ea=o.OwlViTImageProcessor,ta=o.OwlViTModel,na=o.OwlViTPreTrainedModel,ra=o.OwlViTProcessor,sa=o.Owlv2ForObjectDetection,oa=o.Owlv2ImageProcessor,ia=o.Owlv2Model,aa=o.Owlv2PreTrainedModel,la=o.PatchTSMixerForPrediction,da=o.PatchTSMixerModel,ua=o.PatchTSMixerPreTrainedModel,ca=o.PatchTSTForPrediction,pa=o.PatchTSTModel,ha=o.PatchTSTPreTrainedModel,ma=o.Phi3ForCausalLM,fa=o.Phi3Model,ga=o.Phi3PreTrainedModel,_a=o.PhiForCausalLM,wa=o.PhiModel,ya=o.PhiPreTrainedModel,ba=o.Pipeline,va=o.PreTrainedModel,xa=o.PreTrainedTokenizer,Ma=o.PretrainedConfig,Ta=o.PretrainedMixin,ka=o.Processor,$a=o.PvtForImageClassification,Ca=o.PvtImageProcessor,Pa=o.PvtModel,Sa=o.PvtPreTrainedModel,Ea=o.PyAnnoteFeatureExtractor,Fa=o.PyAnnoteForAudioFrameClassification,Aa=o.PyAnnoteModel,Ia=o.PyAnnotePreTrainedModel,za=o.PyAnnoteProcessor,La=o.QuestionAnsweringModelOutput,Oa=o.QuestionAnsweringPipeline,Ba=o.Qwen2ForCausalLM,Da=o.Qwen2Model,Na=o.Qwen2PreTrainedModel,Ra=o.Qwen2Tokenizer,ja=o.Qwen2VLForConditionalGeneration,Va=o.Qwen2VLImageProcessor,Ga=
o.Qwen2VLPreTrainedModel,Ua=o.Qwen2VLProcessor,qa=o.RTDetrForObjectDetection,Wa=o.RTDetrImageProcessor,Ha=o.RTDetrModel,Xa=o.RTDetrObjectDetectionOutput,Qa=o.RTDetrPreTrainedModel,Ka=o.RawImage,Ya=o.RepetitionPenaltyLogitsProcessor,Ja=o.ResNetForImageClassification,Za=o.ResNetModel,el=o.ResNetPreTrainedModel,tl=o.RoFormerForMaskedLM,nl=o.RoFormerForQuestionAnswering,rl=o.RoFormerForSequenceClassification,sl=o.RoFormerForTokenClassification,ol=o.RoFormerModel,il=o.RoFormerPreTrainedModel,al=o.RoFormerTokenizer,ll=o.RobertaForMaskedLM,dl=o.RobertaForQuestionAnswering,ul=o.RobertaForSequenceClassification,cl=o.RobertaForTokenClassification,pl=o.RobertaModel,hl=o.RobertaPreTrainedModel,ml=o.RobertaTokenizer,fl=o.SamImageProcessor,gl=o.SamImageSegmentationOutput,_l=o.SamModel,wl=o.SamPreTrainedModel,yl=o.SamProcessor,bl=o.SapiensForDepthEstimation,vl=o.SapiensForNormalEstimation,xl=o.SapiensForSemanticSegmentation,Ml=o.SapiensPreTrainedModel,Tl=o.SeamlessM4TFeatureExtractor,kl=o.SegformerFeatureExtractor,$l=o.SegformerForImageClassification,Cl=o.SegformerForSemanticSegmentation,Pl=o.SegformerImageProcessor,Sl=o.SegformerModel,El=o.SegformerPreTrainedModel,Fl=o.Seq2SeqLMOutput,Al=o.SequenceClassifierOutput,Il=o.SiglipImageProcessor,zl=o.SiglipModel,Ll=o.SiglipPreTrainedModel,Ol=o.SiglipTextModel,Bl=o.SiglipTokenizer,Dl=o.SiglipVisionModel,Nl=o.SpeechT5FeatureExtractor,Rl=o.SpeechT5ForSpeechToText,jl=o.SpeechT5ForTextToSpeech,Vl=o.SpeechT5HifiGan,Gl=o.SpeechT5Model,Ul=o.SpeechT5PreTrainedModel,ql=o.SpeechT5Processor,Wl=o.SpeechT5Tokenizer,Hl=o.SqueezeBertForMaskedLM,Xl=o.SqueezeBertForQuestionAnswering,Ql=o.SqueezeBertForSequenceClassification,Kl=o.SqueezeBertModel,Yl=o.SqueezeBertPreTrainedModel,Jl=o.SqueezeBertTokenizer,Zl=o.StableLmForCausalLM,ed=o.StableLmModel,td=o.StableLmPreTrainedModel,nd=o.Starcoder2ForCausalLM,rd=o.Starcoder2Model,sd=o.Starcoder2PreTrainedModel,od=o.StoppingCriteria,id=o.StoppingCriteriaList,ad=o.SummarizationPipeline,ld=o.SuppressTokensAtBeginLo
gitsProcessor,dd=o.Swin2SRForImageSuperResolution,ud=o.Swin2SRImageProcessor,cd=o.Swin2SRModel,pd=o.Swin2SRPreTrainedModel,hd=o.SwinForImageClassification,md=o.SwinModel,fd=o.SwinPreTrainedModel,gd=o.T5ForConditionalGeneration,_d=o.T5Model,wd=o.T5PreTrainedModel,yd=o.T5Tokenizer,bd=o.TableTransformerForObjectDetection,vd=o.TableTransformerModel,xd=o.TableTransformerObjectDetectionOutput,Md=o.TableTransformerPreTrainedModel,Td=o.TemperatureLogitsWarper,kd=o.Tensor,$d=o.Text2TextGenerationPipeline,Cd=o.TextClassificationPipeline,Pd=o.TextGenerationPipeline,Sd=o.TextStreamer,Ed=o.TextToAudioPipeline,Fd=o.TokenClassificationPipeline,Ad=o.TokenClassifierOutput,Id=o.TokenizerModel,zd=o.TopKLogitsWarper,Ld=o.TopPLogitsWarper,Od=o.TrOCRForCausalLM,Bd=o.TrOCRPreTrainedModel,Dd=o.TranslationPipeline,Nd=o.UniSpeechForCTC,Rd=o.UniSpeechForSequenceClassification,jd=o.UniSpeechModel,Vd=o.UniSpeechPreTrainedModel,Gd=o.UniSpeechSatForAudioFrameClassification,Ud=o.UniSpeechSatForCTC,qd=o.UniSpeechSatForSequenceClassification,Wd=o.UniSpeechSatModel,Hd=o.UniSpeechSatPreTrainedModel,Xd=o.VLChatProcessor,Qd=o.VLMImageProcessor,Kd=o.ViTFeatureExtractor,Yd=o.ViTForImageClassification,Jd=o.ViTImageProcessor,Zd=o.ViTMAEModel,eu=o.ViTMAEPreTrainedModel,tu=o.ViTMSNForImageClassification,nu=o.ViTMSNModel,ru=o.ViTMSNPreTrainedModel,su=o.ViTModel,ou=o.ViTPreTrainedModel,iu=o.VisionEncoderDecoderModel,au=o.VitMatteForImageMatting,lu=o.VitMatteImageProcessor,du=o.VitMattePreTrainedModel,uu=o.VitPoseForPoseEstimation,cu=o.VitPoseImageProcessor,pu=o.VitPosePreTrainedModel,hu=o.VitsModel,mu=o.VitsModelOutput,fu=o.VitsPreTrainedModel,gu=o.VitsTokenizer,_u=o.Wav2Vec2BertForCTC,wu=o.Wav2Vec2BertForSequenceClassification,yu=o.Wav2Vec2BertModel,bu=o.Wav2Vec2BertPreTrainedModel,vu=o.Wav2Vec2CTCTokenizer,xu=o.Wav2Vec2FeatureExtractor,Mu=o.Wav2Vec2ForAudioFrameClassification,Tu=o.Wav2Vec2ForCTC,ku=o.Wav2Vec2ForSequenceClassification,$u=o.Wav2Vec2Model,Cu=o.Wav2Vec2PreTrainedModel,Pu=o.Wav2Vec2ProcessorWithLM
,Su=o.WavLMForAudioFrameClassification,Eu=o.WavLMForCTC,Fu=o.WavLMForSequenceClassification,Au=o.WavLMForXVector,Iu=o.WavLMModel,zu=o.WavLMPreTrainedModel,Lu=o.WeSpeakerFeatureExtractor,Ou=o.WeSpeakerResNetModel,Bu=o.WeSpeakerResNetPreTrainedModel,Du=o.WhisperFeatureExtractor,Nu=o.WhisperForConditionalGeneration,Ru=o.WhisperModel,ju=o.WhisperPreTrainedModel,Vu=o.WhisperProcessor,Gu=o.WhisperTextStreamer,Uu=o.WhisperTimeStampLogitsProcessor,qu=o.WhisperTokenizer,Wu=o.XLMForQuestionAnswering,Hu=o.XLMForSequenceClassification,Xu=o.XLMForTokenClassification,Qu=o.XLMModel,Ku=o.XLMPreTrainedModel,Yu=o.XLMRobertaForMaskedLM,Ju=o.XLMRobertaForQuestionAnswering,Zu=o.XLMRobertaForSequenceClassification,ec=o.XLMRobertaForTokenClassification,tc=o.XLMRobertaModel,nc=o.XLMRobertaPreTrainedModel,rc=o.XLMRobertaTokenizer,sc=o.XLMTokenizer,oc=o.XLMWithLMHeadModel,ic=o.XVectorOutput,ac=o.YolosFeatureExtractor,lc=o.YolosForObjectDetection,dc=o.YolosImageProcessor,uc=o.YolosModel,cc=o.YolosObjectDetectionOutput,pc=o.YolosPreTrainedModel,hc=o.ZeroShotAudioClassificationPipeline,mc=o.ZeroShotClassificationPipeline,fc=o.ZeroShotImageClassificationPipeline,gc=o.ZeroShotObjectDetectionPipeline,_c=o.bankers_round,wc=o.cat,yc=o.cos_sim,bc=o.dot,vc=o.dynamic_time_warping,xc=o.env,Mc=o.full,Tc=o.full_like,kc=o.getKeyValueShapes,$c=o.hamming,Cc=o.hanning,Pc=o.interpolate,Sc=o.interpolate_4d,Ec=o.interpolate_data,Fc=o.is_chinese_char,Ac=o.layer_norm,Ic=o.log_softmax,zc=o.magnitude,Lc=o.matmul,Oc=o.max,Bc=o.mean,Dc=o.mean_pooling,Nc=o.medianFilter,Rc=o.mel_filter_bank,jc=o.min,Vc=o.ones,Gc=o.ones_like,Uc=o.permute,qc=o.permute_data,Wc=o.pipeline,Hc=o.quantize_embeddings,Xc=o.read_audio,Qc=o.rfft,Kc=o.round,Yc=o.softmax,Jc=o.spectrogram,Zc=o.stack,ep=o.std_mean,tp=o.topk,np=o.window_function,rp=o.zeros,sp=o.zeros_like;export{C as ASTFeatureExtractor,P as ASTForAudioClassification,S as ASTModel,E as ASTPreTrainedModel,F as AlbertForMaskedLM,A as AlbertForQuestionAnswering,I as 
AlbertForSequenceClassification,z as AlbertModel,L as AlbertPreTrainedModel,O as AlbertTokenizer,B as AudioClassificationPipeline,D as AutoConfig,N as AutoFeatureExtractor,R as AutoImageProcessor,j as AutoModel,V as AutoModelForAudioClassification,G as AutoModelForAudioFrameClassification,U as AutoModelForCTC,q as AutoModelForCausalLM,W as AutoModelForDepthEstimation,H as AutoModelForDocumentQuestionAnswering,X as AutoModelForImageClassification,Q as AutoModelForImageFeatureExtraction,K as AutoModelForImageMatting,Y as AutoModelForImageSegmentation,J as AutoModelForImageToImage,Z as AutoModelForMaskGeneration,ee as AutoModelForMaskedLM,te as AutoModelForNormalEstimation,ne as AutoModelForObjectDetection,re as AutoModelForPoseEstimation,se as AutoModelForQuestionAnswering,oe as AutoModelForSemanticSegmentation,ie as AutoModelForSeq2SeqLM,ae as AutoModelForSequenceClassification,le as AutoModelForSpeechSeq2Seq,de as AutoModelForTextToSpectrogram,ue as AutoModelForTextToWaveform,ce as AutoModelForTokenClassification,pe as AutoModelForUniversalSegmentation,he as AutoModelForVision2Seq,me as AutoModelForXVector,fe as AutoModelForZeroShotObjectDetection,ge as AutoProcessor,_e as AutoTokenizer,we as AutomaticSpeechRecognitionPipeline,ye as BartForConditionalGeneration,be as BartForSequenceClassification,ve as BartModel,xe as BartPretrainedModel,Me as BartTokenizer,Te as BaseModelOutput,ke as BaseStreamer,$e as BeitFeatureExtractor,Ce as BeitForImageClassification,Pe as BeitModel,Se as BeitPreTrainedModel,Ee as BertForMaskedLM,Fe as BertForQuestionAnswering,Ae as BertForSequenceClassification,Ie as BertForTokenClassification,ze as BertModel,Le as BertPreTrainedModel,Oe as BertTokenizer,Be as BitImageProcessor,De as BlenderbotForConditionalGeneration,Ne as BlenderbotModel,Re as BlenderbotPreTrainedModel,je as BlenderbotSmallForConditionalGeneration,Ve as BlenderbotSmallModel,Ge as BlenderbotSmallPreTrainedModel,Ue as BlenderbotSmallTokenizer,qe as BlenderbotTokenizer,We as 
BloomForCausalLM,He as BloomModel,Xe as BloomPreTrainedModel,Qe as BloomTokenizer,Ke as CLIPFeatureExtractor,Ye as CLIPImageProcessor,Je as CLIPModel,Ze as CLIPPreTrainedModel,et as CLIPSegForImageSegmentation,tt as CLIPSegModel,nt as CLIPSegPreTrainedModel,rt as CLIPTextModel,st as CLIPTextModelWithProjection,ot as CLIPTokenizer,it as CLIPVisionModel,at as CLIPVisionModelWithProjection,lt as CamembertForMaskedLM,dt as CamembertForQuestionAnswering,ut as CamembertForSequenceClassification,ct as CamembertForTokenClassification,pt as CamembertModel,ht as CamembertPreTrainedModel,mt as CamembertTokenizer,ft as CausalLMOutput,gt as CausalLMOutputWithPast,_t as ChineseCLIPFeatureExtractor,wt as ChineseCLIPModel,yt as ChineseCLIPPreTrainedModel,bt as ClapAudioModelWithProjection,vt as ClapFeatureExtractor,xt as ClapModel,Mt as ClapPreTrainedModel,Tt as ClapTextModelWithProjection,kt as ClassifierFreeGuidanceLogitsProcessor,$t as CodeGenForCausalLM,Ct as CodeGenModel,Pt as CodeGenPreTrainedModel,St as CodeGenTokenizer,Et as CodeLlamaTokenizer,Ft as CohereForCausalLM,At as CohereModel,It as CoherePreTrainedModel,zt as CohereTokenizer,Lt as ConvBertForMaskedLM,Ot as ConvBertForQuestionAnswering,Bt as ConvBertForSequenceClassification,Dt as ConvBertForTokenClassification,Nt as ConvBertModel,Rt as ConvBertPreTrainedModel,jt as ConvBertTokenizer,Vt as ConvNextFeatureExtractor,Gt as ConvNextForImageClassification,Ut as ConvNextImageProcessor,qt as ConvNextModel,Wt as ConvNextPreTrainedModel,Ht as ConvNextV2ForImageClassification,Xt as ConvNextV2Model,Qt as ConvNextV2PreTrainedModel,Kt as DPTFeatureExtractor,Yt as DPTForDepthEstimation,Jt as DPTImageProcessor,Zt as DPTModel,en as DPTPreTrainedModel,tn as DebertaForMaskedLM,nn as DebertaForQuestionAnswering,rn as DebertaForSequenceClassification,sn as DebertaForTokenClassification,on as DebertaModel,an as DebertaPreTrainedModel,ln as DebertaTokenizer,dn as DebertaV2ForMaskedLM,un as DebertaV2ForQuestionAnswering,cn as 
DebertaV2ForSequenceClassification,pn as DebertaV2ForTokenClassification,hn as DebertaV2Model,mn as DebertaV2PreTrainedModel,fn as DebertaV2Tokenizer,gn as DecisionTransformerModel,_n as DecisionTransformerPreTrainedModel,wn as DeiTFeatureExtractor,yn as DeiTForImageClassification,bn as DeiTImageProcessor,vn as DeiTModel,xn as DeiTPreTrainedModel,Mn as DepthAnythingForDepthEstimation,Tn as DepthAnythingPreTrainedModel,kn as DepthEstimationPipeline,$n as DepthProForDepthEstimation,Cn as DepthProPreTrainedModel,Pn as DetrFeatureExtractor,Sn as DetrForObjectDetection,En as DetrForSegmentation,Fn as DetrImageProcessor,An as DetrModel,In as DetrObjectDetectionOutput,zn as DetrPreTrainedModel,Ln as DetrSegmentationOutput,On as Dinov2ForImageClassification,Bn as Dinov2Model,Dn as Dinov2PreTrainedModel,Nn as DistilBertForMaskedLM,Rn as DistilBertForQuestionAnswering,jn as DistilBertForSequenceClassification,Vn as DistilBertForTokenClassification,Gn as DistilBertModel,Un as DistilBertPreTrainedModel,qn as DistilBertTokenizer,Wn as DocumentQuestionAnsweringPipeline,Hn as DonutFeatureExtractor,Xn as DonutImageProcessor,Qn as DonutSwinModel,Kn as DonutSwinPreTrainedModel,Yn as EfficientNetForImageClassification,Jn as EfficientNetImageProcessor,Zn as EfficientNetModel,er as EfficientNetPreTrainedModel,tr as ElectraForMaskedLM,nr as ElectraForQuestionAnswering,rr as ElectraForSequenceClassification,sr as ElectraForTokenClassification,or as ElectraModel,ir as ElectraPreTrainedModel,ar as ElectraTokenizer,lr as EosTokenCriteria,dr as EsmForMaskedLM,ur as EsmForSequenceClassification,cr as EsmForTokenClassification,pr as EsmModel,hr as EsmPreTrainedModel,mr as EsmTokenizer,fr as FFT,gr as FalconForCausalLM,_r as FalconModel,wr as FalconPreTrainedModel,yr as FalconTokenizer,br as FastViTForImageClassification,vr as FastViTModel,xr as FastViTPreTrainedModel,Mr as FeatureExtractionPipeline,Tr as FeatureExtractor,kr as FillMaskPipeline,$r as Florence2ForConditionalGeneration,Cr as 
Florence2PreTrainedModel,Pr as Florence2Processor,Sr as ForcedBOSTokenLogitsProcessor,Er as ForcedEOSTokenLogitsProcessor,Fr as GLPNFeatureExtractor,Ar as GLPNForDepthEstimation,Ir as GLPNModel,zr as GLPNPreTrainedModel,Lr as GPT2LMHeadModel,Or as GPT2Model,Br as GPT2PreTrainedModel,Dr as GPT2Tokenizer,Nr as GPTBigCodeForCausalLM,Rr as GPTBigCodeModel,jr as GPTBigCodePreTrainedModel,Vr as GPTJForCausalLM,Gr as GPTJModel,Ur as GPTJPreTrainedModel,qr as GPTNeoForCausalLM,Wr as GPTNeoModel,Hr as GPTNeoPreTrainedModel,Xr as GPTNeoXForCausalLM,Qr as GPTNeoXModel,Kr as GPTNeoXPreTrainedModel,Yr as GPTNeoXTokenizer,Jr as Gemma2ForCausalLM,Zr as Gemma2Model,es as Gemma2PreTrainedModel,ts as GemmaForCausalLM,ns as GemmaModel,rs as GemmaPreTrainedModel,ss as GemmaTokenizer,os as GraniteForCausalLM,is as GraniteModel,as as GranitePreTrainedModel,ls as Grok1Tokenizer,ds as GroupViTModel,us as GroupViTPreTrainedModel,cs as HerbertTokenizer,ps as HieraForImageClassification,hs as HieraModel,ms as HieraPreTrainedModel,fs as HubertForCTC,gs as HubertForSequenceClassification,_s as HubertModel,ws as HubertPreTrainedModel,ys as ImageClassificationPipeline,bs as ImageFeatureExtractionPipeline,vs as ImageFeatureExtractor,xs as ImageMattingOutput,Ms as ImageProcessor,Ts as ImageSegmentationPipeline,ks as ImageToImagePipeline,$s as ImageToTextPipeline,Cs as InterruptableStoppingCriteria,Ps as JAISLMHeadModel,Ss as JAISModel,Es as JAISPreTrainedModel,Fs as JinaCLIPImageProcessor,As as JinaCLIPModel,Is as JinaCLIPPreTrainedModel,zs as JinaCLIPProcessor,Ls as JinaCLIPTextModel,Os as JinaCLIPVisionModel,Bs as LlamaForCausalLM,Ds as LlamaModel,Ns as LlamaPreTrainedModel,Rs as LlamaTokenizer,js as LlavaForConditionalGeneration,Vs as LlavaOnevisionForConditionalGeneration,Gs as LlavaOnevisionImageProcessor,Us as LlavaPreTrainedModel,qs as LogitsProcessor,Ws as LogitsProcessorList,Hs as LogitsWarper,Xs as LongT5ForConditionalGeneration,Qs as LongT5Model,Ks as LongT5PreTrainedModel,Ys as 
M2M100ForConditionalGeneration,Js as M2M100Model,Zs as M2M100PreTrainedModel,eo as M2M100Tokenizer,to as MBart50Tokenizer,no as MBartForCausalLM,ro as MBartForConditionalGeneration,so as MBartForSequenceClassification,oo as MBartModel,io as MBartPreTrainedModel,ao as MBartTokenizer,lo as MPNetForMaskedLM,uo as MPNetForQuestionAnswering,co as MPNetForSequenceClassification,po as MPNetForTokenClassification,ho as MPNetModel,mo as MPNetPreTrainedModel,fo as MPNetTokenizer,go as MT5ForConditionalGeneration,_o as MT5Model,wo as MT5PreTrainedModel,yo as MarianMTModel,bo as MarianModel,vo as MarianPreTrainedModel,xo as MarianTokenizer,Mo as Mask2FormerImageProcessor,To as MaskFormerFeatureExtractor,ko as MaskFormerForInstanceSegmentation,$o as MaskFormerImageProcessor,Co as MaskFormerModel,Po as MaskFormerPreTrainedModel,So as MaskedLMOutput,Eo as MaxLengthCriteria,Fo as MgpstrForSceneTextRecognition,Ao as MgpstrModelOutput,Io as MgpstrPreTrainedModel,zo as MgpstrProcessor,Lo as MgpstrTokenizer,Oo as MinLengthLogitsProcessor,Bo as MinNewTokensLengthLogitsProcessor,Do as MistralForCausalLM,No as MistralModel,Ro as MistralPreTrainedModel,jo as MobileBertForMaskedLM,Vo as MobileBertForQuestionAnswering,Go as MobileBertForSequenceClassification,Uo as MobileBertModel,qo as MobileBertPreTrainedModel,Wo as MobileBertTokenizer,Ho as MobileLLMForCausalLM,Xo as MobileLLMModel,Qo as MobileLLMPreTrainedModel,Ko as MobileNetV1FeatureExtractor,Yo as MobileNetV1ForImageClassification,Jo as MobileNetV1ImageProcessor,Zo as MobileNetV1Model,ei as MobileNetV1PreTrainedModel,ti as MobileNetV2FeatureExtractor,ni as MobileNetV2ForImageClassification,ri as MobileNetV2ImageProcessor,si as MobileNetV2Model,oi as MobileNetV2PreTrainedModel,ii as MobileNetV3FeatureExtractor,ai as MobileNetV3ForImageClassification,li as MobileNetV3ImageProcessor,di as MobileNetV3Model,ui as MobileNetV3PreTrainedModel,ci as MobileNetV4FeatureExtractor,pi as MobileNetV4ForImageClassification,hi as 
MobileNetV4ImageProcessor,mi as MobileNetV4Model,fi as MobileNetV4PreTrainedModel,gi as MobileViTFeatureExtractor,_i as MobileViTForImageClassification,wi as MobileViTImageProcessor,yi as MobileViTModel,bi as MobileViTPreTrainedModel,vi as MobileViTV2ForImageClassification,xi as MobileViTV2Model,Mi as MobileViTV2PreTrainedModel,Ti as ModelOutput,ki as Moondream1ForConditionalGeneration,$i as MptForCausalLM,Ci as MptModel,Pi as MptPreTrainedModel,Si as MultiModalityCausalLM,Ei as MultiModalityPreTrainedModel,Fi as MusicgenForCausalLM,Ai as MusicgenForConditionalGeneration,Ii as MusicgenModel,zi as MusicgenPreTrainedModel,Li as NllbTokenizer,Oi as NoBadWordsLogitsProcessor,Bi as NoRepeatNGramLogitsProcessor,Di as NomicBertModel,Ni as NomicBertPreTrainedModel,Ri as NougatImageProcessor,ji as NougatTokenizer,Vi as OPTForCausalLM,Gi as OPTModel,Ui as OPTPreTrainedModel,qi as ObjectDetectionPipeline,Wi as OlmoForCausalLM,Hi as OlmoModel,Xi as OlmoPreTrainedModel,Qi as OpenELMForCausalLM,Ki as OpenELMModel,Yi as OpenELMPreTrainedModel,Ji as OwlViTFeatureExtractor,Zi as OwlViTForObjectDetection,ea as OwlViTImageProcessor,ta as OwlViTModel,na as OwlViTPreTrainedModel,ra as OwlViTProcessor,sa as Owlv2ForObjectDetection,oa as Owlv2ImageProcessor,ia as Owlv2Model,aa as Owlv2PreTrainedModel,la as PatchTSMixerForPrediction,da as PatchTSMixerModel,ua as PatchTSMixerPreTrainedModel,ca as PatchTSTForPrediction,pa as PatchTSTModel,ha as PatchTSTPreTrainedModel,ma as Phi3ForCausalLM,fa as Phi3Model,ga as Phi3PreTrainedModel,_a as PhiForCausalLM,wa as PhiModel,ya as PhiPreTrainedModel,ba as Pipeline,va as PreTrainedModel,xa as PreTrainedTokenizer,Ma as PretrainedConfig,Ta as PretrainedMixin,ka as Processor,$a as PvtForImageClassification,Ca as PvtImageProcessor,Pa as PvtModel,Sa as PvtPreTrainedModel,Ea as PyAnnoteFeatureExtractor,Fa as PyAnnoteForAudioFrameClassification,Aa as PyAnnoteModel,Ia as PyAnnotePreTrainedModel,za as PyAnnoteProcessor,La as QuestionAnsweringModelOutput,Oa as 
QuestionAnsweringPipeline,Ba as Qwen2ForCausalLM,Da as Qwen2Model,Na as Qwen2PreTrainedModel,Ra as Qwen2Tokenizer,ja as Qwen2VLForConditionalGeneration,Va as Qwen2VLImageProcessor,Ga as Qwen2VLPreTrainedModel,Ua as Qwen2VLProcessor,qa as RTDetrForObjectDetection,Wa as RTDetrImageProcessor,Ha as RTDetrModel,Xa as RTDetrObjectDetectionOutput,Qa as RTDetrPreTrainedModel,Ka as RawImage,Ya as RepetitionPenaltyLogitsProcessor,Ja as ResNetForImageClassification,Za as ResNetModel,el as ResNetPreTrainedModel,tl as RoFormerForMaskedLM,nl as RoFormerForQuestionAnswering,rl as RoFormerForSequenceClassification,sl as RoFormerForTokenClassification,ol as RoFormerModel,il as RoFormerPreTrainedModel,al as RoFormerTokenizer,ll as RobertaForMaskedLM,dl as RobertaForQuestionAnswering,ul as RobertaForSequenceClassification,cl as RobertaForTokenClassification,pl as RobertaModel,hl as RobertaPreTrainedModel,ml as RobertaTokenizer,fl as SamImageProcessor,gl as SamImageSegmentationOutput,_l as SamModel,wl as SamPreTrainedModel,yl as SamProcessor,bl as SapiensForDepthEstimation,vl as SapiensForNormalEstimation,xl as SapiensForSemanticSegmentation,Ml as SapiensPreTrainedModel,Tl as SeamlessM4TFeatureExtractor,kl as SegformerFeatureExtractor,$l as SegformerForImageClassification,Cl as SegformerForSemanticSegmentation,Pl as SegformerImageProcessor,Sl as SegformerModel,El as SegformerPreTrainedModel,Fl as Seq2SeqLMOutput,Al as SequenceClassifierOutput,Il as SiglipImageProcessor,zl as SiglipModel,Ll as SiglipPreTrainedModel,Ol as SiglipTextModel,Bl as SiglipTokenizer,Dl as SiglipVisionModel,Nl as SpeechT5FeatureExtractor,Rl as SpeechT5ForSpeechToText,jl as SpeechT5ForTextToSpeech,Vl as SpeechT5HifiGan,Gl as SpeechT5Model,Ul as SpeechT5PreTrainedModel,ql as SpeechT5Processor,Wl as SpeechT5Tokenizer,Hl as SqueezeBertForMaskedLM,Xl as SqueezeBertForQuestionAnswering,Ql as SqueezeBertForSequenceClassification,Kl as SqueezeBertModel,Yl as SqueezeBertPreTrainedModel,Jl as SqueezeBertTokenizer,Zl as 
StableLmForCausalLM,ed as StableLmModel,td as StableLmPreTrainedModel,nd as Starcoder2ForCausalLM,rd as Starcoder2Model,sd as Starcoder2PreTrainedModel,od as StoppingCriteria,id as StoppingCriteriaList,ad as SummarizationPipeline,ld as SuppressTokensAtBeginLogitsProcessor,dd as Swin2SRForImageSuperResolution,ud as Swin2SRImageProcessor,cd as Swin2SRModel,pd as Swin2SRPreTrainedModel,hd as SwinForImageClassification,md as SwinModel,fd as SwinPreTrainedModel,gd as T5ForConditionalGeneration,_d as T5Model,wd as T5PreTrainedModel,yd as T5Tokenizer,bd as TableTransformerForObjectDetection,vd as TableTransformerModel,xd as TableTransformerObjectDetectionOutput,Md as TableTransformerPreTrainedModel,Td as TemperatureLogitsWarper,kd as Tensor,$d as Text2TextGenerationPipeline,Cd as TextClassificationPipeline,Pd as TextGenerationPipeline,Sd as TextStreamer,Ed as TextToAudioPipeline,Fd as TokenClassificationPipeline,Ad as TokenClassifierOutput,Id as TokenizerModel,zd as TopKLogitsWarper,Ld as TopPLogitsWarper,Od as TrOCRForCausalLM,Bd as TrOCRPreTrainedModel,Dd as TranslationPipeline,Nd as UniSpeechForCTC,Rd as UniSpeechForSequenceClassification,jd as UniSpeechModel,Vd as UniSpeechPreTrainedModel,Gd as UniSpeechSatForAudioFrameClassification,Ud as UniSpeechSatForCTC,qd as UniSpeechSatForSequenceClassification,Wd as UniSpeechSatModel,Hd as UniSpeechSatPreTrainedModel,Xd as VLChatProcessor,Qd as VLMImageProcessor,Kd as ViTFeatureExtractor,Yd as ViTForImageClassification,Jd as ViTImageProcessor,Zd as ViTMAEModel,eu as ViTMAEPreTrainedModel,tu as ViTMSNForImageClassification,nu as ViTMSNModel,ru as ViTMSNPreTrainedModel,su as ViTModel,ou as ViTPreTrainedModel,iu as VisionEncoderDecoderModel,au as VitMatteForImageMatting,lu as VitMatteImageProcessor,du as VitMattePreTrainedModel,uu as VitPoseForPoseEstimation,cu as VitPoseImageProcessor,pu as VitPosePreTrainedModel,hu as VitsModel,mu as VitsModelOutput,fu as VitsPreTrainedModel,gu as VitsTokenizer,_u as Wav2Vec2BertForCTC,wu as 
Wav2Vec2BertForSequenceClassification,yu as Wav2Vec2BertModel,bu as Wav2Vec2BertPreTrainedModel,vu as Wav2Vec2CTCTokenizer,xu as Wav2Vec2FeatureExtractor,Mu as Wav2Vec2ForAudioFrameClassification,Tu as Wav2Vec2ForCTC,ku as Wav2Vec2ForSequenceClassification,$u as Wav2Vec2Model,Cu as Wav2Vec2PreTrainedModel,Pu as Wav2Vec2ProcessorWithLM,Su as WavLMForAudioFrameClassification,Eu as WavLMForCTC,Fu as WavLMForSequenceClassification,Au as WavLMForXVector,Iu as WavLMModel,zu as WavLMPreTrainedModel,Lu as WeSpeakerFeatureExtractor,Ou as WeSpeakerResNetModel,Bu as WeSpeakerResNetPreTrainedModel,Du as WhisperFeatureExtractor,Nu as WhisperForConditionalGeneration,Ru as WhisperModel,ju as WhisperPreTrainedModel,Vu as WhisperProcessor,Gu as WhisperTextStreamer,Uu as WhisperTimeStampLogitsProcessor,qu as WhisperTokenizer,Wu as XLMForQuestionAnswering,Hu as XLMForSequenceClassification,Xu as XLMForTokenClassification,Qu as XLMModel,Ku as XLMPreTrainedModel,Yu as XLMRobertaForMaskedLM,Ju as XLMRobertaForQuestionAnswering,Zu as XLMRobertaForSequenceClassification,ec as XLMRobertaForTokenClassification,tc as XLMRobertaModel,nc as XLMRobertaPreTrainedModel,rc as XLMRobertaTokenizer,sc as XLMTokenizer,oc as XLMWithLMHeadModel,ic as XVectorOutput,ac as YolosFeatureExtractor,lc as YolosForObjectDetection,dc as YolosImageProcessor,uc as YolosModel,cc as YolosObjectDetectionOutput,pc as YolosPreTrainedModel,hc as ZeroShotAudioClassificationPipeline,mc as ZeroShotClassificationPipeline,fc as ZeroShotImageClassificationPipeline,gc as ZeroShotObjectDetectionPipeline,_c as bankers_round,wc as cat,yc as cos_sim,bc as dot,vc as dynamic_time_warping,xc as env,Mc as full,Tc as full_like,kc as getKeyValueShapes,$c as hamming,Cc as hanning,Pc as interpolate,Sc as interpolate_4d,Ec as interpolate_data,Fc as is_chinese_char,Ac as layer_norm,Ic as log_softmax,zc as magnitude,Lc as matmul,Oc as max,Bc as mean,Dc as mean_pooling,Nc as medianFilter,Rc as mel_filter_bank,jc as min,Vc as ones,Gc as 
ones_like,Uc as permute,qc as permute_data,Wc as pipeline,Hc as quantize_embeddings,Xc as read_audio,Qc as rfft,Kc as round,Yc as softmax,Jc as spectrogram,Zc as stack,ep as std_mean,tp as topk,np as window_function,rp as zeros,sp as zeros_like};
|
|
421
|
+
\*****************************/s.r(o),s.d(o,{ASTFeatureExtractor:()=>g.ASTFeatureExtractor,ASTForAudioClassification:()=>l.ASTForAudioClassification,ASTModel:()=>l.ASTModel,ASTPreTrainedModel:()=>l.ASTPreTrainedModel,AlbertForMaskedLM:()=>l.AlbertForMaskedLM,AlbertForQuestionAnswering:()=>l.AlbertForQuestionAnswering,AlbertForSequenceClassification:()=>l.AlbertForSequenceClassification,AlbertModel:()=>l.AlbertModel,AlbertPreTrainedModel:()=>l.AlbertPreTrainedModel,AlbertTokenizer:()=>d.AlbertTokenizer,AudioClassificationPipeline:()=>a.AudioClassificationPipeline,AutoConfig:()=>u.AutoConfig,AutoFeatureExtractor:()=>_.AutoFeatureExtractor,AutoImageProcessor:()=>b.AutoImageProcessor,AutoModel:()=>l.AutoModel,AutoModelForAudioClassification:()=>l.AutoModelForAudioClassification,AutoModelForAudioFrameClassification:()=>l.AutoModelForAudioFrameClassification,AutoModelForCTC:()=>l.AutoModelForCTC,AutoModelForCausalLM:()=>l.AutoModelForCausalLM,AutoModelForDepthEstimation:()=>l.AutoModelForDepthEstimation,AutoModelForDocumentQuestionAnswering:()=>l.AutoModelForDocumentQuestionAnswering,AutoModelForImageClassification:()=>l.AutoModelForImageClassification,AutoModelForImageFeatureExtraction:()=>l.AutoModelForImageFeatureExtraction,AutoModelForImageMatting:()=>l.AutoModelForImageMatting,AutoModelForImageSegmentation:()=>l.AutoModelForImageSegmentation,AutoModelForImageToImage:()=>l.AutoModelForImageToImage,AutoModelForMaskGeneration:()=>l.AutoModelForMaskGeneration,AutoModelForMaskedLM:()=>l.AutoModelForMaskedLM,AutoModelForNormalEstimation:()=>l.AutoModelForNormalEstimation,AutoModelForObjectDetection:()=>l.AutoModelForObjectDetection,AutoModelForPoseEstimation:()=>l.AutoModelForPoseEstimation,AutoModelForQuestionAnswering:()=>l.AutoModelForQuestionAnswering,AutoModelForSemanticSegmentation:()=>l.AutoModelForSemanticSegmentation,AutoModelForSeq2SeqLM:()=>l.AutoModelForSeq2SeqLM,AutoModelForSequenceClassification:()=>l.AutoModelForSequenceClassification,AutoModelForSpeechSeq2S
eq:()=>l.AutoModelForSpeechSeq2Seq,AutoModelForTextToSpectrogram:()=>l.AutoModelForTextToSpectrogram,AutoModelForTextToWaveform:()=>l.AutoModelForTextToWaveform,AutoModelForTokenClassification:()=>l.AutoModelForTokenClassification,AutoModelForUniversalSegmentation:()=>l.AutoModelForUniversalSegmentation,AutoModelForVision2Seq:()=>l.AutoModelForVision2Seq,AutoModelForXVector:()=>l.AutoModelForXVector,AutoModelForZeroShotObjectDetection:()=>l.AutoModelForZeroShotObjectDetection,AutoProcessor:()=>M.AutoProcessor,AutoTokenizer:()=>d.AutoTokenizer,AutomaticSpeechRecognitionPipeline:()=>a.AutomaticSpeechRecognitionPipeline,BartForConditionalGeneration:()=>l.BartForConditionalGeneration,BartForSequenceClassification:()=>l.BartForSequenceClassification,BartModel:()=>l.BartModel,BartPretrainedModel:()=>l.BartPretrainedModel,BartTokenizer:()=>d.BartTokenizer,BaseModelOutput:()=>l.BaseModelOutput,BaseStreamer:()=>T.BaseStreamer,BeitFeatureExtractor:()=>y.BeitFeatureExtractor,BeitForImageClassification:()=>l.BeitForImageClassification,BeitModel:()=>l.BeitModel,BeitPreTrainedModel:()=>l.BeitPreTrainedModel,BertForMaskedLM:()=>l.BertForMaskedLM,BertForQuestionAnswering:()=>l.BertForQuestionAnswering,BertForSequenceClassification:()=>l.BertForSequenceClassification,BertForTokenClassification:()=>l.BertForTokenClassification,BertModel:()=>l.BertModel,BertPreTrainedModel:()=>l.BertPreTrainedModel,BertTokenizer:()=>d.BertTokenizer,BitImageProcessor:()=>y.BitImageProcessor,BlenderbotForConditionalGeneration:()=>l.BlenderbotForConditionalGeneration,BlenderbotModel:()=>l.BlenderbotModel,BlenderbotPreTrainedModel:()=>l.BlenderbotPreTrainedModel,BlenderbotSmallForConditionalGeneration:()=>l.BlenderbotSmallForConditionalGeneration,BlenderbotSmallModel:()=>l.BlenderbotSmallModel,BlenderbotSmallPreTrainedModel:()=>l.BlenderbotSmallPreTrainedModel,BlenderbotSmallTokenizer:()=>d.BlenderbotSmallTokenizer,BlenderbotTokenizer:()=>d.BlenderbotTokenizer,BloomForCausalLM:()=>l.BloomForCausalLM,Bloom
Model:()=>l.BloomModel,BloomPreTrainedModel:()=>l.BloomPreTrainedModel,BloomTokenizer:()=>d.BloomTokenizer,CLIPFeatureExtractor:()=>y.CLIPFeatureExtractor,CLIPImageProcessor:()=>y.CLIPImageProcessor,CLIPModel:()=>l.CLIPModel,CLIPPreTrainedModel:()=>l.CLIPPreTrainedModel,CLIPSegForImageSegmentation:()=>l.CLIPSegForImageSegmentation,CLIPSegModel:()=>l.CLIPSegModel,CLIPSegPreTrainedModel:()=>l.CLIPSegPreTrainedModel,CLIPTextModel:()=>l.CLIPTextModel,CLIPTextModelWithProjection:()=>l.CLIPTextModelWithProjection,CLIPTokenizer:()=>d.CLIPTokenizer,CLIPVisionModel:()=>l.CLIPVisionModel,CLIPVisionModelWithProjection:()=>l.CLIPVisionModelWithProjection,CamembertForMaskedLM:()=>l.CamembertForMaskedLM,CamembertForQuestionAnswering:()=>l.CamembertForQuestionAnswering,CamembertForSequenceClassification:()=>l.CamembertForSequenceClassification,CamembertForTokenClassification:()=>l.CamembertForTokenClassification,CamembertModel:()=>l.CamembertModel,CamembertPreTrainedModel:()=>l.CamembertPreTrainedModel,CamembertTokenizer:()=>d.CamembertTokenizer,CausalLMOutput:()=>l.CausalLMOutput,CausalLMOutputWithPast:()=>l.CausalLMOutputWithPast,ChineseCLIPFeatureExtractor:()=>y.ChineseCLIPFeatureExtractor,ChineseCLIPModel:()=>l.ChineseCLIPModel,ChineseCLIPPreTrainedModel:()=>l.ChineseCLIPPreTrainedModel,ClapAudioModelWithProjection:()=>l.ClapAudioModelWithProjection,ClapFeatureExtractor:()=>g.ClapFeatureExtractor,ClapModel:()=>l.ClapModel,ClapPreTrainedModel:()=>l.ClapPreTrainedModel,ClapTextModelWithProjection:()=>l.ClapTextModelWithProjection,ClassifierFreeGuidanceLogitsProcessor:()=>$.ClassifierFreeGuidanceLogitsProcessor,CodeGenForCausalLM:()=>l.CodeGenForCausalLM,CodeGenModel:()=>l.CodeGenModel,CodeGenPreTrainedModel:()=>l.CodeGenPreTrainedModel,CodeGenTokenizer:()=>d.CodeGenTokenizer,CodeLlamaTokenizer:()=>d.CodeLlamaTokenizer,CohereForCausalLM:()=>l.CohereForCausalLM,CohereModel:()=>l.CohereModel,CoherePreTrainedModel:()=>l.CoherePreTrainedModel,CohereTokenizer:()=>d.CohereTokenizer,Con
vBertForMaskedLM:()=>l.ConvBertForMaskedLM,ConvBertForQuestionAnswering:()=>l.ConvBertForQuestionAnswering,ConvBertForSequenceClassification:()=>l.ConvBertForSequenceClassification,ConvBertForTokenClassification:()=>l.ConvBertForTokenClassification,ConvBertModel:()=>l.ConvBertModel,ConvBertPreTrainedModel:()=>l.ConvBertPreTrainedModel,ConvBertTokenizer:()=>d.ConvBertTokenizer,ConvNextFeatureExtractor:()=>y.ConvNextFeatureExtractor,ConvNextForImageClassification:()=>l.ConvNextForImageClassification,ConvNextImageProcessor:()=>y.ConvNextImageProcessor,ConvNextModel:()=>l.ConvNextModel,ConvNextPreTrainedModel:()=>l.ConvNextPreTrainedModel,ConvNextV2ForImageClassification:()=>l.ConvNextV2ForImageClassification,ConvNextV2Model:()=>l.ConvNextV2Model,ConvNextV2PreTrainedModel:()=>l.ConvNextV2PreTrainedModel,DPTFeatureExtractor:()=>y.DPTFeatureExtractor,DPTForDepthEstimation:()=>l.DPTForDepthEstimation,DPTImageProcessor:()=>y.DPTImageProcessor,DPTModel:()=>l.DPTModel,DPTPreTrainedModel:()=>l.DPTPreTrainedModel,DebertaForMaskedLM:()=>l.DebertaForMaskedLM,DebertaForQuestionAnswering:()=>l.DebertaForQuestionAnswering,DebertaForSequenceClassification:()=>l.DebertaForSequenceClassification,DebertaForTokenClassification:()=>l.DebertaForTokenClassification,DebertaModel:()=>l.DebertaModel,DebertaPreTrainedModel:()=>l.DebertaPreTrainedModel,DebertaTokenizer:()=>d.DebertaTokenizer,DebertaV2ForMaskedLM:()=>l.DebertaV2ForMaskedLM,DebertaV2ForQuestionAnswering:()=>l.DebertaV2ForQuestionAnswering,DebertaV2ForSequenceClassification:()=>l.DebertaV2ForSequenceClassification,DebertaV2ForTokenClassification:()=>l.DebertaV2ForTokenClassification,DebertaV2Model:()=>l.DebertaV2Model,DebertaV2PreTrainedModel:()=>l.DebertaV2PreTrainedModel,DebertaV2Tokenizer:()=>d.DebertaV2Tokenizer,DecisionTransformerModel:()=>l.DecisionTransformerModel,DecisionTransformerPreTrainedModel:()=>l.DecisionTransformerPreTrainedModel,DeiTFeatureExtractor:()=>y.DeiTFeatureExtractor,DeiTForImageClassification:()=>l.DeiTFo
rImageClassification,DeiTImageProcessor:()=>y.DeiTImageProcessor,DeiTModel:()=>l.DeiTModel,DeiTPreTrainedModel:()=>l.DeiTPreTrainedModel,DepthAnythingForDepthEstimation:()=>l.DepthAnythingForDepthEstimation,DepthAnythingPreTrainedModel:()=>l.DepthAnythingPreTrainedModel,DepthEstimationPipeline:()=>a.DepthEstimationPipeline,DepthProForDepthEstimation:()=>l.DepthProForDepthEstimation,DepthProPreTrainedModel:()=>l.DepthProPreTrainedModel,DetrFeatureExtractor:()=>y.DetrFeatureExtractor,DetrForObjectDetection:()=>l.DetrForObjectDetection,DetrForSegmentation:()=>l.DetrForSegmentation,DetrImageProcessor:()=>y.DetrImageProcessor,DetrModel:()=>l.DetrModel,DetrObjectDetectionOutput:()=>l.DetrObjectDetectionOutput,DetrPreTrainedModel:()=>l.DetrPreTrainedModel,DetrSegmentationOutput:()=>l.DetrSegmentationOutput,Dinov2ForImageClassification:()=>l.Dinov2ForImageClassification,Dinov2Model:()=>l.Dinov2Model,Dinov2PreTrainedModel:()=>l.Dinov2PreTrainedModel,DistilBertForMaskedLM:()=>l.DistilBertForMaskedLM,DistilBertForQuestionAnswering:()=>l.DistilBertForQuestionAnswering,DistilBertForSequenceClassification:()=>l.DistilBertForSequenceClassification,DistilBertForTokenClassification:()=>l.DistilBertForTokenClassification,DistilBertModel:()=>l.DistilBertModel,DistilBertPreTrainedModel:()=>l.DistilBertPreTrainedModel,DistilBertTokenizer:()=>d.DistilBertTokenizer,DocumentQuestionAnsweringPipeline:()=>a.DocumentQuestionAnsweringPipeline,DonutFeatureExtractor:()=>y.DonutFeatureExtractor,DonutImageProcessor:()=>y.DonutImageProcessor,DonutSwinModel:()=>l.DonutSwinModel,DonutSwinPreTrainedModel:()=>l.DonutSwinPreTrainedModel,EfficientNetForImageClassification:()=>l.EfficientNetForImageClassification,EfficientNetImageProcessor:()=>y.EfficientNetImageProcessor,EfficientNetModel:()=>l.EfficientNetModel,EfficientNetPreTrainedModel:()=>l.EfficientNetPreTrainedModel,ElectraForMaskedLM:()=>l.ElectraForMaskedLM,ElectraForQuestionAnswering:()=>l.ElectraForQuestionAnswering,ElectraForSequenceClassific
ation:()=>l.ElectraForSequenceClassification,ElectraForTokenClassification:()=>l.ElectraForTokenClassification,ElectraModel:()=>l.ElectraModel,ElectraPreTrainedModel:()=>l.ElectraPreTrainedModel,ElectraTokenizer:()=>d.ElectraTokenizer,EosTokenCriteria:()=>k.EosTokenCriteria,EsmForMaskedLM:()=>l.EsmForMaskedLM,EsmForSequenceClassification:()=>l.EsmForSequenceClassification,EsmForTokenClassification:()=>l.EsmForTokenClassification,EsmModel:()=>l.EsmModel,EsmPreTrainedModel:()=>l.EsmPreTrainedModel,EsmTokenizer:()=>d.EsmTokenizer,FFT:()=>m.FFT,FalconForCausalLM:()=>l.FalconForCausalLM,FalconModel:()=>l.FalconModel,FalconPreTrainedModel:()=>l.FalconPreTrainedModel,FalconTokenizer:()=>d.FalconTokenizer,FastViTForImageClassification:()=>l.FastViTForImageClassification,FastViTModel:()=>l.FastViTModel,FastViTPreTrainedModel:()=>l.FastViTPreTrainedModel,FeatureExtractionPipeline:()=>a.FeatureExtractionPipeline,FeatureExtractor:()=>f.FeatureExtractor,FillMaskPipeline:()=>a.FillMaskPipeline,Florence2ForConditionalGeneration:()=>l.Florence2ForConditionalGeneration,Florence2PreTrainedModel:()=>l.Florence2PreTrainedModel,Florence2Processor:()=>x.Florence2Processor,ForcedBOSTokenLogitsProcessor:()=>$.ForcedBOSTokenLogitsProcessor,ForcedEOSTokenLogitsProcessor:()=>$.ForcedEOSTokenLogitsProcessor,GLPNFeatureExtractor:()=>y.GLPNFeatureExtractor,GLPNForDepthEstimation:()=>l.GLPNForDepthEstimation,GLPNModel:()=>l.GLPNModel,GLPNPreTrainedModel:()=>l.GLPNPreTrainedModel,GPT2LMHeadModel:()=>l.GPT2LMHeadModel,GPT2Model:()=>l.GPT2Model,GPT2PreTrainedModel:()=>l.GPT2PreTrainedModel,GPT2Tokenizer:()=>d.GPT2Tokenizer,GPTBigCodeForCausalLM:()=>l.GPTBigCodeForCausalLM,GPTBigCodeModel:()=>l.GPTBigCodeModel,GPTBigCodePreTrainedModel:()=>l.GPTBigCodePreTrainedModel,GPTJForCausalLM:()=>l.GPTJForCausalLM,GPTJModel:()=>l.GPTJModel,GPTJPreTrainedModel:()=>l.GPTJPreTrainedModel,GPTNeoForCausalLM:()=>l.GPTNeoForCausalLM,GPTNeoModel:()=>l.GPTNeoModel,GPTNeoPreTrainedModel:()=>l.GPTNeoPreTrainedModel,GPTNe
oXForCausalLM:()=>l.GPTNeoXForCausalLM,GPTNeoXModel:()=>l.GPTNeoXModel,GPTNeoXPreTrainedModel:()=>l.GPTNeoXPreTrainedModel,GPTNeoXTokenizer:()=>d.GPTNeoXTokenizer,Gemma2ForCausalLM:()=>l.Gemma2ForCausalLM,Gemma2Model:()=>l.Gemma2Model,Gemma2PreTrainedModel:()=>l.Gemma2PreTrainedModel,GemmaForCausalLM:()=>l.GemmaForCausalLM,GemmaModel:()=>l.GemmaModel,GemmaPreTrainedModel:()=>l.GemmaPreTrainedModel,GemmaTokenizer:()=>d.GemmaTokenizer,GraniteForCausalLM:()=>l.GraniteForCausalLM,GraniteModel:()=>l.GraniteModel,GranitePreTrainedModel:()=>l.GranitePreTrainedModel,Grok1Tokenizer:()=>d.Grok1Tokenizer,GroupViTModel:()=>l.GroupViTModel,GroupViTPreTrainedModel:()=>l.GroupViTPreTrainedModel,HerbertTokenizer:()=>d.HerbertTokenizer,HieraForImageClassification:()=>l.HieraForImageClassification,HieraModel:()=>l.HieraModel,HieraPreTrainedModel:()=>l.HieraPreTrainedModel,HubertForCTC:()=>l.HubertForCTC,HubertForSequenceClassification:()=>l.HubertForSequenceClassification,HubertModel:()=>l.HubertModel,HubertPreTrainedModel:()=>l.HubertPreTrainedModel,Idefics3ForConditionalGeneration:()=>l.Idefics3ForConditionalGeneration,Idefics3ImageProcessor:()=>y.Idefics3ImageProcessor,Idefics3PreTrainedModel:()=>l.Idefics3PreTrainedModel,Idefics3Processor:()=>x.Idefics3Processor,ImageClassificationPipeline:()=>a.ImageClassificationPipeline,ImageFeatureExtractionPipeline:()=>a.ImageFeatureExtractionPipeline,ImageFeatureExtractor:()=>g.ImageFeatureExtractor,ImageMattingOutput:()=>l.ImageMattingOutput,ImageProcessor:()=>w.ImageProcessor,ImageSegmentationPipeline:()=>a.ImageSegmentationPipeline,ImageToImagePipeline:()=>a.ImageToImagePipeline,ImageToTextPipeline:()=>a.ImageToTextPipeline,InterruptableStoppingCriteria:()=>k.InterruptableStoppingCriteria,JAISLMHeadModel:()=>l.JAISLMHeadModel,JAISModel:()=>l.JAISModel,JAISPreTrainedModel:()=>l.JAISPreTrainedModel,JinaCLIPImageProcessor:()=>y.JinaCLIPImageProcessor,JinaCLIPModel:()=>l.JinaCLIPModel,JinaCLIPPreTrainedModel:()=>l.JinaCLIPPreTrainedModel,Jin
aCLIPProcessor:()=>x.JinaCLIPProcessor,JinaCLIPTextModel:()=>l.JinaCLIPTextModel,JinaCLIPVisionModel:()=>l.JinaCLIPVisionModel,LlamaForCausalLM:()=>l.LlamaForCausalLM,LlamaModel:()=>l.LlamaModel,LlamaPreTrainedModel:()=>l.LlamaPreTrainedModel,LlamaTokenizer:()=>d.LlamaTokenizer,LlavaForConditionalGeneration:()=>l.LlavaForConditionalGeneration,LlavaOnevisionForConditionalGeneration:()=>l.LlavaOnevisionForConditionalGeneration,LlavaOnevisionImageProcessor:()=>y.LlavaOnevisionImageProcessor,LlavaPreTrainedModel:()=>l.LlavaPreTrainedModel,LogitsProcessor:()=>$.LogitsProcessor,LogitsProcessorList:()=>$.LogitsProcessorList,LogitsWarper:()=>$.LogitsWarper,LongT5ForConditionalGeneration:()=>l.LongT5ForConditionalGeneration,LongT5Model:()=>l.LongT5Model,LongT5PreTrainedModel:()=>l.LongT5PreTrainedModel,M2M100ForConditionalGeneration:()=>l.M2M100ForConditionalGeneration,M2M100Model:()=>l.M2M100Model,M2M100PreTrainedModel:()=>l.M2M100PreTrainedModel,M2M100Tokenizer:()=>d.M2M100Tokenizer,MBart50Tokenizer:()=>d.MBart50Tokenizer,MBartForCausalLM:()=>l.MBartForCausalLM,MBartForConditionalGeneration:()=>l.MBartForConditionalGeneration,MBartForSequenceClassification:()=>l.MBartForSequenceClassification,MBartModel:()=>l.MBartModel,MBartPreTrainedModel:()=>l.MBartPreTrainedModel,MBartTokenizer:()=>d.MBartTokenizer,MPNetForMaskedLM:()=>l.MPNetForMaskedLM,MPNetForQuestionAnswering:()=>l.MPNetForQuestionAnswering,MPNetForSequenceClassification:()=>l.MPNetForSequenceClassification,MPNetForTokenClassification:()=>l.MPNetForTokenClassification,MPNetModel:()=>l.MPNetModel,MPNetPreTrainedModel:()=>l.MPNetPreTrainedModel,MPNetTokenizer:()=>d.MPNetTokenizer,MT5ForConditionalGeneration:()=>l.MT5ForConditionalGeneration,MT5Model:()=>l.MT5Model,MT5PreTrainedModel:()=>l.MT5PreTrainedModel,MarianMTModel:()=>l.MarianMTModel,MarianModel:()=>l.MarianModel,MarianPreTrainedModel:()=>l.MarianPreTrainedModel,MarianTokenizer:()=>d.MarianTokenizer,Mask2FormerImageProcessor:()=>y.Mask2FormerImageProcessor,Mas
kFormerFeatureExtractor:()=>y.MaskFormerFeatureExtractor,MaskFormerForInstanceSegmentation:()=>l.MaskFormerForInstanceSegmentation,MaskFormerImageProcessor:()=>y.MaskFormerImageProcessor,MaskFormerModel:()=>l.MaskFormerModel,MaskFormerPreTrainedModel:()=>l.MaskFormerPreTrainedModel,MaskedLMOutput:()=>l.MaskedLMOutput,MaxLengthCriteria:()=>k.MaxLengthCriteria,MgpstrForSceneTextRecognition:()=>l.MgpstrForSceneTextRecognition,MgpstrModelOutput:()=>l.MgpstrModelOutput,MgpstrPreTrainedModel:()=>l.MgpstrPreTrainedModel,MgpstrProcessor:()=>x.MgpstrProcessor,MgpstrTokenizer:()=>d.MgpstrTokenizer,MinLengthLogitsProcessor:()=>$.MinLengthLogitsProcessor,MinNewTokensLengthLogitsProcessor:()=>$.MinNewTokensLengthLogitsProcessor,MistralForCausalLM:()=>l.MistralForCausalLM,MistralModel:()=>l.MistralModel,MistralPreTrainedModel:()=>l.MistralPreTrainedModel,MobileBertForMaskedLM:()=>l.MobileBertForMaskedLM,MobileBertForQuestionAnswering:()=>l.MobileBertForQuestionAnswering,MobileBertForSequenceClassification:()=>l.MobileBertForSequenceClassification,MobileBertModel:()=>l.MobileBertModel,MobileBertPreTrainedModel:()=>l.MobileBertPreTrainedModel,MobileBertTokenizer:()=>d.MobileBertTokenizer,MobileLLMForCausalLM:()=>l.MobileLLMForCausalLM,MobileLLMModel:()=>l.MobileLLMModel,MobileLLMPreTrainedModel:()=>l.MobileLLMPreTrainedModel,MobileNetV1FeatureExtractor:()=>y.MobileNetV1FeatureExtractor,MobileNetV1ForImageClassification:()=>l.MobileNetV1ForImageClassification,MobileNetV1ImageProcessor:()=>y.MobileNetV1ImageProcessor,MobileNetV1Model:()=>l.MobileNetV1Model,MobileNetV1PreTrainedModel:()=>l.MobileNetV1PreTrainedModel,MobileNetV2FeatureExtractor:()=>y.MobileNetV2FeatureExtractor,MobileNetV2ForImageClassification:()=>l.MobileNetV2ForImageClassification,MobileNetV2ImageProcessor:()=>y.MobileNetV2ImageProcessor,MobileNetV2Model:()=>l.MobileNetV2Model,MobileNetV2PreTrainedModel:()=>l.MobileNetV2PreTrainedModel,MobileNetV3FeatureExtractor:()=>y.MobileNetV3FeatureExtractor,MobileNetV3ForImage
Classification:()=>l.MobileNetV3ForImageClassification,MobileNetV3ImageProcessor:()=>y.MobileNetV3ImageProcessor,MobileNetV3Model:()=>l.MobileNetV3Model,MobileNetV3PreTrainedModel:()=>l.MobileNetV3PreTrainedModel,MobileNetV4FeatureExtractor:()=>y.MobileNetV4FeatureExtractor,MobileNetV4ForImageClassification:()=>l.MobileNetV4ForImageClassification,MobileNetV4ImageProcessor:()=>y.MobileNetV4ImageProcessor,MobileNetV4Model:()=>l.MobileNetV4Model,MobileNetV4PreTrainedModel:()=>l.MobileNetV4PreTrainedModel,MobileViTFeatureExtractor:()=>y.MobileViTFeatureExtractor,MobileViTForImageClassification:()=>l.MobileViTForImageClassification,MobileViTImageProcessor:()=>y.MobileViTImageProcessor,MobileViTModel:()=>l.MobileViTModel,MobileViTPreTrainedModel:()=>l.MobileViTPreTrainedModel,MobileViTV2ForImageClassification:()=>l.MobileViTV2ForImageClassification,MobileViTV2Model:()=>l.MobileViTV2Model,MobileViTV2PreTrainedModel:()=>l.MobileViTV2PreTrainedModel,ModelOutput:()=>l.ModelOutput,Moondream1ForConditionalGeneration:()=>l.Moondream1ForConditionalGeneration,MptForCausalLM:()=>l.MptForCausalLM,MptModel:()=>l.MptModel,MptPreTrainedModel:()=>l.MptPreTrainedModel,MultiModalityCausalLM:()=>l.MultiModalityCausalLM,MultiModalityPreTrainedModel:()=>l.MultiModalityPreTrainedModel,MusicgenForCausalLM:()=>l.MusicgenForCausalLM,MusicgenForConditionalGeneration:()=>l.MusicgenForConditionalGeneration,MusicgenModel:()=>l.MusicgenModel,MusicgenPreTrainedModel:()=>l.MusicgenPreTrainedModel,NllbTokenizer:()=>d.NllbTokenizer,NoBadWordsLogitsProcessor:()=>$.NoBadWordsLogitsProcessor,NoRepeatNGramLogitsProcessor:()=>$.NoRepeatNGramLogitsProcessor,NomicBertModel:()=>l.NomicBertModel,NomicBertPreTrainedModel:()=>l.NomicBertPreTrainedModel,NougatImageProcessor:()=>y.NougatImageProcessor,NougatTokenizer:()=>d.NougatTokenizer,OPTForCausalLM:()=>l.OPTForCausalLM,OPTModel:()=>l.OPTModel,OPTPreTrainedModel:()=>l.OPTPreTrainedModel,ObjectDetectionPipeline:()=>a.ObjectDetectionPipeline,OlmoForCausalLM:()=>l.O
lmoForCausalLM,OlmoModel:()=>l.OlmoModel,OlmoPreTrainedModel:()=>l.OlmoPreTrainedModel,OpenELMForCausalLM:()=>l.OpenELMForCausalLM,OpenELMModel:()=>l.OpenELMModel,OpenELMPreTrainedModel:()=>l.OpenELMPreTrainedModel,OwlViTFeatureExtractor:()=>y.OwlViTFeatureExtractor,OwlViTForObjectDetection:()=>l.OwlViTForObjectDetection,OwlViTImageProcessor:()=>y.OwlViTImageProcessor,OwlViTModel:()=>l.OwlViTModel,OwlViTPreTrainedModel:()=>l.OwlViTPreTrainedModel,OwlViTProcessor:()=>x.OwlViTProcessor,Owlv2ForObjectDetection:()=>l.Owlv2ForObjectDetection,Owlv2ImageProcessor:()=>y.Owlv2ImageProcessor,Owlv2Model:()=>l.Owlv2Model,Owlv2PreTrainedModel:()=>l.Owlv2PreTrainedModel,PatchTSMixerForPrediction:()=>l.PatchTSMixerForPrediction,PatchTSMixerModel:()=>l.PatchTSMixerModel,PatchTSMixerPreTrainedModel:()=>l.PatchTSMixerPreTrainedModel,PatchTSTForPrediction:()=>l.PatchTSTForPrediction,PatchTSTModel:()=>l.PatchTSTModel,PatchTSTPreTrainedModel:()=>l.PatchTSTPreTrainedModel,Phi3ForCausalLM:()=>l.Phi3ForCausalLM,Phi3Model:()=>l.Phi3Model,Phi3PreTrainedModel:()=>l.Phi3PreTrainedModel,PhiForCausalLM:()=>l.PhiForCausalLM,PhiModel:()=>l.PhiModel,PhiPreTrainedModel:()=>l.PhiPreTrainedModel,Pipeline:()=>a.Pipeline,PreTrainedModel:()=>l.PreTrainedModel,PreTrainedTokenizer:()=>d.PreTrainedTokenizer,PretrainedConfig:()=>u.PretrainedConfig,PretrainedMixin:()=>l.PretrainedMixin,Processor:()=>v.Processor,PvtForImageClassification:()=>l.PvtForImageClassification,PvtImageProcessor:()=>y.PvtImageProcessor,PvtModel:()=>l.PvtModel,PvtPreTrainedModel:()=>l.PvtPreTrainedModel,PyAnnoteFeatureExtractor:()=>g.PyAnnoteFeatureExtractor,PyAnnoteForAudioFrameClassification:()=>l.PyAnnoteForAudioFrameClassification,PyAnnoteModel:()=>l.PyAnnoteModel,PyAnnotePreTrainedModel:()=>l.PyAnnotePreTrainedModel,PyAnnoteProcessor:()=>x.PyAnnoteProcessor,QuestionAnsweringModelOutput:()=>l.QuestionAnsweringModelOutput,QuestionAnsweringPipeline:()=>a.QuestionAnsweringPipeline,Qwen2ForCausalLM:()=>l.Qwen2ForCausalLM,Qwen2Model:()=>
l.Qwen2Model,Qwen2PreTrainedModel:()=>l.Qwen2PreTrainedModel,Qwen2Tokenizer:()=>d.Qwen2Tokenizer,Qwen2VLForConditionalGeneration:()=>l.Qwen2VLForConditionalGeneration,Qwen2VLImageProcessor:()=>y.Qwen2VLImageProcessor,Qwen2VLPreTrainedModel:()=>l.Qwen2VLPreTrainedModel,Qwen2VLProcessor:()=>x.Qwen2VLProcessor,RTDetrForObjectDetection:()=>l.RTDetrForObjectDetection,RTDetrImageProcessor:()=>y.RTDetrImageProcessor,RTDetrModel:()=>l.RTDetrModel,RTDetrObjectDetectionOutput:()=>l.RTDetrObjectDetectionOutput,RTDetrPreTrainedModel:()=>l.RTDetrPreTrainedModel,RawImage:()=>p.RawImage,RepetitionPenaltyLogitsProcessor:()=>$.RepetitionPenaltyLogitsProcessor,ResNetForImageClassification:()=>l.ResNetForImageClassification,ResNetModel:()=>l.ResNetModel,ResNetPreTrainedModel:()=>l.ResNetPreTrainedModel,RoFormerForMaskedLM:()=>l.RoFormerForMaskedLM,RoFormerForQuestionAnswering:()=>l.RoFormerForQuestionAnswering,RoFormerForSequenceClassification:()=>l.RoFormerForSequenceClassification,RoFormerForTokenClassification:()=>l.RoFormerForTokenClassification,RoFormerModel:()=>l.RoFormerModel,RoFormerPreTrainedModel:()=>l.RoFormerPreTrainedModel,RoFormerTokenizer:()=>d.RoFormerTokenizer,RobertaForMaskedLM:()=>l.RobertaForMaskedLM,RobertaForQuestionAnswering:()=>l.RobertaForQuestionAnswering,RobertaForSequenceClassification:()=>l.RobertaForSequenceClassification,RobertaForTokenClassification:()=>l.RobertaForTokenClassification,RobertaModel:()=>l.RobertaModel,RobertaPreTrainedModel:()=>l.RobertaPreTrainedModel,RobertaTokenizer:()=>d.RobertaTokenizer,SamImageProcessor:()=>y.SamImageProcessor,SamImageSegmentationOutput:()=>l.SamImageSegmentationOutput,SamModel:()=>l.SamModel,SamPreTrainedModel:()=>l.SamPreTrainedModel,SamProcessor:()=>x.SamProcessor,SapiensForDepthEstimation:()=>l.SapiensForDepthEstimation,SapiensForNormalEstimation:()=>l.SapiensForNormalEstimation,SapiensForSemanticSegmentation:()=>l.SapiensForSemanticSegmentation,SapiensPreTrainedModel:()=>l.SapiensPreTrainedModel,SeamlessM4TFeat
ureExtractor:()=>g.SeamlessM4TFeatureExtractor,SegformerFeatureExtractor:()=>y.SegformerFeatureExtractor,SegformerForImageClassification:()=>l.SegformerForImageClassification,SegformerForSemanticSegmentation:()=>l.SegformerForSemanticSegmentation,SegformerImageProcessor:()=>y.SegformerImageProcessor,SegformerModel:()=>l.SegformerModel,SegformerPreTrainedModel:()=>l.SegformerPreTrainedModel,Seq2SeqLMOutput:()=>l.Seq2SeqLMOutput,SequenceClassifierOutput:()=>l.SequenceClassifierOutput,SiglipImageProcessor:()=>y.SiglipImageProcessor,SiglipModel:()=>l.SiglipModel,SiglipPreTrainedModel:()=>l.SiglipPreTrainedModel,SiglipTextModel:()=>l.SiglipTextModel,SiglipTokenizer:()=>d.SiglipTokenizer,SiglipVisionModel:()=>l.SiglipVisionModel,SpeechT5FeatureExtractor:()=>g.SpeechT5FeatureExtractor,SpeechT5ForSpeechToText:()=>l.SpeechT5ForSpeechToText,SpeechT5ForTextToSpeech:()=>l.SpeechT5ForTextToSpeech,SpeechT5HifiGan:()=>l.SpeechT5HifiGan,SpeechT5Model:()=>l.SpeechT5Model,SpeechT5PreTrainedModel:()=>l.SpeechT5PreTrainedModel,SpeechT5Processor:()=>x.SpeechT5Processor,SpeechT5Tokenizer:()=>d.SpeechT5Tokenizer,SqueezeBertForMaskedLM:()=>l.SqueezeBertForMaskedLM,SqueezeBertForQuestionAnswering:()=>l.SqueezeBertForQuestionAnswering,SqueezeBertForSequenceClassification:()=>l.SqueezeBertForSequenceClassification,SqueezeBertModel:()=>l.SqueezeBertModel,SqueezeBertPreTrainedModel:()=>l.SqueezeBertPreTrainedModel,SqueezeBertTokenizer:()=>d.SqueezeBertTokenizer,StableLmForCausalLM:()=>l.StableLmForCausalLM,StableLmModel:()=>l.StableLmModel,StableLmPreTrainedModel:()=>l.StableLmPreTrainedModel,Starcoder2ForCausalLM:()=>l.Starcoder2ForCausalLM,Starcoder2Model:()=>l.Starcoder2Model,Starcoder2PreTrainedModel:()=>l.Starcoder2PreTrainedModel,StoppingCriteria:()=>k.StoppingCriteria,StoppingCriteriaList:()=>k.StoppingCriteriaList,SummarizationPipeline:()=>a.SummarizationPipeline,SuppressTokensAtBeginLogitsProcessor:()=>$.SuppressTokensAtBeginLogitsProcessor,Swin2SRForImageSuperResolution:()=>l.Swin2SRF
orImageSuperResolution,Swin2SRImageProcessor:()=>y.Swin2SRImageProcessor,Swin2SRModel:()=>l.Swin2SRModel,Swin2SRPreTrainedModel:()=>l.Swin2SRPreTrainedModel,SwinForImageClassification:()=>l.SwinForImageClassification,SwinModel:()=>l.SwinModel,SwinPreTrainedModel:()=>l.SwinPreTrainedModel,T5ForConditionalGeneration:()=>l.T5ForConditionalGeneration,T5Model:()=>l.T5Model,T5PreTrainedModel:()=>l.T5PreTrainedModel,T5Tokenizer:()=>d.T5Tokenizer,TableTransformerForObjectDetection:()=>l.TableTransformerForObjectDetection,TableTransformerModel:()=>l.TableTransformerModel,TableTransformerObjectDetectionOutput:()=>l.TableTransformerObjectDetectionOutput,TableTransformerPreTrainedModel:()=>l.TableTransformerPreTrainedModel,TemperatureLogitsWarper:()=>$.TemperatureLogitsWarper,Tensor:()=>h.Tensor,Text2TextGenerationPipeline:()=>a.Text2TextGenerationPipeline,TextClassificationPipeline:()=>a.TextClassificationPipeline,TextGenerationPipeline:()=>a.TextGenerationPipeline,TextStreamer:()=>T.TextStreamer,TextToAudioPipeline:()=>a.TextToAudioPipeline,TokenClassificationPipeline:()=>a.TokenClassificationPipeline,TokenClassifierOutput:()=>l.TokenClassifierOutput,TokenizerModel:()=>d.TokenizerModel,TopKLogitsWarper:()=>$.TopKLogitsWarper,TopPLogitsWarper:()=>$.TopPLogitsWarper,TrOCRForCausalLM:()=>l.TrOCRForCausalLM,TrOCRPreTrainedModel:()=>l.TrOCRPreTrainedModel,TranslationPipeline:()=>a.TranslationPipeline,UniSpeechForCTC:()=>l.UniSpeechForCTC,UniSpeechForSequenceClassification:()=>l.UniSpeechForSequenceClassification,UniSpeechModel:()=>l.UniSpeechModel,UniSpeechPreTrainedModel:()=>l.UniSpeechPreTrainedModel,UniSpeechSatForAudioFrameClassification:()=>l.UniSpeechSatForAudioFrameClassification,UniSpeechSatForCTC:()=>l.UniSpeechSatForCTC,UniSpeechSatForSequenceClassification:()=>l.UniSpeechSatForSequenceClassification,UniSpeechSatModel:()=>l.UniSpeechSatModel,UniSpeechSatPreTrainedModel:()=>l.UniSpeechSatPreTrainedModel,VLChatProcessor:()=>x.VLChatProcessor,VLMImageProcessor:()=>y.VLMImag
eProcessor,ViTFeatureExtractor:()=>y.ViTFeatureExtractor,ViTForImageClassification:()=>l.ViTForImageClassification,ViTImageProcessor:()=>y.ViTImageProcessor,ViTMAEModel:()=>l.ViTMAEModel,ViTMAEPreTrainedModel:()=>l.ViTMAEPreTrainedModel,ViTMSNForImageClassification:()=>l.ViTMSNForImageClassification,ViTMSNModel:()=>l.ViTMSNModel,ViTMSNPreTrainedModel:()=>l.ViTMSNPreTrainedModel,ViTModel:()=>l.ViTModel,ViTPreTrainedModel:()=>l.ViTPreTrainedModel,VisionEncoderDecoderModel:()=>l.VisionEncoderDecoderModel,VitMatteForImageMatting:()=>l.VitMatteForImageMatting,VitMatteImageProcessor:()=>y.VitMatteImageProcessor,VitMattePreTrainedModel:()=>l.VitMattePreTrainedModel,VitPoseForPoseEstimation:()=>l.VitPoseForPoseEstimation,VitPoseImageProcessor:()=>y.VitPoseImageProcessor,VitPosePreTrainedModel:()=>l.VitPosePreTrainedModel,VitsModel:()=>l.VitsModel,VitsModelOutput:()=>l.VitsModelOutput,VitsPreTrainedModel:()=>l.VitsPreTrainedModel,VitsTokenizer:()=>d.VitsTokenizer,Wav2Vec2BertForCTC:()=>l.Wav2Vec2BertForCTC,Wav2Vec2BertForSequenceClassification:()=>l.Wav2Vec2BertForSequenceClassification,Wav2Vec2BertModel:()=>l.Wav2Vec2BertModel,Wav2Vec2BertPreTrainedModel:()=>l.Wav2Vec2BertPreTrainedModel,Wav2Vec2CTCTokenizer:()=>d.Wav2Vec2CTCTokenizer,Wav2Vec2FeatureExtractor:()=>g.Wav2Vec2FeatureExtractor,Wav2Vec2ForAudioFrameClassification:()=>l.Wav2Vec2ForAudioFrameClassification,Wav2Vec2ForCTC:()=>l.Wav2Vec2ForCTC,Wav2Vec2ForSequenceClassification:()=>l.Wav2Vec2ForSequenceClassification,Wav2Vec2Model:()=>l.Wav2Vec2Model,Wav2Vec2PreTrainedModel:()=>l.Wav2Vec2PreTrainedModel,Wav2Vec2ProcessorWithLM:()=>x.Wav2Vec2ProcessorWithLM,WavLMForAudioFrameClassification:()=>l.WavLMForAudioFrameClassification,WavLMForCTC:()=>l.WavLMForCTC,WavLMForSequenceClassification:()=>l.WavLMForSequenceClassification,WavLMForXVector:()=>l.WavLMForXVector,WavLMModel:()=>l.WavLMModel,WavLMPreTrainedModel:()=>l.WavLMPreTrainedModel,WeSpeakerFeatureExtractor:()=>g.WeSpeakerFeatureExtractor,WeSpeakerResNetModel:()=>
l.WeSpeakerResNetModel,WeSpeakerResNetPreTrainedModel:()=>l.WeSpeakerResNetPreTrainedModel,WhisperFeatureExtractor:()=>g.WhisperFeatureExtractor,WhisperForConditionalGeneration:()=>l.WhisperForConditionalGeneration,WhisperModel:()=>l.WhisperModel,WhisperPreTrainedModel:()=>l.WhisperPreTrainedModel,WhisperProcessor:()=>x.WhisperProcessor,WhisperTextStreamer:()=>T.WhisperTextStreamer,WhisperTimeStampLogitsProcessor:()=>$.WhisperTimeStampLogitsProcessor,WhisperTokenizer:()=>d.WhisperTokenizer,XLMForQuestionAnswering:()=>l.XLMForQuestionAnswering,XLMForSequenceClassification:()=>l.XLMForSequenceClassification,XLMForTokenClassification:()=>l.XLMForTokenClassification,XLMModel:()=>l.XLMModel,XLMPreTrainedModel:()=>l.XLMPreTrainedModel,XLMRobertaForMaskedLM:()=>l.XLMRobertaForMaskedLM,XLMRobertaForQuestionAnswering:()=>l.XLMRobertaForQuestionAnswering,XLMRobertaForSequenceClassification:()=>l.XLMRobertaForSequenceClassification,XLMRobertaForTokenClassification:()=>l.XLMRobertaForTokenClassification,XLMRobertaModel:()=>l.XLMRobertaModel,XLMRobertaPreTrainedModel:()=>l.XLMRobertaPreTrainedModel,XLMRobertaTokenizer:()=>d.XLMRobertaTokenizer,XLMTokenizer:()=>d.XLMTokenizer,XLMWithLMHeadModel:()=>l.XLMWithLMHeadModel,XVectorOutput:()=>l.XVectorOutput,YolosFeatureExtractor:()=>y.YolosFeatureExtractor,YolosForObjectDetection:()=>l.YolosForObjectDetection,YolosImageProcessor:()=>y.YolosImageProcessor,YolosModel:()=>l.YolosModel,YolosObjectDetectionOutput:()=>l.YolosObjectDetectionOutput,YolosPreTrainedModel:()=>l.YolosPreTrainedModel,ZeroShotAudioClassificationPipeline:()=>a.ZeroShotAudioClassificationPipeline,ZeroShotClassificationPipeline:()=>a.ZeroShotClassificationPipeline,ZeroShotImageClassificationPipeline:()=>a.ZeroShotImageClassificationPipeline,ZeroShotObjectDetectionPipeline:()=>a.ZeroShotObjectDetectionPipeline,bankers_round:()=>m.bankers_round,cat:()=>h.cat,cos_sim:()=>m.cos_sim,dot:()=>m.dot,dynamic_time_warping:()=>m.dynamic_time_warping,env:()=>i.env,full:()=>h.full
,full_like:()=>h.full_like,getKeyValueShapes:()=>u.getKeyValueShapes,hamming:()=>c.hamming,hanning:()=>c.hanning,interpolate:()=>h.interpolate,interpolate_4d:()=>h.interpolate_4d,interpolate_data:()=>m.interpolate_data,is_chinese_char:()=>d.is_chinese_char,layer_norm:()=>h.layer_norm,load_image:()=>p.load_image,log_softmax:()=>m.log_softmax,magnitude:()=>m.magnitude,matmul:()=>h.matmul,max:()=>m.max,mean:()=>h.mean,mean_pooling:()=>h.mean_pooling,medianFilter:()=>m.medianFilter,mel_filter_bank:()=>c.mel_filter_bank,min:()=>m.min,ones:()=>h.ones,ones_like:()=>h.ones_like,permute:()=>h.permute,permute_data:()=>m.permute_data,pipeline:()=>a.pipeline,quantize_embeddings:()=>h.quantize_embeddings,read_audio:()=>c.read_audio,rfft:()=>h.rfft,round:()=>m.round,softmax:()=>m.softmax,spectrogram:()=>c.spectrogram,stack:()=>h.stack,std_mean:()=>h.std_mean,topk:()=>h.topk,window_function:()=>c.window_function,zeros:()=>h.zeros,zeros_like:()=>h.zeros_like});var i=s(/*! ./env.js */"./src/env.js"),a=s(/*! ./pipelines.js */"./src/pipelines.js"),l=s(/*! ./models.js */"./src/models.js"),d=s(/*! ./tokenizers.js */"./src/tokenizers.js"),u=s(/*! ./configs.js */"./src/configs.js"),c=s(/*! ./utils/audio.js */"./src/utils/audio.js"),p=s(/*! ./utils/image.js */"./src/utils/image.js"),h=s(/*! ./utils/tensor.js */"./src/utils/tensor.js"),m=s(/*! ./utils/maths.js */"./src/utils/maths.js"),f=s(/*! ./base/feature_extraction_utils.js */"./src/base/feature_extraction_utils.js"),g=s(/*! ./models/feature_extractors.js */"./src/models/feature_extractors.js"),_=s(/*! ./models/auto/feature_extraction_auto.js */"./src/models/auto/feature_extraction_auto.js"),w=s(/*! ./base/image_processors_utils.js */"./src/base/image_processors_utils.js"),y=s(/*! ./models/image_processors.js */"./src/models/image_processors.js"),b=s(/*! ./models/auto/image_processing_auto.js */"./src/models/auto/image_processing_auto.js"),v=s(/*! ./base/processing_utils.js */"./src/base/processing_utils.js"),x=s(/*! 
./models/processors.js */"./src/models/processors.js"),M=s(/*! ./models/auto/processing_auto.js */"./src/models/auto/processing_auto.js"),T=s(/*! ./generation/streamers.js */"./src/generation/streamers.js"),k=s(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),$=s(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),C=o.ASTFeatureExtractor,P=o.ASTForAudioClassification,S=o.ASTModel,E=o.ASTPreTrainedModel,F=o.AlbertForMaskedLM,A=o.AlbertForQuestionAnswering,I=o.AlbertForSequenceClassification,z=o.AlbertModel,L=o.AlbertPreTrainedModel,O=o.AlbertTokenizer,B=o.AudioClassificationPipeline,D=o.AutoConfig,N=o.AutoFeatureExtractor,R=o.AutoImageProcessor,j=o.AutoModel,V=o.AutoModelForAudioClassification,G=o.AutoModelForAudioFrameClassification,U=o.AutoModelForCTC,q=o.AutoModelForCausalLM,W=o.AutoModelForDepthEstimation,H=o.AutoModelForDocumentQuestionAnswering,X=o.AutoModelForImageClassification,Q=o.AutoModelForImageFeatureExtraction,K=o.AutoModelForImageMatting,Y=o.AutoModelForImageSegmentation,J=o.AutoModelForImageToImage,Z=o.AutoModelForMaskGeneration,ee=o.AutoModelForMaskedLM,te=o.AutoModelForNormalEstimation,ne=o.AutoModelForObjectDetection,re=o.AutoModelForPoseEstimation,se=o.AutoModelForQuestionAnswering,oe=o.AutoModelForSemanticSegmentation,ie=o.AutoModelForSeq2SeqLM,ae=o.AutoModelForSequenceClassification,le=o.AutoModelForSpeechSeq2Seq,de=o.AutoModelForTextToSpectrogram,ue=o.AutoModelForTextToWaveform,ce=o.AutoModelForTokenClassification,pe=o.AutoModelForUniversalSegmentation,he=o.AutoModelForVision2Seq,me=o.AutoModelForXVector,fe=o.AutoModelForZeroShotObjectDetection,ge=o.AutoProcessor,_e=o.AutoTokenizer,we=o.AutomaticSpeechRecognitionPipeline,ye=o.BartForConditionalGeneration,be=o.BartForSequenceClassification,ve=o.BartModel,xe=o.BartPretrainedModel,Me=o.BartTokenizer,Te=o.BaseModelOutput,ke=o.BaseStreamer,$e=o.BeitFeatureExtractor,Ce=o.BeitForImageClassification,Pe=o.BeitModel,Se=o.BeitPreTrainedModel,Ee=o.Bert
ForMaskedLM,Fe=o.BertForQuestionAnswering,Ae=o.BertForSequenceClassification,Ie=o.BertForTokenClassification,ze=o.BertModel,Le=o.BertPreTrainedModel,Oe=o.BertTokenizer,Be=o.BitImageProcessor,De=o.BlenderbotForConditionalGeneration,Ne=o.BlenderbotModel,Re=o.BlenderbotPreTrainedModel,je=o.BlenderbotSmallForConditionalGeneration,Ve=o.BlenderbotSmallModel,Ge=o.BlenderbotSmallPreTrainedModel,Ue=o.BlenderbotSmallTokenizer,qe=o.BlenderbotTokenizer,We=o.BloomForCausalLM,He=o.BloomModel,Xe=o.BloomPreTrainedModel,Qe=o.BloomTokenizer,Ke=o.CLIPFeatureExtractor,Ye=o.CLIPImageProcessor,Je=o.CLIPModel,Ze=o.CLIPPreTrainedModel,et=o.CLIPSegForImageSegmentation,tt=o.CLIPSegModel,nt=o.CLIPSegPreTrainedModel,rt=o.CLIPTextModel,st=o.CLIPTextModelWithProjection,ot=o.CLIPTokenizer,it=o.CLIPVisionModel,at=o.CLIPVisionModelWithProjection,lt=o.CamembertForMaskedLM,dt=o.CamembertForQuestionAnswering,ut=o.CamembertForSequenceClassification,ct=o.CamembertForTokenClassification,pt=o.CamembertModel,ht=o.CamembertPreTrainedModel,mt=o.CamembertTokenizer,ft=o.CausalLMOutput,gt=o.CausalLMOutputWithPast,_t=o.ChineseCLIPFeatureExtractor,wt=o.ChineseCLIPModel,yt=o.ChineseCLIPPreTrainedModel,bt=o.ClapAudioModelWithProjection,vt=o.ClapFeatureExtractor,xt=o.ClapModel,Mt=o.ClapPreTrainedModel,Tt=o.ClapTextModelWithProjection,kt=o.ClassifierFreeGuidanceLogitsProcessor,$t=o.CodeGenForCausalLM,Ct=o.CodeGenModel,Pt=o.CodeGenPreTrainedModel,St=o.CodeGenTokenizer,Et=o.CodeLlamaTokenizer,Ft=o.CohereForCausalLM,At=o.CohereModel,It=o.CoherePreTrainedModel,zt=o.CohereTokenizer,Lt=o.ConvBertForMaskedLM,Ot=o.ConvBertForQuestionAnswering,Bt=o.ConvBertForSequenceClassification,Dt=o.ConvBertForTokenClassification,Nt=o.ConvBertModel,Rt=o.ConvBertPreTrainedModel,jt=o.ConvBertTokenizer,Vt=o.ConvNextFeatureExtractor,Gt=o.ConvNextForImageClassification,Ut=o.ConvNextImageProcessor,qt=o.ConvNextModel,Wt=o.ConvNextPreTrainedModel,Ht=o.ConvNextV2ForImageClassification,Xt=o.ConvNextV2Model,Qt=o.ConvNextV2PreTrainedModel,Kt=o.DPTFea
tureExtractor,Yt=o.DPTForDepthEstimation,Jt=o.DPTImageProcessor,Zt=o.DPTModel,en=o.DPTPreTrainedModel,tn=o.DebertaForMaskedLM,nn=o.DebertaForQuestionAnswering,rn=o.DebertaForSequenceClassification,sn=o.DebertaForTokenClassification,on=o.DebertaModel,an=o.DebertaPreTrainedModel,ln=o.DebertaTokenizer,dn=o.DebertaV2ForMaskedLM,un=o.DebertaV2ForQuestionAnswering,cn=o.DebertaV2ForSequenceClassification,pn=o.DebertaV2ForTokenClassification,hn=o.DebertaV2Model,mn=o.DebertaV2PreTrainedModel,fn=o.DebertaV2Tokenizer,gn=o.DecisionTransformerModel,_n=o.DecisionTransformerPreTrainedModel,wn=o.DeiTFeatureExtractor,yn=o.DeiTForImageClassification,bn=o.DeiTImageProcessor,vn=o.DeiTModel,xn=o.DeiTPreTrainedModel,Mn=o.DepthAnythingForDepthEstimation,Tn=o.DepthAnythingPreTrainedModel,kn=o.DepthEstimationPipeline,$n=o.DepthProForDepthEstimation,Cn=o.DepthProPreTrainedModel,Pn=o.DetrFeatureExtractor,Sn=o.DetrForObjectDetection,En=o.DetrForSegmentation,Fn=o.DetrImageProcessor,An=o.DetrModel,In=o.DetrObjectDetectionOutput,zn=o.DetrPreTrainedModel,Ln=o.DetrSegmentationOutput,On=o.Dinov2ForImageClassification,Bn=o.Dinov2Model,Dn=o.Dinov2PreTrainedModel,Nn=o.DistilBertForMaskedLM,Rn=o.DistilBertForQuestionAnswering,jn=o.DistilBertForSequenceClassification,Vn=o.DistilBertForTokenClassification,Gn=o.DistilBertModel,Un=o.DistilBertPreTrainedModel,qn=o.DistilBertTokenizer,Wn=o.DocumentQuestionAnsweringPipeline,Hn=o.DonutFeatureExtractor,Xn=o.DonutImageProcessor,Qn=o.DonutSwinModel,Kn=o.DonutSwinPreTrainedModel,Yn=o.EfficientNetForImageClassification,Jn=o.EfficientNetImageProcessor,Zn=o.EfficientNetModel,er=o.EfficientNetPreTrainedModel,tr=o.ElectraForMaskedLM,nr=o.ElectraForQuestionAnswering,rr=o.ElectraForSequenceClassification,sr=o.ElectraForTokenClassification,or=o.ElectraModel,ir=o.ElectraPreTrainedModel,ar=o.ElectraTokenizer,lr=o.EosTokenCriteria,dr=o.EsmForMaskedLM,ur=o.EsmForSequenceClassification,cr=o.EsmForTokenClassification,pr=o.EsmModel,hr=o.EsmPreTrainedModel,mr=o.EsmTokenizer,fr=o.F
FT,gr=o.FalconForCausalLM,_r=o.FalconModel,wr=o.FalconPreTrainedModel,yr=o.FalconTokenizer,br=o.FastViTForImageClassification,vr=o.FastViTModel,xr=o.FastViTPreTrainedModel,Mr=o.FeatureExtractionPipeline,Tr=o.FeatureExtractor,kr=o.FillMaskPipeline,$r=o.Florence2ForConditionalGeneration,Cr=o.Florence2PreTrainedModel,Pr=o.Florence2Processor,Sr=o.ForcedBOSTokenLogitsProcessor,Er=o.ForcedEOSTokenLogitsProcessor,Fr=o.GLPNFeatureExtractor,Ar=o.GLPNForDepthEstimation,Ir=o.GLPNModel,zr=o.GLPNPreTrainedModel,Lr=o.GPT2LMHeadModel,Or=o.GPT2Model,Br=o.GPT2PreTrainedModel,Dr=o.GPT2Tokenizer,Nr=o.GPTBigCodeForCausalLM,Rr=o.GPTBigCodeModel,jr=o.GPTBigCodePreTrainedModel,Vr=o.GPTJForCausalLM,Gr=o.GPTJModel,Ur=o.GPTJPreTrainedModel,qr=o.GPTNeoForCausalLM,Wr=o.GPTNeoModel,Hr=o.GPTNeoPreTrainedModel,Xr=o.GPTNeoXForCausalLM,Qr=o.GPTNeoXModel,Kr=o.GPTNeoXPreTrainedModel,Yr=o.GPTNeoXTokenizer,Jr=o.Gemma2ForCausalLM,Zr=o.Gemma2Model,es=o.Gemma2PreTrainedModel,ts=o.GemmaForCausalLM,ns=o.GemmaModel,rs=o.GemmaPreTrainedModel,ss=o.GemmaTokenizer,os=o.GraniteForCausalLM,is=o.GraniteModel,as=o.GranitePreTrainedModel,ls=o.Grok1Tokenizer,ds=o.GroupViTModel,us=o.GroupViTPreTrainedModel,cs=o.HerbertTokenizer,ps=o.HieraForImageClassification,hs=o.HieraModel,ms=o.HieraPreTrainedModel,fs=o.HubertForCTC,gs=o.HubertForSequenceClassification,_s=o.HubertModel,ws=o.HubertPreTrainedModel,ys=o.Idefics3ForConditionalGeneration,bs=o.Idefics3ImageProcessor,vs=o.Idefics3PreTrainedModel,xs=o.Idefics3Processor,Ms=o.ImageClassificationPipeline,Ts=o.ImageFeatureExtractionPipeline,ks=o.ImageFeatureExtractor,$s=o.ImageMattingOutput,Cs=o.ImageProcessor,Ps=o.ImageSegmentationPipeline,Ss=o.ImageToImagePipeline,Es=o.ImageToTextPipeline,Fs=o.InterruptableStoppingCriteria,As=o.JAISLMHeadModel,Is=o.JAISModel,zs=o.JAISPreTrainedModel,Ls=o.JinaCLIPImageProcessor,Os=o.JinaCLIPModel,Bs=o.JinaCLIPPreTrainedModel,Ds=o.JinaCLIPProcessor,Ns=o.JinaCLIPTextModel,Rs=o.JinaCLIPVisionModel,js=o.LlamaForCausalLM,Vs=o.LlamaModel,Gs=o.LlamaP
reTrainedModel,Us=o.LlamaTokenizer,qs=o.LlavaForConditionalGeneration,Ws=o.LlavaOnevisionForConditionalGeneration,Hs=o.LlavaOnevisionImageProcessor,Xs=o.LlavaPreTrainedModel,Qs=o.LogitsProcessor,Ks=o.LogitsProcessorList,Ys=o.LogitsWarper,Js=o.LongT5ForConditionalGeneration,Zs=o.LongT5Model,eo=o.LongT5PreTrainedModel,to=o.M2M100ForConditionalGeneration,no=o.M2M100Model,ro=o.M2M100PreTrainedModel,so=o.M2M100Tokenizer,oo=o.MBart50Tokenizer,io=o.MBartForCausalLM,ao=o.MBartForConditionalGeneration,lo=o.MBartForSequenceClassification,uo=o.MBartModel,co=o.MBartPreTrainedModel,po=o.MBartTokenizer,ho=o.MPNetForMaskedLM,mo=o.MPNetForQuestionAnswering,fo=o.MPNetForSequenceClassification,go=o.MPNetForTokenClassification,_o=o.MPNetModel,wo=o.MPNetPreTrainedModel,yo=o.MPNetTokenizer,bo=o.MT5ForConditionalGeneration,vo=o.MT5Model,xo=o.MT5PreTrainedModel,Mo=o.MarianMTModel,To=o.MarianModel,ko=o.MarianPreTrainedModel,$o=o.MarianTokenizer,Co=o.Mask2FormerImageProcessor,Po=o.MaskFormerFeatureExtractor,So=o.MaskFormerForInstanceSegmentation,Eo=o.MaskFormerImageProcessor,Fo=o.MaskFormerModel,Ao=o.MaskFormerPreTrainedModel,Io=o.MaskedLMOutput,zo=o.MaxLengthCriteria,Lo=o.MgpstrForSceneTextRecognition,Oo=o.MgpstrModelOutput,Bo=o.MgpstrPreTrainedModel,Do=o.MgpstrProcessor,No=o.MgpstrTokenizer,Ro=o.MinLengthLogitsProcessor,jo=o.MinNewTokensLengthLogitsProcessor,Vo=o.MistralForCausalLM,Go=o.MistralModel,Uo=o.MistralPreTrainedModel,qo=o.MobileBertForMaskedLM,Wo=o.MobileBertForQuestionAnswering,Ho=o.MobileBertForSequenceClassification,Xo=o.MobileBertModel,Qo=o.MobileBertPreTrainedModel,Ko=o.MobileBertTokenizer,Yo=o.MobileLLMForCausalLM,Jo=o.MobileLLMModel,Zo=o.MobileLLMPreTrainedModel,ei=o.MobileNetV1FeatureExtractor,ti=o.MobileNetV1ForImageClassification,ni=o.MobileNetV1ImageProcessor,ri=o.MobileNetV1Model,si=o.MobileNetV1PreTrainedModel,oi=o.MobileNetV2FeatureExtractor,ii=o.MobileNetV2ForImageClassification,ai=o.MobileNetV2ImageProcessor,li=o.MobileNetV2Model,di=o.MobileNetV2PreTrainedModel,u
i=o.MobileNetV3FeatureExtractor,ci=o.MobileNetV3ForImageClassification,pi=o.MobileNetV3ImageProcessor,hi=o.MobileNetV3Model,mi=o.MobileNetV3PreTrainedModel,fi=o.MobileNetV4FeatureExtractor,gi=o.MobileNetV4ForImageClassification,_i=o.MobileNetV4ImageProcessor,wi=o.MobileNetV4Model,yi=o.MobileNetV4PreTrainedModel,bi=o.MobileViTFeatureExtractor,vi=o.MobileViTForImageClassification,xi=o.MobileViTImageProcessor,Mi=o.MobileViTModel,Ti=o.MobileViTPreTrainedModel,ki=o.MobileViTV2ForImageClassification,$i=o.MobileViTV2Model,Ci=o.MobileViTV2PreTrainedModel,Pi=o.ModelOutput,Si=o.Moondream1ForConditionalGeneration,Ei=o.MptForCausalLM,Fi=o.MptModel,Ai=o.MptPreTrainedModel,Ii=o.MultiModalityCausalLM,zi=o.MultiModalityPreTrainedModel,Li=o.MusicgenForCausalLM,Oi=o.MusicgenForConditionalGeneration,Bi=o.MusicgenModel,Di=o.MusicgenPreTrainedModel,Ni=o.NllbTokenizer,Ri=o.NoBadWordsLogitsProcessor,ji=o.NoRepeatNGramLogitsProcessor,Vi=o.NomicBertModel,Gi=o.NomicBertPreTrainedModel,Ui=o.NougatImageProcessor,qi=o.NougatTokenizer,Wi=o.OPTForCausalLM,Hi=o.OPTModel,Xi=o.OPTPreTrainedModel,Qi=o.ObjectDetectionPipeline,Ki=o.OlmoForCausalLM,Yi=o.OlmoModel,Ji=o.OlmoPreTrainedModel,Zi=o.OpenELMForCausalLM,ea=o.OpenELMModel,ta=o.OpenELMPreTrainedModel,na=o.OwlViTFeatureExtractor,ra=o.OwlViTForObjectDetection,sa=o.OwlViTImageProcessor,oa=o.OwlViTModel,ia=o.OwlViTPreTrainedModel,aa=o.OwlViTProcessor,la=o.Owlv2ForObjectDetection,da=o.Owlv2ImageProcessor,ua=o.Owlv2Model,ca=o.Owlv2PreTrainedModel,pa=o.PatchTSMixerForPrediction,ha=o.PatchTSMixerModel,ma=o.PatchTSMixerPreTrainedModel,fa=o.PatchTSTForPrediction,ga=o.PatchTSTModel,_a=o.PatchTSTPreTrainedModel,wa=o.Phi3ForCausalLM,ya=o.Phi3Model,ba=o.Phi3PreTrainedModel,va=o.PhiForCausalLM,xa=o.PhiModel,Ma=o.PhiPreTrainedModel,Ta=o.Pipeline,ka=o.PreTrainedModel,$a=o.PreTrainedTokenizer,Ca=o.PretrainedConfig,Pa=o.PretrainedMixin,Sa=o.Processor,Ea=o.PvtForImageClassification,Fa=o.PvtImageProcessor,Aa=o.PvtModel,Ia=o.PvtPreTrainedModel,za=o.PyAnnoteFeatureExtra
ctor,La=o.PyAnnoteForAudioFrameClassification,Oa=o.PyAnnoteModel,Ba=o.PyAnnotePreTrainedModel,Da=o.PyAnnoteProcessor,Na=o.QuestionAnsweringModelOutput,Ra=o.QuestionAnsweringPipeline,ja=o.Qwen2ForCausalLM,Va=o.Qwen2Model,Ga=o.Qwen2PreTrainedModel,Ua=o.Qwen2Tokenizer,qa=o.Qwen2VLForConditionalGeneration,Wa=o.Qwen2VLImageProcessor,Ha=o.Qwen2VLPreTrainedModel,Xa=o.Qwen2VLProcessor,Qa=o.RTDetrForObjectDetection,Ka=o.RTDetrImageProcessor,Ya=o.RTDetrModel,Ja=o.RTDetrObjectDetectionOutput,Za=o.RTDetrPreTrainedModel,el=o.RawImage,tl=o.RepetitionPenaltyLogitsProcessor,nl=o.ResNetForImageClassification,rl=o.ResNetModel,sl=o.ResNetPreTrainedModel,ol=o.RoFormerForMaskedLM,il=o.RoFormerForQuestionAnswering,al=o.RoFormerForSequenceClassification,ll=o.RoFormerForTokenClassification,dl=o.RoFormerModel,ul=o.RoFormerPreTrainedModel,cl=o.RoFormerTokenizer,pl=o.RobertaForMaskedLM,hl=o.RobertaForQuestionAnswering,ml=o.RobertaForSequenceClassification,fl=o.RobertaForTokenClassification,gl=o.RobertaModel,_l=o.RobertaPreTrainedModel,wl=o.RobertaTokenizer,yl=o.SamImageProcessor,bl=o.SamImageSegmentationOutput,vl=o.SamModel,xl=o.SamPreTrainedModel,Ml=o.SamProcessor,Tl=o.SapiensForDepthEstimation,kl=o.SapiensForNormalEstimation,$l=o.SapiensForSemanticSegmentation,Cl=o.SapiensPreTrainedModel,Pl=o.SeamlessM4TFeatureExtractor,Sl=o.SegformerFeatureExtractor,El=o.SegformerForImageClassification,Fl=o.SegformerForSemanticSegmentation,Al=o.SegformerImageProcessor,Il=o.SegformerModel,zl=o.SegformerPreTrainedModel,Ll=o.Seq2SeqLMOutput,Ol=o.SequenceClassifierOutput,Bl=o.SiglipImageProcessor,Dl=o.SiglipModel,Nl=o.SiglipPreTrainedModel,Rl=o.SiglipTextModel,jl=o.SiglipTokenizer,Vl=o.SiglipVisionModel,Gl=o.SpeechT5FeatureExtractor,Ul=o.SpeechT5ForSpeechToText,ql=o.SpeechT5ForTextToSpeech,Wl=o.SpeechT5HifiGan,Hl=o.SpeechT5Model,Xl=o.SpeechT5PreTrainedModel,Ql=o.SpeechT5Processor,Kl=o.SpeechT5Tokenizer,Yl=o.SqueezeBertForMaskedLM,Jl=o.SqueezeBertForQuestionAnswering,Zl=o.SqueezeBertForSequenceClassification,ed
=o.SqueezeBertModel,td=o.SqueezeBertPreTrainedModel,nd=o.SqueezeBertTokenizer,rd=o.StableLmForCausalLM,sd=o.StableLmModel,od=o.StableLmPreTrainedModel,id=o.Starcoder2ForCausalLM,ad=o.Starcoder2Model,ld=o.Starcoder2PreTrainedModel,dd=o.StoppingCriteria,ud=o.StoppingCriteriaList,cd=o.SummarizationPipeline,pd=o.SuppressTokensAtBeginLogitsProcessor,hd=o.Swin2SRForImageSuperResolution,md=o.Swin2SRImageProcessor,fd=o.Swin2SRModel,gd=o.Swin2SRPreTrainedModel,_d=o.SwinForImageClassification,wd=o.SwinModel,yd=o.SwinPreTrainedModel,bd=o.T5ForConditionalGeneration,vd=o.T5Model,xd=o.T5PreTrainedModel,Md=o.T5Tokenizer,Td=o.TableTransformerForObjectDetection,kd=o.TableTransformerModel,$d=o.TableTransformerObjectDetectionOutput,Cd=o.TableTransformerPreTrainedModel,Pd=o.TemperatureLogitsWarper,Sd=o.Tensor,Ed=o.Text2TextGenerationPipeline,Fd=o.TextClassificationPipeline,Ad=o.TextGenerationPipeline,Id=o.TextStreamer,zd=o.TextToAudioPipeline,Ld=o.TokenClassificationPipeline,Od=o.TokenClassifierOutput,Bd=o.TokenizerModel,Dd=o.TopKLogitsWarper,Nd=o.TopPLogitsWarper,Rd=o.TrOCRForCausalLM,jd=o.TrOCRPreTrainedModel,Vd=o.TranslationPipeline,Gd=o.UniSpeechForCTC,Ud=o.UniSpeechForSequenceClassification,qd=o.UniSpeechModel,Wd=o.UniSpeechPreTrainedModel,Hd=o.UniSpeechSatForAudioFrameClassification,Xd=o.UniSpeechSatForCTC,Qd=o.UniSpeechSatForSequenceClassification,Kd=o.UniSpeechSatModel,Yd=o.UniSpeechSatPreTrainedModel,Jd=o.VLChatProcessor,Zd=o.VLMImageProcessor,eu=o.ViTFeatureExtractor,tu=o.ViTForImageClassification,nu=o.ViTImageProcessor,ru=o.ViTMAEModel,su=o.ViTMAEPreTrainedModel,ou=o.ViTMSNForImageClassification,iu=o.ViTMSNModel,au=o.ViTMSNPreTrainedModel,lu=o.ViTModel,du=o.ViTPreTrainedModel,uu=o.VisionEncoderDecoderModel,cu=o.VitMatteForImageMatting,pu=o.VitMatteImageProcessor,hu=o.VitMattePreTrainedModel,mu=o.VitPoseForPoseEstimation,fu=o.VitPoseImageProcessor,gu=o.VitPosePreTrainedModel,_u=o.VitsModel,wu=o.VitsModelOutput,yu=o.VitsPreTrainedModel,bu=o.VitsTokenizer,vu=o.Wav2Vec2BertForCT
C,xu=o.Wav2Vec2BertForSequenceClassification,Mu=o.Wav2Vec2BertModel,Tu=o.Wav2Vec2BertPreTrainedModel,ku=o.Wav2Vec2CTCTokenizer,$u=o.Wav2Vec2FeatureExtractor,Cu=o.Wav2Vec2ForAudioFrameClassification,Pu=o.Wav2Vec2ForCTC,Su=o.Wav2Vec2ForSequenceClassification,Eu=o.Wav2Vec2Model,Fu=o.Wav2Vec2PreTrainedModel,Au=o.Wav2Vec2ProcessorWithLM,Iu=o.WavLMForAudioFrameClassification,zu=o.WavLMForCTC,Lu=o.WavLMForSequenceClassification,Ou=o.WavLMForXVector,Bu=o.WavLMModel,Du=o.WavLMPreTrainedModel,Nu=o.WeSpeakerFeatureExtractor,Ru=o.WeSpeakerResNetModel,ju=o.WeSpeakerResNetPreTrainedModel,Vu=o.WhisperFeatureExtractor,Gu=o.WhisperForConditionalGeneration,Uu=o.WhisperModel,qu=o.WhisperPreTrainedModel,Wu=o.WhisperProcessor,Hu=o.WhisperTextStreamer,Xu=o.WhisperTimeStampLogitsProcessor,Qu=o.WhisperTokenizer,Ku=o.XLMForQuestionAnswering,Yu=o.XLMForSequenceClassification,Ju=o.XLMForTokenClassification,Zu=o.XLMModel,ec=o.XLMPreTrainedModel,tc=o.XLMRobertaForMaskedLM,nc=o.XLMRobertaForQuestionAnswering,rc=o.XLMRobertaForSequenceClassification,sc=o.XLMRobertaForTokenClassification,oc=o.XLMRobertaModel,ic=o.XLMRobertaPreTrainedModel,ac=o.XLMRobertaTokenizer,lc=o.XLMTokenizer,dc=o.XLMWithLMHeadModel,uc=o.XVectorOutput,cc=o.YolosFeatureExtractor,pc=o.YolosForObjectDetection,hc=o.YolosImageProcessor,mc=o.YolosModel,fc=o.YolosObjectDetectionOutput,gc=o.YolosPreTrainedModel,_c=o.ZeroShotAudioClassificationPipeline,wc=o.ZeroShotClassificationPipeline,yc=o.ZeroShotImageClassificationPipeline,bc=o.ZeroShotObjectDetectionPipeline,vc=o.bankers_round,xc=o.cat,Mc=o.cos_sim,Tc=o.dot,kc=o.dynamic_time_warping,$c=o.env,Cc=o.full,Pc=o.full_like,Sc=o.getKeyValueShapes,Ec=o.hamming,Fc=o.hanning,Ac=o.interpolate,Ic=o.interpolate_4d,zc=o.interpolate_data,Lc=o.is_chinese_char,Oc=o.layer_norm,Bc=o.load_image,Dc=o.log_softmax,Nc=o.magnitude,Rc=o.matmul,jc=o.max,Vc=o.mean,Gc=o.mean_pooling,Uc=o.medianFilter,qc=o.mel_filter_bank,Wc=o.min,Hc=o.ones,Xc=o.ones_like,Qc=o.permute,Kc=o.permute_data,Yc=o.pipeline,Jc=o.quan
tize_embeddings,Zc=o.read_audio,ep=o.rfft,tp=o.round,np=o.softmax,rp=o.spectrogram,sp=o.stack,op=o.std_mean,ip=o.topk,ap=o.window_function,lp=o.zeros,dp=o.zeros_like;export{C as ASTFeatureExtractor,P as ASTForAudioClassification,S as ASTModel,E as ASTPreTrainedModel,F as AlbertForMaskedLM,A as AlbertForQuestionAnswering,I as AlbertForSequenceClassification,z as AlbertModel,L as AlbertPreTrainedModel,O as AlbertTokenizer,B as AudioClassificationPipeline,D as AutoConfig,N as AutoFeatureExtractor,R as AutoImageProcessor,j as AutoModel,V as AutoModelForAudioClassification,G as AutoModelForAudioFrameClassification,U as AutoModelForCTC,q as AutoModelForCausalLM,W as AutoModelForDepthEstimation,H as AutoModelForDocumentQuestionAnswering,X as AutoModelForImageClassification,Q as AutoModelForImageFeatureExtraction,K as AutoModelForImageMatting,Y as AutoModelForImageSegmentation,J as AutoModelForImageToImage,Z as AutoModelForMaskGeneration,ee as AutoModelForMaskedLM,te as AutoModelForNormalEstimation,ne as AutoModelForObjectDetection,re as AutoModelForPoseEstimation,se as AutoModelForQuestionAnswering,oe as AutoModelForSemanticSegmentation,ie as AutoModelForSeq2SeqLM,ae as AutoModelForSequenceClassification,le as AutoModelForSpeechSeq2Seq,de as AutoModelForTextToSpectrogram,ue as AutoModelForTextToWaveform,ce as AutoModelForTokenClassification,pe as AutoModelForUniversalSegmentation,he as AutoModelForVision2Seq,me as AutoModelForXVector,fe as AutoModelForZeroShotObjectDetection,ge as AutoProcessor,_e as AutoTokenizer,we as AutomaticSpeechRecognitionPipeline,ye as BartForConditionalGeneration,be as BartForSequenceClassification,ve as BartModel,xe as BartPretrainedModel,Me as BartTokenizer,Te as BaseModelOutput,ke as BaseStreamer,$e as BeitFeatureExtractor,Ce as BeitForImageClassification,Pe as BeitModel,Se as BeitPreTrainedModel,Ee as BertForMaskedLM,Fe as BertForQuestionAnswering,Ae as BertForSequenceClassification,Ie as BertForTokenClassification,ze as BertModel,Le as 
BertPreTrainedModel,Oe as BertTokenizer,Be as BitImageProcessor,De as BlenderbotForConditionalGeneration,Ne as BlenderbotModel,Re as BlenderbotPreTrainedModel,je as BlenderbotSmallForConditionalGeneration,Ve as BlenderbotSmallModel,Ge as BlenderbotSmallPreTrainedModel,Ue as BlenderbotSmallTokenizer,qe as BlenderbotTokenizer,We as BloomForCausalLM,He as BloomModel,Xe as BloomPreTrainedModel,Qe as BloomTokenizer,Ke as CLIPFeatureExtractor,Ye as CLIPImageProcessor,Je as CLIPModel,Ze as CLIPPreTrainedModel,et as CLIPSegForImageSegmentation,tt as CLIPSegModel,nt as CLIPSegPreTrainedModel,rt as CLIPTextModel,st as CLIPTextModelWithProjection,ot as CLIPTokenizer,it as CLIPVisionModel,at as CLIPVisionModelWithProjection,lt as CamembertForMaskedLM,dt as CamembertForQuestionAnswering,ut as CamembertForSequenceClassification,ct as CamembertForTokenClassification,pt as CamembertModel,ht as CamembertPreTrainedModel,mt as CamembertTokenizer,ft as CausalLMOutput,gt as CausalLMOutputWithPast,_t as ChineseCLIPFeatureExtractor,wt as ChineseCLIPModel,yt as ChineseCLIPPreTrainedModel,bt as ClapAudioModelWithProjection,vt as ClapFeatureExtractor,xt as ClapModel,Mt as ClapPreTrainedModel,Tt as ClapTextModelWithProjection,kt as ClassifierFreeGuidanceLogitsProcessor,$t as CodeGenForCausalLM,Ct as CodeGenModel,Pt as CodeGenPreTrainedModel,St as CodeGenTokenizer,Et as CodeLlamaTokenizer,Ft as CohereForCausalLM,At as CohereModel,It as CoherePreTrainedModel,zt as CohereTokenizer,Lt as ConvBertForMaskedLM,Ot as ConvBertForQuestionAnswering,Bt as ConvBertForSequenceClassification,Dt as ConvBertForTokenClassification,Nt as ConvBertModel,Rt as ConvBertPreTrainedModel,jt as ConvBertTokenizer,Vt as ConvNextFeatureExtractor,Gt as ConvNextForImageClassification,Ut as ConvNextImageProcessor,qt as ConvNextModel,Wt as ConvNextPreTrainedModel,Ht as ConvNextV2ForImageClassification,Xt as ConvNextV2Model,Qt as ConvNextV2PreTrainedModel,Kt as DPTFeatureExtractor,Yt as DPTForDepthEstimation,Jt as 
DPTImageProcessor,Zt as DPTModel,en as DPTPreTrainedModel,tn as DebertaForMaskedLM,nn as DebertaForQuestionAnswering,rn as DebertaForSequenceClassification,sn as DebertaForTokenClassification,on as DebertaModel,an as DebertaPreTrainedModel,ln as DebertaTokenizer,dn as DebertaV2ForMaskedLM,un as DebertaV2ForQuestionAnswering,cn as DebertaV2ForSequenceClassification,pn as DebertaV2ForTokenClassification,hn as DebertaV2Model,mn as DebertaV2PreTrainedModel,fn as DebertaV2Tokenizer,gn as DecisionTransformerModel,_n as DecisionTransformerPreTrainedModel,wn as DeiTFeatureExtractor,yn as DeiTForImageClassification,bn as DeiTImageProcessor,vn as DeiTModel,xn as DeiTPreTrainedModel,Mn as DepthAnythingForDepthEstimation,Tn as DepthAnythingPreTrainedModel,kn as DepthEstimationPipeline,$n as DepthProForDepthEstimation,Cn as DepthProPreTrainedModel,Pn as DetrFeatureExtractor,Sn as DetrForObjectDetection,En as DetrForSegmentation,Fn as DetrImageProcessor,An as DetrModel,In as DetrObjectDetectionOutput,zn as DetrPreTrainedModel,Ln as DetrSegmentationOutput,On as Dinov2ForImageClassification,Bn as Dinov2Model,Dn as Dinov2PreTrainedModel,Nn as DistilBertForMaskedLM,Rn as DistilBertForQuestionAnswering,jn as DistilBertForSequenceClassification,Vn as DistilBertForTokenClassification,Gn as DistilBertModel,Un as DistilBertPreTrainedModel,qn as DistilBertTokenizer,Wn as DocumentQuestionAnsweringPipeline,Hn as DonutFeatureExtractor,Xn as DonutImageProcessor,Qn as DonutSwinModel,Kn as DonutSwinPreTrainedModel,Yn as EfficientNetForImageClassification,Jn as EfficientNetImageProcessor,Zn as EfficientNetModel,er as EfficientNetPreTrainedModel,tr as ElectraForMaskedLM,nr as ElectraForQuestionAnswering,rr as ElectraForSequenceClassification,sr as ElectraForTokenClassification,or as ElectraModel,ir as ElectraPreTrainedModel,ar as ElectraTokenizer,lr as EosTokenCriteria,dr as EsmForMaskedLM,ur as EsmForSequenceClassification,cr as EsmForTokenClassification,pr as EsmModel,hr as 
EsmPreTrainedModel,mr as EsmTokenizer,fr as FFT,gr as FalconForCausalLM,_r as FalconModel,wr as FalconPreTrainedModel,yr as FalconTokenizer,br as FastViTForImageClassification,vr as FastViTModel,xr as FastViTPreTrainedModel,Mr as FeatureExtractionPipeline,Tr as FeatureExtractor,kr as FillMaskPipeline,$r as Florence2ForConditionalGeneration,Cr as Florence2PreTrainedModel,Pr as Florence2Processor,Sr as ForcedBOSTokenLogitsProcessor,Er as ForcedEOSTokenLogitsProcessor,Fr as GLPNFeatureExtractor,Ar as GLPNForDepthEstimation,Ir as GLPNModel,zr as GLPNPreTrainedModel,Lr as GPT2LMHeadModel,Or as GPT2Model,Br as GPT2PreTrainedModel,Dr as GPT2Tokenizer,Nr as GPTBigCodeForCausalLM,Rr as GPTBigCodeModel,jr as GPTBigCodePreTrainedModel,Vr as GPTJForCausalLM,Gr as GPTJModel,Ur as GPTJPreTrainedModel,qr as GPTNeoForCausalLM,Wr as GPTNeoModel,Hr as GPTNeoPreTrainedModel,Xr as GPTNeoXForCausalLM,Qr as GPTNeoXModel,Kr as GPTNeoXPreTrainedModel,Yr as GPTNeoXTokenizer,Jr as Gemma2ForCausalLM,Zr as Gemma2Model,es as Gemma2PreTrainedModel,ts as GemmaForCausalLM,ns as GemmaModel,rs as GemmaPreTrainedModel,ss as GemmaTokenizer,os as GraniteForCausalLM,is as GraniteModel,as as GranitePreTrainedModel,ls as Grok1Tokenizer,ds as GroupViTModel,us as GroupViTPreTrainedModel,cs as HerbertTokenizer,ps as HieraForImageClassification,hs as HieraModel,ms as HieraPreTrainedModel,fs as HubertForCTC,gs as HubertForSequenceClassification,_s as HubertModel,ws as HubertPreTrainedModel,ys as Idefics3ForConditionalGeneration,bs as Idefics3ImageProcessor,vs as Idefics3PreTrainedModel,xs as Idefics3Processor,Ms as ImageClassificationPipeline,Ts as ImageFeatureExtractionPipeline,ks as ImageFeatureExtractor,$s as ImageMattingOutput,Cs as ImageProcessor,Ps as ImageSegmentationPipeline,Ss as ImageToImagePipeline,Es as ImageToTextPipeline,Fs as InterruptableStoppingCriteria,As as JAISLMHeadModel,Is as JAISModel,zs as JAISPreTrainedModel,Ls as JinaCLIPImageProcessor,Os as JinaCLIPModel,Bs as 
JinaCLIPPreTrainedModel,Ds as JinaCLIPProcessor,Ns as JinaCLIPTextModel,Rs as JinaCLIPVisionModel,js as LlamaForCausalLM,Vs as LlamaModel,Gs as LlamaPreTrainedModel,Us as LlamaTokenizer,qs as LlavaForConditionalGeneration,Ws as LlavaOnevisionForConditionalGeneration,Hs as LlavaOnevisionImageProcessor,Xs as LlavaPreTrainedModel,Qs as LogitsProcessor,Ks as LogitsProcessorList,Ys as LogitsWarper,Js as LongT5ForConditionalGeneration,Zs as LongT5Model,eo as LongT5PreTrainedModel,to as M2M100ForConditionalGeneration,no as M2M100Model,ro as M2M100PreTrainedModel,so as M2M100Tokenizer,oo as MBart50Tokenizer,io as MBartForCausalLM,ao as MBartForConditionalGeneration,lo as MBartForSequenceClassification,uo as MBartModel,co as MBartPreTrainedModel,po as MBartTokenizer,ho as MPNetForMaskedLM,mo as MPNetForQuestionAnswering,fo as MPNetForSequenceClassification,go as MPNetForTokenClassification,_o as MPNetModel,wo as MPNetPreTrainedModel,yo as MPNetTokenizer,bo as MT5ForConditionalGeneration,vo as MT5Model,xo as MT5PreTrainedModel,Mo as MarianMTModel,To as MarianModel,ko as MarianPreTrainedModel,$o as MarianTokenizer,Co as Mask2FormerImageProcessor,Po as MaskFormerFeatureExtractor,So as MaskFormerForInstanceSegmentation,Eo as MaskFormerImageProcessor,Fo as MaskFormerModel,Ao as MaskFormerPreTrainedModel,Io as MaskedLMOutput,zo as MaxLengthCriteria,Lo as MgpstrForSceneTextRecognition,Oo as MgpstrModelOutput,Bo as MgpstrPreTrainedModel,Do as MgpstrProcessor,No as MgpstrTokenizer,Ro as MinLengthLogitsProcessor,jo as MinNewTokensLengthLogitsProcessor,Vo as MistralForCausalLM,Go as MistralModel,Uo as MistralPreTrainedModel,qo as MobileBertForMaskedLM,Wo as MobileBertForQuestionAnswering,Ho as MobileBertForSequenceClassification,Xo as MobileBertModel,Qo as MobileBertPreTrainedModel,Ko as MobileBertTokenizer,Yo as MobileLLMForCausalLM,Jo as MobileLLMModel,Zo as MobileLLMPreTrainedModel,ei as MobileNetV1FeatureExtractor,ti as MobileNetV1ForImageClassification,ni as 
MobileNetV1ImageProcessor,ri as MobileNetV1Model,si as MobileNetV1PreTrainedModel,oi as MobileNetV2FeatureExtractor,ii as MobileNetV2ForImageClassification,ai as MobileNetV2ImageProcessor,li as MobileNetV2Model,di as MobileNetV2PreTrainedModel,ui as MobileNetV3FeatureExtractor,ci as MobileNetV3ForImageClassification,pi as MobileNetV3ImageProcessor,hi as MobileNetV3Model,mi as MobileNetV3PreTrainedModel,fi as MobileNetV4FeatureExtractor,gi as MobileNetV4ForImageClassification,_i as MobileNetV4ImageProcessor,wi as MobileNetV4Model,yi as MobileNetV4PreTrainedModel,bi as MobileViTFeatureExtractor,vi as MobileViTForImageClassification,xi as MobileViTImageProcessor,Mi as MobileViTModel,Ti as MobileViTPreTrainedModel,ki as MobileViTV2ForImageClassification,$i as MobileViTV2Model,Ci as MobileViTV2PreTrainedModel,Pi as ModelOutput,Si as Moondream1ForConditionalGeneration,Ei as MptForCausalLM,Fi as MptModel,Ai as MptPreTrainedModel,Ii as MultiModalityCausalLM,zi as MultiModalityPreTrainedModel,Li as MusicgenForCausalLM,Oi as MusicgenForConditionalGeneration,Bi as MusicgenModel,Di as MusicgenPreTrainedModel,Ni as NllbTokenizer,Ri as NoBadWordsLogitsProcessor,ji as NoRepeatNGramLogitsProcessor,Vi as NomicBertModel,Gi as NomicBertPreTrainedModel,Ui as NougatImageProcessor,qi as NougatTokenizer,Wi as OPTForCausalLM,Hi as OPTModel,Xi as OPTPreTrainedModel,Qi as ObjectDetectionPipeline,Ki as OlmoForCausalLM,Yi as OlmoModel,Ji as OlmoPreTrainedModel,Zi as OpenELMForCausalLM,ea as OpenELMModel,ta as OpenELMPreTrainedModel,na as OwlViTFeatureExtractor,ra as OwlViTForObjectDetection,sa as OwlViTImageProcessor,oa as OwlViTModel,ia as OwlViTPreTrainedModel,aa as OwlViTProcessor,la as Owlv2ForObjectDetection,da as Owlv2ImageProcessor,ua as Owlv2Model,ca as Owlv2PreTrainedModel,pa as PatchTSMixerForPrediction,ha as PatchTSMixerModel,ma as PatchTSMixerPreTrainedModel,fa as PatchTSTForPrediction,ga as PatchTSTModel,_a as PatchTSTPreTrainedModel,wa as Phi3ForCausalLM,ya as Phi3Model,ba as 
Phi3PreTrainedModel,va as PhiForCausalLM,xa as PhiModel,Ma as PhiPreTrainedModel,Ta as Pipeline,ka as PreTrainedModel,$a as PreTrainedTokenizer,Ca as PretrainedConfig,Pa as PretrainedMixin,Sa as Processor,Ea as PvtForImageClassification,Fa as PvtImageProcessor,Aa as PvtModel,Ia as PvtPreTrainedModel,za as PyAnnoteFeatureExtractor,La as PyAnnoteForAudioFrameClassification,Oa as PyAnnoteModel,Ba as PyAnnotePreTrainedModel,Da as PyAnnoteProcessor,Na as QuestionAnsweringModelOutput,Ra as QuestionAnsweringPipeline,ja as Qwen2ForCausalLM,Va as Qwen2Model,Ga as Qwen2PreTrainedModel,Ua as Qwen2Tokenizer,qa as Qwen2VLForConditionalGeneration,Wa as Qwen2VLImageProcessor,Ha as Qwen2VLPreTrainedModel,Xa as Qwen2VLProcessor,Qa as RTDetrForObjectDetection,Ka as RTDetrImageProcessor,Ya as RTDetrModel,Ja as RTDetrObjectDetectionOutput,Za as RTDetrPreTrainedModel,el as RawImage,tl as RepetitionPenaltyLogitsProcessor,nl as ResNetForImageClassification,rl as ResNetModel,sl as ResNetPreTrainedModel,ol as RoFormerForMaskedLM,il as RoFormerForQuestionAnswering,al as RoFormerForSequenceClassification,ll as RoFormerForTokenClassification,dl as RoFormerModel,ul as RoFormerPreTrainedModel,cl as RoFormerTokenizer,pl as RobertaForMaskedLM,hl as RobertaForQuestionAnswering,ml as RobertaForSequenceClassification,fl as RobertaForTokenClassification,gl as RobertaModel,_l as RobertaPreTrainedModel,wl as RobertaTokenizer,yl as SamImageProcessor,bl as SamImageSegmentationOutput,vl as SamModel,xl as SamPreTrainedModel,Ml as SamProcessor,Tl as SapiensForDepthEstimation,kl as SapiensForNormalEstimation,$l as SapiensForSemanticSegmentation,Cl as SapiensPreTrainedModel,Pl as SeamlessM4TFeatureExtractor,Sl as SegformerFeatureExtractor,El as SegformerForImageClassification,Fl as SegformerForSemanticSegmentation,Al as SegformerImageProcessor,Il as SegformerModel,zl as SegformerPreTrainedModel,Ll as Seq2SeqLMOutput,Ol as SequenceClassifierOutput,Bl as SiglipImageProcessor,Dl as SiglipModel,Nl as 
SiglipPreTrainedModel,Rl as SiglipTextModel,jl as SiglipTokenizer,Vl as SiglipVisionModel,Gl as SpeechT5FeatureExtractor,Ul as SpeechT5ForSpeechToText,ql as SpeechT5ForTextToSpeech,Wl as SpeechT5HifiGan,Hl as SpeechT5Model,Xl as SpeechT5PreTrainedModel,Ql as SpeechT5Processor,Kl as SpeechT5Tokenizer,Yl as SqueezeBertForMaskedLM,Jl as SqueezeBertForQuestionAnswering,Zl as SqueezeBertForSequenceClassification,ed as SqueezeBertModel,td as SqueezeBertPreTrainedModel,nd as SqueezeBertTokenizer,rd as StableLmForCausalLM,sd as StableLmModel,od as StableLmPreTrainedModel,id as Starcoder2ForCausalLM,ad as Starcoder2Model,ld as Starcoder2PreTrainedModel,dd as StoppingCriteria,ud as StoppingCriteriaList,cd as SummarizationPipeline,pd as SuppressTokensAtBeginLogitsProcessor,hd as Swin2SRForImageSuperResolution,md as Swin2SRImageProcessor,fd as Swin2SRModel,gd as Swin2SRPreTrainedModel,_d as SwinForImageClassification,wd as SwinModel,yd as SwinPreTrainedModel,bd as T5ForConditionalGeneration,vd as T5Model,xd as T5PreTrainedModel,Md as T5Tokenizer,Td as TableTransformerForObjectDetection,kd as TableTransformerModel,$d as TableTransformerObjectDetectionOutput,Cd as TableTransformerPreTrainedModel,Pd as TemperatureLogitsWarper,Sd as Tensor,Ed as Text2TextGenerationPipeline,Fd as TextClassificationPipeline,Ad as TextGenerationPipeline,Id as TextStreamer,zd as TextToAudioPipeline,Ld as TokenClassificationPipeline,Od as TokenClassifierOutput,Bd as TokenizerModel,Dd as TopKLogitsWarper,Nd as TopPLogitsWarper,Rd as TrOCRForCausalLM,jd as TrOCRPreTrainedModel,Vd as TranslationPipeline,Gd as UniSpeechForCTC,Ud as UniSpeechForSequenceClassification,qd as UniSpeechModel,Wd as UniSpeechPreTrainedModel,Hd as UniSpeechSatForAudioFrameClassification,Xd as UniSpeechSatForCTC,Qd as UniSpeechSatForSequenceClassification,Kd as UniSpeechSatModel,Yd as UniSpeechSatPreTrainedModel,Jd as VLChatProcessor,Zd as VLMImageProcessor,eu as ViTFeatureExtractor,tu as ViTForImageClassification,nu as 
ViTImageProcessor,ru as ViTMAEModel,su as ViTMAEPreTrainedModel,ou as ViTMSNForImageClassification,iu as ViTMSNModel,au as ViTMSNPreTrainedModel,lu as ViTModel,du as ViTPreTrainedModel,uu as VisionEncoderDecoderModel,cu as VitMatteForImageMatting,pu as VitMatteImageProcessor,hu as VitMattePreTrainedModel,mu as VitPoseForPoseEstimation,fu as VitPoseImageProcessor,gu as VitPosePreTrainedModel,_u as VitsModel,wu as VitsModelOutput,yu as VitsPreTrainedModel,bu as VitsTokenizer,vu as Wav2Vec2BertForCTC,xu as Wav2Vec2BertForSequenceClassification,Mu as Wav2Vec2BertModel,Tu as Wav2Vec2BertPreTrainedModel,ku as Wav2Vec2CTCTokenizer,$u as Wav2Vec2FeatureExtractor,Cu as Wav2Vec2ForAudioFrameClassification,Pu as Wav2Vec2ForCTC,Su as Wav2Vec2ForSequenceClassification,Eu as Wav2Vec2Model,Fu as Wav2Vec2PreTrainedModel,Au as Wav2Vec2ProcessorWithLM,Iu as WavLMForAudioFrameClassification,zu as WavLMForCTC,Lu as WavLMForSequenceClassification,Ou as WavLMForXVector,Bu as WavLMModel,Du as WavLMPreTrainedModel,Nu as WeSpeakerFeatureExtractor,Ru as WeSpeakerResNetModel,ju as WeSpeakerResNetPreTrainedModel,Vu as WhisperFeatureExtractor,Gu as WhisperForConditionalGeneration,Uu as WhisperModel,qu as WhisperPreTrainedModel,Wu as WhisperProcessor,Hu as WhisperTextStreamer,Xu as WhisperTimeStampLogitsProcessor,Qu as WhisperTokenizer,Ku as XLMForQuestionAnswering,Yu as XLMForSequenceClassification,Ju as XLMForTokenClassification,Zu as XLMModel,ec as XLMPreTrainedModel,tc as XLMRobertaForMaskedLM,nc as XLMRobertaForQuestionAnswering,rc as XLMRobertaForSequenceClassification,sc as XLMRobertaForTokenClassification,oc as XLMRobertaModel,ic as XLMRobertaPreTrainedModel,ac as XLMRobertaTokenizer,lc as XLMTokenizer,dc as XLMWithLMHeadModel,uc as XVectorOutput,cc as YolosFeatureExtractor,pc as YolosForObjectDetection,hc as YolosImageProcessor,mc as YolosModel,fc as YolosObjectDetectionOutput,gc as YolosPreTrainedModel,_c as ZeroShotAudioClassificationPipeline,wc as ZeroShotClassificationPipeline,yc 
as ZeroShotImageClassificationPipeline,bc as ZeroShotObjectDetectionPipeline,vc as bankers_round,xc as cat,Mc as cos_sim,Tc as dot,kc as dynamic_time_warping,$c as env,Cc as full,Pc as full_like,Sc as getKeyValueShapes,Ec as hamming,Fc as hanning,Ac as interpolate,Ic as interpolate_4d,zc as interpolate_data,Lc as is_chinese_char,Oc as layer_norm,Bc as load_image,Dc as log_softmax,Nc as magnitude,Rc as matmul,jc as max,Vc as mean,Gc as mean_pooling,Uc as medianFilter,qc as mel_filter_bank,Wc as min,Hc as ones,Xc as ones_like,Qc as permute,Kc as permute_data,Yc as pipeline,Jc as quantize_embeddings,Zc as read_audio,ep as rfft,tp as round,np as softmax,rp as spectrogram,sp as stack,op as std_mean,ip as topk,ap as window_function,lp as zeros,dp as zeros_like};
|
|
416
422
|
//# sourceMappingURL=transformers.min.js.map
|