@huggingface/transformers 3.0.0-alpha.15 → 3.0.0-alpha.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -151,10 +151,10 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,P,E,F,A,I,z,O,B=Object.d
151
151
  \******************************/(e,t,n)=>{var r;n.r(t),n.d(t,{Tensor:()=>o.Tensor,createInferenceSession:()=>f,deviceToExecutionProviders:()=>h,isONNXProxy:()=>w,isONNXTensor:()=>g});var i=n(/*! ../env.js */"./src/env.js"),a=n(/*! onnxruntime-node */"?2ce3"),s=n(/*! #onnxruntime-webgpu */"./node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs"),o=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const l=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),u=[];let d,c;if(i.apis.IS_NODE_ENV){switch(c=a??(r||(r=n.t(a,2))),process.platform){case"win32":u.push("dml");break;case"linux":"x64"===process.arch&&u.push("cuda")}u.push("cpu"),d=["cpu"]}else c=s,i.apis.IS_WEBNN_AVAILABLE&&u.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),i.apis.IS_WEBGPU_AVAILABLE&&u.push("webgpu"),u.push("wasm"),d=["wasm"];const p=c.InferenceSession;function h(e=null){if(!e)return d;switch(e){case"auto":return u;case"gpu":return u.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(u.includes(e))return[l[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${u.join(", ")}.`)}let m=null;async function f(e,t){m&&await m;const n=p.create(e,t);return m??=n,await n}function g(e){return e instanceof c.Tensor}const _=c?.env;function w(){return _?.wasm?.proxy}_?.wasm&&(_.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${i.env.version}/dist/`,_.wasm.proxy=!1,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(_.wasm.numThreads=1)),_?.webgpu&&(_.webgpu.powerPreference="high-performance"),i.env.backends.onnx=_},"./src/configs.js":
152
152
  /*!************************!*\
153
153
  !*** ./src/configs.js ***!
154
- \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>o,getKeyValueShapes:()=>s});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),i=n(/*! ./utils/hub.js */"./src/utils/hub.js");function a(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=a(e.text_config);break;case"moondream1":n=a(e.phi_config);break;case"musicgen":n=a(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_att
ention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const i=a(e.decoder),s="num_decoder_layers"in i,o=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return s?(o.num_decoder_layers=i.num_decoder_layers,o.num_decoder_heads=i.num_decoder_heads,o.decoder_hidden_size=i.decoder_hidden_size,o.num_encoder_layers=i.num_encoder_layers,o.num_encoder_heads=i.num_encoder_heads,o.encoder_hidden_size=i.encoder_hidden_size):(o.num_layers=i.num_layers,o.num_heads=i.num_heads,o.hidden_size=i.hidden_size),o}const i={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)i[n]=e[t[n]];return i}function s(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,i=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,a=[1,r.num_encoder_heads,0,e],s=[1,r.num_decoder_heads,0,i];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=a,n[`${t}.${e}.encoder.value`]=a,n[`${t}.${e}.decoder.key`]=s,n[`${t}.${e}.decoder.value`]=s}else{const e=r.num_heads,i=r.num_layers,a=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,a];for(let 
e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*a];for(let e=0;e<i;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,a,0],s=[1*e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=s}else if("openelm"===r.model_type)for(let r=0;r<i;++r){const i=[1,e[r],0,a];n[`${t}.${r}.key`]=i,n[`${t}.${r}.value`]=i}else{const r=[1,e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class o{max_position_embeddings;constructor(e){this.model_type=null,this.is_encoder_decoder=!1,Object.assign(this,e),this.normalized_config=a(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:a=!1,revision:s="main"}={}){!n||n instanceof o||(n=new o(n));const l=n??await async function(e,t){return await(0,i.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:a,revision:s});return new this(l)}}class l{static async from_pretrained(...e){return o.from_pretrained(...e)}}},"./src/env.js":
154
+ \************************/(e,t,n)=>{n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>o,getKeyValueShapes:()=>s});var r=n(/*! ./utils/core.js */"./src/utils/core.js"),i=n(/*! ./utils/hub.js */"./src/utils/hub.js");function a(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=a(e.text_config);break;case"moondream1":n=a(e.phi_config);break;case"musicgen":n=a(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_att
ention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const i=a(e.decoder),s="num_decoder_layers"in i,o=(0,r.pick)(e,["model_type","is_encoder_decoder"]);return s?(o.num_decoder_layers=i.num_decoder_layers,o.num_decoder_heads=i.num_decoder_heads,o.decoder_hidden_size=i.decoder_hidden_size,o.num_encoder_layers=i.num_encoder_layers,o.num_encoder_heads=i.num_encoder_heads,o.encoder_hidden_size=i.encoder_hidden_size):(o.num_layers=i.num_layers,o.num_heads=i.num_heads,o.hidden_size=i.hidden_size),o}const i={...n,...(0,r.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)i[n]=e[t[n]];return i}function s(e,{prefix:t="past_key_values"}={}){const n={},r=e.normalized_config;if(r.is_encoder_decoder&&"num_encoder_heads"in r&&"num_decoder_heads"in r){const e=r.encoder_dim_kv??r.encoder_hidden_size/r.num_encoder_heads,i=r.decoder_dim_kv??r.decoder_hidden_size/r.num_decoder_heads,a=[1,r.num_encoder_heads,0,e],s=[1,r.num_decoder_heads,0,i];for(let e=0;e<r.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=a,n[`${t}.${e}.encoder.value`]=a,n[`${t}.${e}.decoder.key`]=s,n[`${t}.${e}.decoder.value`]=s}else{const e=r.num_heads,i=r.num_layers,a=r.dim_kv??r.hidden_size/(r.num_attention_heads??e);if("falcon"===r.model_type){const r=[1*e,0,a];for(let 
e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}else if(r.multi_query){const r=[1*e,0,2*a];for(let e=0;e<i;++e)n[`${t}.${e}.key_value`]=r}else if("bloom"===r.model_type){const r=[1*e,a,0],s=[1*e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=s}else if("openelm"===r.model_type)for(let r=0;r<i;++r){const i=[1,e[r],0,a];n[`${t}.${r}.key`]=i,n[`${t}.${r}.value`]=i}else{const r=[1,e,0,a];for(let e=0;e<i;++e)n[`${t}.${e}.key`]=r,n[`${t}.${e}.value`]=r}}return n}class o{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=a(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:a=!1,revision:s="main"}={}){!n||n instanceof o||(n=new o(n));const l=n??await async function(e,t){return await(0,i.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:a,revision:s});return new this(l)}}class l{static async from_pretrained(...e){return o.from_pretrained(...e)}}},"./src/env.js":
155
155
  /*!********************!*\
156
156
  !*** ./src/env.js ***!
157
- \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),i=n(/*! path */"?3f59"),a=n(/*! url */"?154a");const s="undefined"!=typeof self,o=s&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=s&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(i),f=Object.freeze({IS_BROWSER_ENV:s,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m,_=g?i.dirname(i.dirname(a.fileURLToPath(import.meta.url))):"./",w=g?i.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.0-alpha.15",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!s,localModelPath:g?i.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
157
+ \********************/(e,t,n)=>{n.r(t),n.d(t,{apis:()=>f,env:()=>b});var r=n(/*! fs */"?569f"),i=n(/*! path */"?3f59"),a=n(/*! url */"?154a");const s="undefined"!=typeof self,o=s&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=s&&"caches"in self,u="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,c="undefined"!=typeof process,p=c&&"node"===process?.release?.name,h=!v(r),m=!v(i),f=Object.freeze({IS_BROWSER_ENV:s,IS_WEBWORKER_ENV:o,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:u,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:c,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),g=h&&m,_=g?i.dirname(i.dirname(a.fileURLToPath(import.meta.url))):"./",w=g?i.join(_,"/.cache/"):null,y="/models/",b={version:"3.0.0-alpha.16",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!s,localModelPath:g?i.join(_,y):y,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:w,useCustomCache:!1,customCache:null};function v(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
158
158
  /*!***********************************************!*\
159
159
  !*** ./src/generation/configuration_utils.js ***!
160
160
  \***********************************************/(e,t,n)=>{n.r(t),n.d(t,{GenerationConfig:()=>i});var r=n(/*! ../utils/core.js */"./src/utils/core.js");class i{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,r.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
@@ -172,7 +172,7 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,P,E,F,A,I,z,O,B=Object.d
172
172
  \*************************************/(e,t,n)=>{n.r(t),n.d(t,{BaseStreamer:()=>s,TextStreamer:()=>l,WhisperTextStreamer:()=>u});var r=n(/*! ../utils/core.js */"./src/utils/core.js"),i=n(/*! ../tokenizers.js */"./src/tokenizers.js"),a=n(/*! ../env.js */"./src/env.js");class s{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const o=a.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends s{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,decode_kwargs:i={},...a}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??o,this.token_callback_function=r,this.decode_kwargs={...i,...a},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,r.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let a;n.endsWith("\n")?(a=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,i.is_chinese_char)(n.charCodeAt(n.length-1))?(a=n.slice(this.print_len),this.print_len+=a.length):(a=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=a.length),this.on_finalized_text(a,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===o&&a.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class u extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:r=null,on_chunk_start:i=null,on_chunk_end:a=null,on_finalize:s=null,time_precision:o=.02,skip_special_tokens:l=!0,decode_kwargs:u={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:r,decode_kwargs:{skip_special_tokens:l,...u}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=i,this.on_chunk_end=a,this.on_finalize=s,this.time_precision=o,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
173
173
  /*!***********************!*\
174
174
  !*** ./src/models.js ***!
175
- \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ct,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ut,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>ot,AutoModel:()=>Fo,AutoModelForAudioClassification:()=>Qo,AutoModelForAudioFrameClassification:()=>Jo,AutoModelForCTC:()=>Ko,AutoModelForCausalLM:()=>Do,AutoModelForDepthEstimation:()=>nl,AutoModelForDocumentQuestionAnswering:()=>Zo,AutoModelForImageClassification:()=>jo,AutoModelForImageFeatureExtraction:()=>il,AutoModelForImageMatting:()=>el,AutoModelForImageSegmentation:()=>Go,AutoModelForImageToImage:()=>tl,AutoModelForMaskGeneration:()=>Xo,AutoModelForMaskedLM:()=>Ro,AutoModelForNormalEstimation:()=>rl,AutoModelForObjectDetection:()=>Wo,AutoModelForQuestionAnswering:()=>No,AutoModelForSemanticSegmentation:()=>qo,AutoModelForSeq2SeqLM:()=>zo,AutoModelForSequenceClassification:()=>Ao,AutoModelForSpeechSeq2Seq:()=>Oo,AutoModelForTextToSpectrogram:()=>Bo,AutoModelForTextToWaveform:()=>Lo,AutoModelForTokenClassification:()=>Io,AutoModelForUniversalSegmentation:()=>Uo,AutoModelForVision2Seq:()=>Vo,AutoModelForXVector:()=>Yo,AutoModelForZeroShotObjectDetection:()=>Ho,BartForConditionalGeneration:()=>Mt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>vt,BaseModelOutput:()=>G,BeitForImageClassification:()=>si,BeitModel:()=>ai,BeitPreTrainedModel:()=>ii,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>K,BertForSequenceClassification:()=>H,BertForTokenClassification:()=>X,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>At,BlenderbotModel:()=>Ft,BlenderbotPreTrainedModel:()=>Et,BlenderbotSmallForConditionalGeneration:()=>Ot,BlenderbotSmallModel:()=>zt,BlenderbotSmallPreTrainedModel:()=>It,BloomForCausalLM:()=>vr,BloomModel:()=>br,BloomPreTrainedModel:()=>yr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Sn,CLIPSegMo
del:()=>$n,CLIPSegPreTrainedModel:()=>kn,CLIPTextModel:()=>fn,CLIPTextModelWithProjection:()=>gn,CLIPVisionModel:()=>_n,CLIPVisionModelWithProjection:()=>wn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ve,CamembertForSequenceClassification:()=>ye,CamembertForTokenClassification:()=>be,CamembertModel:()=>_e,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>cl,CausalLMOutputWithPast:()=>pl,ChineseCLIPModel:()=>Tn,ChineseCLIPPreTrainedModel:()=>Mn,ClapAudioModelWithProjection:()=>ys,ClapModel:()=>_s,ClapPreTrainedModel:()=>gs,ClapTextModelWithProjection:()=>ws,CodeGenForCausalLM:()=>Xn,CodeGenModel:()=>Hn,CodeGenPreTrainedModel:()=>Wn,CohereForCausalLM:()=>er,CohereModel:()=>Zn,CoherePreTrainedModel:()=>Jn,ConvBertForMaskedLM:()=>se,ConvBertForQuestionAnswering:()=>ue,ConvBertForSequenceClassification:()=>oe,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>ae,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>ta,ConvNextModel:()=>ea,ConvNextPreTrainedModel:()=>Zi,ConvNextV2ForImageClassification:()=>ia,ConvNextV2Model:()=>ra,ConvNextV2PreTrainedModel:()=>na,DPTForDepthEstimation:()=>Di,DPTModel:()=>Li,DPTPreTrainedModel:()=>Bi,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Se,DebertaForSequenceClassification:()=>ke,DebertaForTokenClassification:()=>$e,DebertaModel:()=>Me,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ee,DebertaV2ForQuestionAnswering:()=>Ie,DebertaV2ForSequenceClassification:()=>Fe,DebertaV2ForTokenClassification:()=>Ae,DebertaV2Model:()=>Pe,DebertaV2PreTrainedModel:()=>Ce,DecisionTransformerModel:()=>Ks,DecisionTransformerPreTrainedModel:()=>Xs,DeiTForImageClassification:()=>Mi,DeiTModel:()=>xi,DeiTPreTrainedModel:()=>vi,DepthAnythingForDepthEstimation:()=>Ni,DepthAnythingPreTrainedModel:()=>Ri,DetrForObjectDetection:()=>ui,DetrForSegmentation:()=>di,DetrModel:()=>li,DetrObjectDetectionOutput:()=>ci,DetrPreTrainedModel:()=>oi,DetrSegmentationOutput:()=>pi,Dinov2ForImageClassification:()=>oa,Din
ov2Model:()=>sa,Dinov2PreTrainedModel:()=>aa,DistilBertForMaskedLM:()=>Re,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Be,DistilBertForTokenClassification:()=>Le,DistilBertModel:()=>Oe,DistilBertPreTrainedModel:()=>ze,DonutSwinModel:()=>Ji,DonutSwinPreTrainedModel:()=>Yi,EfficientNetForImageClassification:()=>Fs,EfficientNetModel:()=>Es,EfficientNetPreTrainedModel:()=>Ps,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ce,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>je,EsmForSequenceClassification:()=>Ge,EsmForTokenClassification:()=>qe,EsmModel:()=>Ve,EsmPreTrainedModel:()=>Ne,FalconForCausalLM:()=>fs,FalconModel:()=>ms,FalconPreTrainedModel:()=>hs,FastViTForImageClassification:()=>Gr,FastViTModel:()=>jr,FastViTPreTrainedModel:()=>Vr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>cn,GLPNForDepthEstimation:()=>Qi,GLPNModel:()=>Ki,GLPNPreTrainedModel:()=>Xi,GPT2LMHeadModel:()=>En,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>Un,GPTBigCodeModel:()=>qn,GPTBigCodePreTrainedModel:()=>Gn,GPTJForCausalLM:()=>jn,GPTJModel:()=>Vn,GPTJPreTrainedModel:()=>Nn,GPTNeoForCausalLM:()=>Bn,GPTNeoModel:()=>On,GPTNeoPreTrainedModel:()=>zn,GPTNeoXForCausalLM:()=>Rn,GPTNeoXModel:()=>Dn,GPTNeoXPreTrainedModel:()=>Ln,Gemma2ForCausalLM:()=>sr,Gemma2Model:()=>ar,Gemma2PreTrainedModel:()=>ir,GemmaForCausalLM:()=>rr,GemmaModel:()=>nr,GemmaPreTrainedModel:()=>tr,GroupViTModel:()=>Nr,GroupViTPreTrainedModel:()=>Rr,HieraForImageClassification:()=>$i,HieraModel:()=>ki,HieraPreTrainedModel:()=>Ti,HubertForCTC:()=>Wa,HubertForSequenceClassification:()=>Ha,HubertModel:()=>Ua,HubertPreTrainedModel:()=>qa,ImageMattingOutput:()=>hl,JAISLMHeadModel:()=>In,JAISModel:()=>An,JAISPreTrainedModel:()=>Fn,LlamaForCausalLM:()=>Yn,LlamaModel:()=>Qn,LlamaPreTrainedModel:()=>Kn,LlavaForConditionalGeneration:()=>un,LlavaPreTraine
dModel:()=>ln,LongT5ForConditionalGeneration:()=>_t,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>ba,M2M100Model:()=>ya,M2M100PreTrainedModel:()=>wa,MBartForCausalLM:()=>Pt,MBartForConditionalGeneration:()=>St,MBartForSequenceClassification:()=>Ct,MBartModel:()=>$t,MBartPreTrainedModel:()=>kt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ye,MPNetPreTrainedModel:()=>Qe,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>yt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>_a,MarianModel:()=>ga,MarianPreTrainedModel:()=>fa,MaskFormerForInstanceSegmentation:()=>Hi,MaskFormerModel:()=>Wi,MaskFormerPreTrainedModel:()=>Ui,MaskedLMOutput:()=>ul,MistralForCausalLM:()=>us,MistralModel:()=>ls,MistralPreTrainedModel:()=>os,MobileBertForMaskedLM:()=>He,MobileBertForQuestionAnswering:()=>Ke,MobileBertForSequenceClassification:()=>Xe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>Ds,MobileNetV1Model:()=>Ls,MobileNetV1PreTrainedModel:()=>Bs,MobileNetV2ForImageClassification:()=>Vs,MobileNetV2Model:()=>Ns,MobileNetV2PreTrainedModel:()=>Rs,MobileNetV3ForImageClassification:()=>qs,MobileNetV3Model:()=>Gs,MobileNetV3PreTrainedModel:()=>js,MobileNetV4ForImageClassification:()=>Hs,MobileNetV4Model:()=>Ws,MobileNetV4PreTrainedModel:()=>Us,MobileViTForImageClassification:()=>Xr,MobileViTModel:()=>Hr,MobileViTPreTrainedModel:()=>Wr,MobileViTV2ForImageClassification:()=>Yr,MobileViTV2Model:()=>Qr,MobileViTV2PreTrainedModel:()=>Kr,ModelOutput:()=>j,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>Tr,MptModel:()=>Mr,MptPreTrainedModel:()=>xr,MusicgenForCausalLM:()=>zs,MusicgenForConditionalGeneration:()=>Os,MusicgenModel:()=>Is,MusicgenPreTrainedModel:()=>As,NomicBertModel:()=>Y,NomicBertPreTrainedModel:()=>Q,OPTForCausalLM:()=>Sr,OPTModel:()=>$r,OPTPreTrainedModel:()=>kr,OpenELMForCausalLM:()=>ur,OpenELMModel:
()=>lr,OpenELMPreTrainedModel:()=>or,OwlViTForObjectDetection:()=>ei,OwlViTModel:()=>Zr,OwlViTPreTrainedModel:()=>Jr,Owlv2ForObjectDetection:()=>ri,Owlv2Model:()=>ni,Owlv2PreTrainedModel:()=>ti,Phi3ForCausalLM:()=>wr,Phi3Model:()=>_r,Phi3PreTrainedModel:()=>gr,PhiForCausalLM:()=>fr,PhiModel:()=>mr,PhiPreTrainedModel:()=>hr,PreTrainedModel:()=>V,PretrainedMixin:()=>Qs,PvtForImageClassification:()=>Ir,PvtModel:()=>Ar,PvtPreTrainedModel:()=>Fr,PyAnnoteForAudioFrameClassification:()=>Ca,PyAnnoteModel:()=>Sa,PyAnnotePreTrainedModel:()=>$a,QuestionAnsweringModelOutput:()=>dl,Qwen2ForCausalLM:()=>pr,Qwen2Model:()=>cr,Qwen2PreTrainedModel:()=>dr,RTDetrForObjectDetection:()=>fi,RTDetrModel:()=>mi,RTDetrObjectDetectionOutput:()=>gi,RTDetrPreTrainedModel:()=>hi,ResNetForImageClassification:()=>Pi,ResNetModel:()=>Ci,ResNetPreTrainedModel:()=>Si,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Vt,RobertaForSequenceClassification:()=>Rt,RobertaForTokenClassification:()=>Nt,RobertaModel:()=>Lt,RobertaPreTrainedModel:()=>Bt,SamImageSegmentationOutput:()=>ma,SamModel:()=>ha,SamPreTrainedModel:()=>pa,SapiensForDepthEstimation:()=>Gi,SapiensForNormalEstimation:()=>qi,SapiensForSemanticSegmentation:()=>ji,SapiensPreTrainedModel:()=>Vi,SegformerForImageClassification:()=>Ts,SegformerForSemanticSegmentation:()=>ks,SegformerModel:()=>Ms,SegformerPreTrainedModel:()=>xs,Seq2SeqLMOutput:()=>al,SequenceClassifierOutput:()=>sl,SiglipModel:()=>bn,SiglipPreTrainedModel:()=>yn,SiglipTextModel:()=>vn,SiglipVisionModel:()=>xn,SpeechT5ForSpeechToText:()=>ns,SpeechT5ForTextToSpeech:()=>rs,SpeechT5HifiGan:()=>is,SpeechT5Model:()=>ts,SpeechT5PreTrainedModel:()=>es,SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>st,SqueezeBertForSequenceClassification:()=>at,SqueezeBertModel:()=>rt,Squeez
eBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>Cs,StableLmModel:()=>Ss,StableLmPreTrainedModel:()=>$s,Starcoder2ForCausalLM:()=>ps,Starcoder2Model:()=>cs,Starcoder2PreTrainedModel:()=>ds,Swin2SRForImageSuperResolution:()=>Oi,Swin2SRModel:()=>zi,Swin2SRPreTrainedModel:()=>Ii,SwinForImageClassification:()=>Ai,SwinModel:()=>Fi,SwinPreTrainedModel:()=>Ei,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>yi,TableTransformerModel:()=>wi,TableTransformerObjectDetectionOutput:()=>bi,TableTransformerPreTrainedModel:()=>_i,TokenClassifierOutput:()=>ll,TrOCRForCausalLM:()=>ss,TrOCRPreTrainedModel:()=>as,UniSpeechForCTC:()=>Ia,UniSpeechForSequenceClassification:()=>za,UniSpeechModel:()=>Aa,UniSpeechPreTrainedModel:()=>Fa,UniSpeechSatForAudioFrameClassification:()=>Ra,UniSpeechSatForCTC:()=>La,UniSpeechSatForSequenceClassification:()=>Da,UniSpeechSatModel:()=>Ba,UniSpeechSatPreTrainedModel:()=>Oa,ViTForImageClassification:()=>Er,ViTMAEModel:()=>Or,ViTMAEPreTrainedModel:()=>zr,ViTMSNForImageClassification:()=>Dr,ViTMSNModel:()=>Lr,ViTMSNPreTrainedModel:()=>Br,ViTModel:()=>Pr,ViTPreTrainedModel:()=>Cr,VisionEncoderDecoderModel:()=>on,VitMatteForImageMatting:()=>Ur,VitMattePreTrainedModel:()=>qr,VitsModel:()=>vs,VitsModelOutput:()=>ml,VitsPreTrainedModel:()=>bs,Wav2Vec2BertForCTC:()=>ja,Wav2Vec2BertForSequenceClassification:()=>Ga,Wav2Vec2BertModel:()=>Va,Wav2Vec2BertPreTrainedModel:()=>Na,Wav2Vec2ForAudioFrameClassification:()=>ka,Wav2Vec2ForCTC:()=>Ma,Wav2Vec2ForSequenceClassification:()=>Ta,Wav2Vec2Model:()=>xa,Wav2Vec2PreTrainedModel:()=>va,WavLMForAudioFrameClassification:()=>Za,WavLMForCTC:()=>Qa,WavLMForSequenceClassification:()=>Ya,WavLMForXVector:()=>Ja,WavLMModel:()=>Ka,WavLMPreTrainedModel:()=>Xa,WeSpeakerResNetModel:()=>Ea,WeSpeakerResNetPreTrainedModel:()=>Pa,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>an,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Ht,XLMForSequenceClassificat
ion:()=>Ut,XLMForTokenClassification:()=>Wt,XLMModel:()=>Gt,XLMPreTrainedModel:()=>jt,XLMRobertaForMaskedLM:()=>Qt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Yt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Kt,XLMRobertaPreTrainedModel:()=>Xt,XLMWithLMHeadModel:()=>qt,XVectorOutput:()=>ol,YolosForObjectDetection:()=>da,YolosModel:()=>ua,YolosObjectDetectionOutput:()=>ca,YolosPreTrainedModel:()=>la});var r=n(/*! ./configs.js */"./src/configs.js"),i=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),a=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),s=n(/*! ./utils/generic.js */"./src/utils/generic.js"),o=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),u=n(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),c=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=n(/*! ./utils/maths.js */"./src/utils/maths.js"),h=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),f=n(/*! ./env.js */"./src/env.js"),g=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),_=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,y=1,b=2,v=3,x=4,M=5,T=6,k=7,$=new Map,S=new Map,C=new Map;async function P(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async s=>{const{buffer:o,session_options:u}=await async function(e,t,n){let s=n.device;s&&"string"!=typeof s&&(s.hasOwnProperty(t)?s=s[t]:(console.warn(`device not specified for "${t}". 
Using the default device.`),s=null));const o=s??(f.apis.IS_NODE_ENV?"cpu":"wasm"),u=(0,i.deviceToExecutionProviders)(o);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=a.DEFAULT_DEVICE_DTYPE_MAPPING[o]??a.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${o}).`)));const c=d;if(!a.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(c))throw new Error(`Invalid dtype: ${c}. Should be one of: ${Object.keys(a.DATA_TYPES).join(", ")}`);if(c===a.DATA_TYPES.fp16&&"webgpu"===o&&!await(0,a.isWebGpuFp16Supported)())throw new Error(`The device (${o}) does not support fp16.`);const p=a.DEFAULT_DTYPE_SUFFIX_MAPPING[c],h=`${n.subfolder??""}/${t}${p}.onnx`,m={...n.session_options}??{};m.executionProviders??=u;const g=(0,l.getModelFile)(e,h,!0,n);let _=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(f.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const r=`${t}${p}.onnx_data`,i=`${n.subfolder??""}/${r}`;_.push(new Promise((async(t,a)=>{const s=await(0,l.getModelFile)(e,i,!0,n);t({path:r,data:s})})))}else void 0!==m.externalData&&(_=m.externalData.map((async t=>{if("string"==typeof t.data){const r=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:r}}return t})));if(_.length>0&&(m.externalData=await Promise.all(_)),"webgpu"===o){const e=(0,r.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,i.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await g,session_options:m}}(e,t[s],n);return[s,await(0,i.createInferenceSession)(o,u)]}))))}async function E(e,t){const n=function(e,t){const n=Object.create(null),r=[];for(const a of e.inputNames){const e=t[a];e instanceof 
c.Tensor?n[a]=(0,i.isONNXProxy)()?e.clone():e:r.push(a)}if(r.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${r.join(", ")}.`);const a=Object.keys(t).length,s=e.inputNames.length;if(a>s){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${a} > ${s}). The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let r=await e.run(t);return r=F(r),r}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function F(e){for(let t in e)(0,i.isONNXTensor)(e[t])?e[t]=new c.Tensor(e[t]):"object"==typeof e[t]&&F(e[t]);return e}function A(e){if(e instanceof c.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new c.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new c.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function I(e){return new c.Tensor("bool",[e],[1])}async function z(e,t){let{encoder_outputs:n,input_ids:r,decoder_input_ids:i,...a}=t;if(!n){const r=(0,o.pick)(t,e.sessions.model.inputNames);n=(await O(e,r)).last_hidden_state}a.input_ids=i,a.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(a.encoder_attention_mask=t.attention_mask);return await B(e,a,!0)}async function O(e,t){const n=e.sessions.model,r=(0,o.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!r.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model 
inputs.");r.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!r.token_type_ids&&(r.token_type_ids=new c.Tensor("int64",new BigInt64Array(r.input_ids.data.length),r.input_ids.dims)),await E(n,r)}async function B(e,t,n=!1){const r=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:i,...a}=t;r.inputNames.includes("use_cache_branch")&&(a.use_cache_branch=I(!!i)),r.inputNames.includes("position_ids")&&a.attention_mask&&!a.position_ids&&(a.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:r,attention_mask:i}=e,[a,s]=i.dims,o=new BigInt64Array(i.data.length);for(let e=0;e<a;++e){const t=e*s;let n=BigInt(0);for(let e=0;e<s;++e){const r=t+e;0n===i.data[r]?o[r]=BigInt(1):(o[r]=n,n+=i.data[r])}}let l=new c.Tensor("int64",o,i.dims);if(t){const e=-(n??r).dims.at(1);l=l.slice(null,[e,null])}return l}(a,i)),e.addPastKeyValues(a,i);const s=(0,o.pick)(a,r.inputNames);return await E(r,s)}async function L(e,{input_ids:t=null,attention_mask:n=null,pixel_values:r=null,position_ids:i=null,inputs_embeds:a=null,past_key_values:s=null,generation_config:o=null,logits_processor:l=null,...u}){if(!a)if(a=await e.encode_text({input_ids:t}),r&&1!==t.dims[1]){const i=await e.encode_image({pixel_values:r});({inputs_embeds:a,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:i,inputs_embeds:a,input_ids:t,attention_mask:n}))}else if(s&&r&&1===t.dims[1]){const e=t.dims[1],r=Object.values(s)[0].dims.at(-2);n=(0,c.cat)([(0,c.ones)([t.dims[0],r]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await B(e,{inputs_embeds:a,past_key_values:s,attention_mask:n,position_ids:i,generation_config:o,logits_processor:l},!0)}function D(e,t,n,r){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:r,attention_mask:i}=n;if(i&&i.dims[1]>r.dims[1]);else if(t<r.dims[1])n.input_ids=r.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&r.data.some((t=>t==e.config.image_token_index))){const i=e.config.num_image_tokens;if(!i)throw new Error("`num_image_tokens` is missing in the model configuration.");const a=r.dims[1]-(t-i);n.input_ids=r.slice(null,[-a,null]),n.attention_mask=(0,c.ones)([1,t+a])}}return n}function R(e,t,n,r){return n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:A(t)}}function N(e,...t){return e.config.is_encoder_decoder?R(e,...t):D(e,...t)}class V extends s.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=C.get(this.constructor),r=$.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,r){case x:this.can_generate=!0,this._forward=B,this._prepare_inputs_for_generation=D;break;case b:case v:case k:this.can_generate=!0,this._forward=z,this._prepare_inputs_for_generation=R;break;case y:this._forward=z;break;case T:this.can_generate=!0,this._forward=L,this._prepare_inputs_for_generation=N;break;default:this._forward=O}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:d=null,dtype:c=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:d,dtype:c,use_external_data_format:p,session_options:h};const f=C.get(this),g=$.get(f);let _;if(n=m.config=await r.AutoConfig.from_pretrained(e,m),g===x)_=await 
Promise.all([P(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===b||g===v)_=await Promise.all([P(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(g===M)_=await Promise.all([P(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(g===y)_=await Promise.all([P(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(g===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),_=await Promise.all([P(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else g===k?_=await Promise.all([P(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(g!==w&&console.warn(`Model type for '${f??n?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),_=await Promise.all([P(e,{model:m.model_file_name??"model"},m)]));return new this(n,..._)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new u.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new u.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new u.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new u.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const r=new u.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&r.push(new u.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&r.push(new u.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&r.push(new u.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&r.push(new u.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&r.push(new u.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&r.push(new u.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&r.push(new u.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;r.push(new u.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&r.push(new u.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&r.extend(n),r}_prepare_generation_config(e,t,n=d.GenerationConfig){const r={...this.config};for(const e of["decoder","generator","text_config"])e in r&&Object.assign(r,r[e]);const i=new n(r);return"generation_config"in 
this&&Object.assign(i,this.generation_config),e&&Object.assign(i,e),t&&Object.assign(i,(0,o.pick)(t,Object.getOwnPropertyNames(i))),i}_get_stopping_criteria(e,t=null){const n=new h.StoppingCriteriaList;return null!==e.max_length&&n.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new h.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[so,uo,ao,eo],t=C.get(this.constructor),n=new Set,r=this.config.model_type;for(const t of e){const e=t.get(r);e&&n.add(e[0])}let i=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(i+=` Please use the following class instead: ${[...n].join(", ")}`),Error(i)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:r}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new c.Tensor("int64",e.flat(),[e.length,1]),r||(n.attention_mask=(0,c.cat)([n.attention_mask,(0,c.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const r=(0,o.pick)(n,this.forward_params),i=this.main_input_name;if(i in r){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else r[i]=e;return{inputs_tensor:r[i],model_inputs:r,model_input_name:i}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:r}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:r,...i}=t,a=await this._prepare_inputs_embeds(t);t={...i,...(0,o.pick)(a,["inputs_embeds","attention_mask"])}}let{last_hidden_state:i}=await O(this,t);if(null!==r.guidance_scale&&r.guidance_scale>1)i=(0,c.cat)([i,(0,c.full_like)(i,0)],0),"attention_mask"in t&&(t.attention_mask=(0,c.cat)([t.attention_mask,(0,c.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=A(t.decoder_input_ids).dims[0];if(e!==i.dims[0]){if(1!==i.dims[0])throw new Error(`The encoder outputs have a different batch size (${i.dims[0]}) than the decoder inputs (${e}).`);i=(0,c.cat)(Array.from({length:e},(()=>i)),0)}}return t.encoder_outputs=i,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:r,bos_token_id:i,generation_config:a}){let{decoder_input_ids:s,...o}=n;if(s)Array.isArray(s[0])||(s=Array.from({length:e},(()=>s)));else if(r??=i,"musicgen"===this.config.model_type)s=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[r]));else if(Array.isArray(r)){if(r.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${r.length}`);s=r}else s=Array.from({length:e},(()=>[r]));return s=A(s),n.decoder_attention_mask=(0,c.ones_like)(s),{input_ids:s,model_inputs:o}}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,streamer:i=null,...a}){this._validate_model_class(),t=this._prepare_generation_config(t,a);let{inputs_tensor:s,model_inputs:o,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:a});const 
u=this.config.is_encoder_decoder;let d;u&&("encoder_outputs"in o||(o=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:s,model_inputs:o,model_input_name:l,generation_config:t}))),u?({input_ids:d,model_inputs:o}=this._prepare_decoder_input_ids_for_generation({batch_size:o[l].dims.at(0),model_input_name:l,model_kwargs:o,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=o[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,n),f=this._get_stopping_criteria(t,r),g=o[l].dims.at(0),_=m.LogitsSampler.getSampler(t),w=new Array(g).fill(0),y=d.tolist();i&&i.put(y);let b=null,v={};for(;;){o=this.prepare_inputs_for_generation(y,o,t);const e=await this.forward(o);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in v||(v[e]=[]),v[e].push(t[e])}const n=h(y,e.logits.slice(null,-1,null)),r=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],i=await _(t);for(const[t,n]of i){const i=BigInt(t);w[e]+=n,y[e].push(i),r.push([i]);break}}i&&i.put(r);if(f(y).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,o.past_key_values,!1));break}o=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:e,model_inputs:o,is_encoder_decoder:u})}i&&i.end();const x=new c.Tensor("int64",y.flat(),[y.length,y[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...v}:x}getPastKeyValues(e,t,n=!0){const r=Object.create(null);for(const i in e)if(i.startsWith("present")){const a=i.replace("present","past_key_values");if(t&&i.includes("encoder"))r[a]=t[a];else{if(n&&t){const e=t[a];"gpu-buffer"===e.location&&e.dispose()}r[a]=e[i]}}return r}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const r in e)r.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[r]));return 
t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],i=(0,r.getKeyValueShapes)(this.config);for(const r in i)e[r]=new c.Tensor(t,n,i[r])}}async encode_image({pixel_values:e}){const t=(await E(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await E(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class j{}class G extends j{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class q extends V{}class U extends q{}class W extends q{async _call(e){return new ul(await super._call(e))}}class H extends q{async _call(e){return new sl(await super._call(e))}}class X extends q{async _call(e){return new ll(await super._call(e))}}class K extends q{async _call(e){return new dl(await super._call(e))}}class Q extends V{}class Y extends Q{}class J extends V{}class Z extends J{}class ee extends J{async _call(e){return new ul(await super._call(e))}}class te extends J{async _call(e){return new sl(await super._call(e))}}class ne extends J{async _call(e){return new ll(await super._call(e))}}class re extends J{async _call(e){return new dl(await super._call(e))}}class ie extends V{}class ae extends ie{}class se extends ie{async _call(e){return new ul(await super._call(e))}}class oe extends ie{async _call(e){return new sl(await super._call(e))}}class le extends ie{async _call(e){return new ll(await super._call(e))}}class ue extends ie{async _call(e){return new dl(await super._call(e))}}class de extends V{}class ce extends de{}class pe extends de{async _call(e){return new ul(await super._call(e))}}class he 
extends de{async _call(e){return new sl(await super._call(e))}}class me extends de{async _call(e){return new ll(await super._call(e))}}class fe extends de{async _call(e){return new dl(await super._call(e))}}class ge extends V{}class _e extends ge{}class we extends ge{async _call(e){return new ul(await super._call(e))}}class ye extends ge{async _call(e){return new sl(await super._call(e))}}class be extends ge{async _call(e){return new ll(await super._call(e))}}class ve extends ge{async _call(e){return new dl(await super._call(e))}}class xe extends V{}class Me extends xe{}class Te extends xe{async _call(e){return new ul(await super._call(e))}}class ke extends xe{async _call(e){return new sl(await super._call(e))}}class $e extends xe{async _call(e){return new ll(await super._call(e))}}class Se extends xe{async _call(e){return new dl(await super._call(e))}}class Ce extends V{}class Pe extends Ce{}class Ee extends Ce{async _call(e){return new ul(await super._call(e))}}class Fe extends Ce{async _call(e){return new sl(await super._call(e))}}class Ae extends Ce{async _call(e){return new ll(await super._call(e))}}class Ie extends Ce{async _call(e){return new dl(await super._call(e))}}class ze extends V{}class Oe extends ze{}class Be extends ze{async _call(e){return new sl(await super._call(e))}}class Le extends ze{async _call(e){return new ll(await super._call(e))}}class De extends ze{async _call(e){return new dl(await super._call(e))}}class Re extends ze{async _call(e){return new ul(await super._call(e))}}class Ne extends V{}class Ve extends Ne{}class je extends Ne{async _call(e){return new ul(await super._call(e))}}class Ge extends Ne{async _call(e){return new sl(await super._call(e))}}class qe extends Ne{async _call(e){return new ll(await super._call(e))}}class Ue extends V{}class We extends Ue{}class He extends Ue{async _call(e){return new ul(await super._call(e))}}class Xe extends Ue{async _call(e){return new sl(await super._call(e))}}class Ke extends Ue{async 
_call(e){return new dl(await super._call(e))}}class Qe extends V{}class Ye extends Qe{}class Je extends Qe{async _call(e){return new ul(await super._call(e))}}class Ze extends Qe{async _call(e){return new sl(await super._call(e))}}class et extends Qe{async _call(e){return new ll(await super._call(e))}}class tt extends Qe{async _call(e){return new dl(await super._call(e))}}class nt extends V{}class rt extends nt{}class it extends nt{async _call(e){return new ul(await super._call(e))}}class at extends nt{async _call(e){return new sl(await super._call(e))}}class st extends nt{async _call(e){return new dl(await super._call(e))}}class ot extends V{}class lt extends ot{}class ut extends ot{async _call(e){return new sl(await super._call(e))}}class dt extends ot{async _call(e){return new dl(await super._call(e))}}class ct extends ot{async _call(e){return new ul(await super._call(e))}}class pt extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ht extends pt{}class mt extends pt{}class ft extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class gt extends ft{}class _t extends ft{}class wt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class yt extends wt{}class bt extends wt{}class vt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends vt{}class Mt extends vt{}class Tt extends vt{async _call(e){return new sl(await super._call(e))}}class kt extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class $t extends kt{}class St extends kt{}class Ct extends kt{async _call(e){return new sl(await super._call(e))}}class Pt extends kt{}class Et extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ft extends Et{}class At extends Et{}class It extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class zt extends It{}class Ot extends 
It{}class Bt extends V{}class Lt extends Bt{}class Dt extends Bt{async _call(e){return new ul(await super._call(e))}}class Rt extends Bt{async _call(e){return new sl(await super._call(e))}}class Nt extends Bt{async _call(e){return new ll(await super._call(e))}}class Vt extends Bt{async _call(e){return new dl(await super._call(e))}}class jt extends V{}class Gt extends jt{}class qt extends jt{async _call(e){return new ul(await super._call(e))}}class Ut extends jt{async _call(e){return new sl(await super._call(e))}}class Wt extends jt{async _call(e){return new ll(await super._call(e))}}class Ht extends jt{async _call(e){return new dl(await super._call(e))}}class Xt extends V{}class Kt extends Xt{}class Qt extends Xt{async _call(e){return new ul(await super._call(e))}}class Yt extends Xt{async _call(e){return new sl(await super._call(e))}}class Jt extends Xt{async _call(e){return new ll(await super._call(e))}}class Zt extends Xt{async _call(e){return new dl(await super._call(e))}}class en extends V{}class tn extends en{}class nn extends en{}class rn extends V{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class an extends rn{}class sn extends rn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,g.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const r=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const i=`<|${(0,_.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[i]),t.push(e.task_to_id[r??"transcribe"])}else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. 
If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...i}){t=this._prepare_generation_config(t,i);const a=i.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,a))),t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,a.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const s=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:a,...i});return t.return_token_timestamps&&(s.token_timestamps=this._extract_token_timestamps(s,t.alignment_heads,t.num_frames)),s}_extract_token_timestamps(e,t,n=null,r=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let i=this.config.median_filter_width;void 0===i&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),i=7);const a=e.cross_attentions,s=Array.from({length:this.config.decoder_layers},((e,t)=>(0,c.cat)(a.map((e=>e[t])),2))),l=(0,c.stack)(t.map((([e,t])=>{if(e>=s.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${s.length}).`);return n?s[e].slice(null,t,null,[0,n]):s[e].slice(null,t)}))).transpose(1,0,2,3),[u,d]=(0,c.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let n=0;n<t.dims[0];++n){const r=t[n],a=u[e][n][0].data,s=d[e][n][0].data;for(let e=0;e<r.dims[0];++e){let t=r[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-s[e])/a[e];t.set((0,p.medianFilter)(t,i))}}}const m=[(0,c.mean)(h,1)],f=e.sequences.dims,g=new c.Tensor("float32",new Float32Array(f[0]*f[1]),f);for(let e=0;e<f[0];++e){const t=m[e].neg().squeeze_(0),[n,i]=(0,p.dynamic_time_warping)(t.tolist()),a=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),s=(0,o.mergeArrays)([1],a).map((e=>!!e)),l=[];for(let e=0;e<s.length;++e)s[e]&&l.push(i[e]*r);g[e].data.set(l,1)}return g}}class on extends V{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends V{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class un extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){const i=this.config.image_token_index,a=n.tolist().map((e=>e.findIndex((e=>e==i)))),s=a.every((e=>-1===e)),o=a.every((e=>-1!==e));if(!s&&!o)throw new Error("Every input should contain either 0 or 1 image token.");if(s)return{inputs_embeds:e,attention_mask:r};const 
l=[],u=[];for(let n=0;n<a.length;++n){const i=a[n],s=e[n],o=t[n],d=r[n];l.push((0,c.cat)([s.slice([0,i]),o,s.slice([i+1,s.dims[0]])],0)),u.push((0,c.cat)([d.slice([0,i]),(0,c.ones)([o.dims[0]]),d.slice([i+1,d.dims[0]])],0))}return{inputs_embeds:(0,c.stack)(l,0),attention_mask:(0,c.stack)(u,0)}}}class dn extends un{}class cn extends V{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,c.cat)([t,e],1),attention_mask:(0,c.cat)([(0,c.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let i,a;return e&&(i=await this.encode_text({input_ids:e})),t&&(a=await this.encode_image({pixel_values:t})),i&&a?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:i,image_features:a,input_ids:e,attention_mask:r})):n=i||a,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:i,encoder_outputs:a,past_key_values:s,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!a){let{last_hidden_state:e}=await O(this,{inputs_embeds:o,attention_mask:n});a=e}if(!l){if(!r)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:i,encoder_attention_mask:n,encoder_hidden_states:a,past_key_values:s};return await 
B(this,u,!0)}}class hn extends V{}class mn extends hn{}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class wn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class yn extends V{}class bn extends yn{}class vn extends yn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class xn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mn extends V{}class Tn extends Mn{}class kn extends V{}class $n extends kn{}class Sn extends kn{}class Cn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class En extends Cn{}class Fn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Fn{}class In extends Fn{}class zn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class On extends zn{}class Bn extends zn{}class Ln extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Dn extends Ln{}class Rn extends Ln{}class Nn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Nn{}class jn extends Nn{}class Gn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class qn extends Gn{}class Un extends Gn{}class Wn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Hn extends Wn{}class Xn extends Wn{}class Kn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Kn{}class Yn extends Kn{}class Jn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Zn extends Jn{}class er 
extends Jn{}class tr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class nr extends tr{}class rr extends tr{}class ir extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends ir{}class sr extends ir{}class or extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class lr extends or{}class ur extends or{}class dr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends dr{}class pr extends dr{}class hr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class mr extends hr{}class fr extends hr{}class gr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends gr{}class wr extends gr{}class yr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class br extends yr{}class vr extends yr{}class xr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends xr{}class Tr extends xr{}class kr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class $r extends kr{}class Sr extends kr{}class Cr extends V{}class Pr extends Cr{}class Er extends Cr{async _call(e){return new sl(await super._call(e))}}class Fr extends V{}class Ar extends Fr{}class Ir extends Fr{async _call(e){return new sl(await super._call(e))}}class zr extends V{}class Or extends zr{}class Br extends V{}class Lr extends Br{}class Dr extends Br{async _call(e){return new sl(await super._call(e))}}class Rr extends V{}class Nr extends Rr{}class Vr extends V{}class jr extends Vr{}class Gr extends Vr{async _call(e){return new sl(await super._call(e))}}class qr extends V{}class Ur extends qr{async _call(e){return new hl(await super._call(e))}}class Wr extends V{}class Hr extends Wr{}class Xr extends Wr{async _call(e){return new sl(await super._call(e))}}class Kr extends V{}class Qr extends Kr{}class Yr extends Kr{async _call(e){return new sl(await super._call(e))}}class Jr extends V{}class Zr extends Jr{}class ei extends Jr{}class ti extends 
V{}class ni extends ti{}class ri extends ti{}class ii extends V{}class ai extends ii{}class si extends ii{async _call(e){return new sl(await super._call(e))}}class oi extends V{}class li extends oi{}class ui extends oi{async _call(e){return new ci(await super._call(e))}}class di extends oi{async _call(e){return new pi(await super._call(e))}}class ci extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class pi extends j{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class hi extends V{}class mi extends hi{}class fi extends hi{async _call(e){return new gi(await super._call(e))}}class gi extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class _i extends V{}class wi extends _i{}class yi extends _i{async _call(e){return new bi(await super._call(e))}}class bi extends ci{}class vi extends V{}class xi extends vi{}class Mi extends vi{async _call(e){return new sl(await super._call(e))}}class Ti extends V{}class ki extends Ti{}class $i extends Ti{async _call(e){return new sl(await super._call(e))}}class Si extends V{}class Ci extends Si{}class Pi extends Si{async _call(e){return new sl(await super._call(e))}}class Ei extends V{}class Fi extends Ei{}class Ai extends Ei{async _call(e){return new sl(await super._call(e))}}class Ii extends V{}class zi extends Ii{}class Oi extends Ii{}class Bi extends V{}class Li extends Bi{}class Di extends Bi{}class Ri extends V{}class Ni extends Ri{}class Vi extends V{}class ji extends Vi{}class Gi extends Vi{}class qi extends Vi{}class Ui extends V{}class Wi extends Ui{}class Hi extends Ui{}class Xi extends V{}class Ki extends Xi{}class Qi extends Xi{}class Yi extends V{}class Ji extends Yi{}class Zi extends V{}class ea extends Zi{}class ta extends Zi{async _call(e){return new sl(await super._call(e))}}class na extends V{}class ra extends na{}class ia extends na{async _call(e){return new sl(await 
super._call(e))}}class aa extends V{}class sa extends aa{}class oa extends aa{async _call(e){return new sl(await super._call(e))}}class la extends V{}class ua extends la{}class da extends la{async _call(e){return new ca(await super._call(e))}}class ca extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class pa extends V{}class ha extends pa{async get_image_embeddings({pixel_values:e}){return await O(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new c.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await E(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new ma(await super._call(e))}}class ma extends j{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class fa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ga extends fa{}class _a extends fa{}class wa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ya extends wa{}class ba extends wa{}class va extends V{}class xa extends va{}class Ma extends va{async _call(e){return new cl(await super._call(e))}}class Ta extends va{async _call(e){return new sl(await super._call(e))}}class ka extends va{async _call(e){return new ll(await super._call(e))}}class $a extends V{}class Sa extends $a{}class Ca extends $a{async _call(e){return new ll(await super._call(e))}}class Pa extends V{}class Ea extends Pa{}class Fa extends V{}class Aa extends Fa{}class Ia extends Fa{async _call(e){return new cl(await super._call(e))}}class za extends Fa{async 
_call(e){return new sl(await super._call(e))}}class Oa extends V{}class Ba extends Oa{}class La extends Oa{async _call(e){return new cl(await super._call(e))}}class Da extends Oa{async _call(e){return new sl(await super._call(e))}}class Ra extends Oa{async _call(e){return new ll(await super._call(e))}}class Na extends V{}class Va extends Na{}class ja extends Na{async _call(e){return new cl(await super._call(e))}}class Ga extends Na{async _call(e){return new sl(await super._call(e))}}class qa extends V{}class Ua extends va{}class Wa extends va{async _call(e){return new cl(await super._call(e))}}class Ha extends va{async _call(e){return new sl(await super._call(e))}}class Xa extends V{}class Ka extends Xa{}class Qa extends Xa{async _call(e){return new cl(await super._call(e))}}class Ya extends Xa{async _call(e){return new sl(await super._call(e))}}class Ja extends Xa{async _call(e){return new ol(await super._call(e))}}class Za extends Xa{async _call(e){return new ll(await super._call(e))}}class es extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ts extends es{}class ns extends es{}class rs extends es{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:i=20,vocoder:a=null}={}){const s={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await O(this,s),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*i),p=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=I(!!g);let r;r=g?g.output_sequence_out:new c.Tensor("float32",new Float32Array(h),[1,1,h]);let i={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(i,f),g=await E(this.sessions.decoder_model_merged,i),f=this.getPastKeyValues(g,f);const{prob:a,spectrum:s}=g;if(m.push(s),_>=p&&(Array.from(a.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,c.cat)(m),{waveform:y}=await E(a.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class 
is extends V{main_input_name="spectrogram"}class as extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ss extends as{}class os extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ls extends os{}class us extends os{}class ds extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class cs extends ds{}class ps extends ds{}class hs extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ms extends hs{}class fs extends hs{}class gs extends V{}class _s extends gs{}class ws extends gs{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class ys extends gs{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class bs extends V{}class vs extends bs{async _call(e){return new ml(await super._call(e))}}class xs extends V{}class Ms extends xs{}class Ts extends xs{}class ks extends xs{}class $s extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ss extends $s{}class Cs extends $s{}class Ps extends V{}class Es extends Ps{}class Fs extends Ps{async _call(e){return new sl(await super._call(e))}}class As extends V{}class Is extends As{}class zs extends As{}class Os extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,i=n-r;let a=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const s=t%n-Math.floor(t/n)%r;s>0&&s<=i&&(e.data[a++]=e.data[t])}const s=Math.floor(t/r),o=a/(s*r);return new c.Tensor(e.type,e.data.slice(0,a),[s,r,o])}prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let 
t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await E(this.sessions.encodec_decode,{audio_codes:n});return r}}class Bs extends V{}class Ls extends Bs{}class Ds extends Bs{async _call(e){return new sl(await super._call(e))}}class Rs extends V{}class Ns extends Rs{}class Vs extends Rs{async _call(e){return new sl(await super._call(e))}}class js extends V{}class Gs extends js{}class qs extends js{async _call(e){return new sl(await super._call(e))}}class Us extends V{}class Ws extends Us{}class Hs extends Us{async _call(e){return new sl(await super._call(e))}}class Xs extends V{}class Ks extends Xs{}class Qs{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await V.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Ys=new 
Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",Y]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ce]],["esm",["EsmModel",Ve]],["convbert",["ConvBertModel",ae]],["camembert",["CamembertModel",_e]],["deberta",["DebertaModel",Me]],["deberta-v2",["DebertaV2Model",Pe]],["mpnet",["MPNetModel",Ye]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Oe]],["roberta",["RobertaModel",Lt]],["xlm",["XLMModel",Gt]],["xlm-roberta",["XLMRobertaModel",Kt]],["clap",["ClapModel",_s]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",$n]],["chinese_clip",["ChineseCLIPModel",Tn]],["siglip",["SiglipModel",bn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",xa]],["wav2vec2-bert",["Wav2Vec2BertModel",Va]],["unispeech",["UniSpeechModel",Aa]],["unispeech-sat",["UniSpeechSatModel",Ba]],["hubert",["HubertModel",Ua]],["wavlm",["WavLMModel",Ka]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",vs]],["pyannote",["PyAnnoteModel",Sa]],["wespeaker-resnet",["WeSpeakerResNetModel",Ea]],["detr",["DetrModel",li]],["rt_detr",["RTDetrModel",mi]],["table-transformer",["TableTransformerModel",wi]],["vit",["ViTModel",Pr]],["pvt",["PvtModel",Ar]],["vit_msn",["ViTMSNModel",Lr]],["vit_mae",["ViTMAEModel",Or]],["groupvit",["GroupViTModel",Nr]],["fastvit",["FastViTModel",jr]],["mobilevit",["MobileViTModel",Hr]],["mobilevitv2",["MobileViTV2Model",Qr]],["owlvit",["OwlViTModel",Zr]],["owlv2",["Owlv2Model",ni]],["beit",["BeitModel",ai]],["deit",["DeiTModel",xi]],["hiera",["HieraModel",ki]],["convnext",["ConvNextModel",ea]],["convnextv2",["ConvNextV2Model",ra]],["dinov2",["Dinov2Model",sa]],["resnet",["ResNetModel",Ci]],["swin",["SwinModel",Fi]],["swin2sr",["Swin2SRModel",zi]],["donut-swin",["DonutSwinModel",Ji]],["yolos",["YolosModel",ua]],["dpt",["DPTModel",Li]],["glpn",["GLPNModel",Ki]],["hifigan",["SpeechT5HifiGan",is]],["efficientnet",["EfficientNetModel",Es]],["decision_transformer",["Dec
isionTransformerModel",Ks]],["mobilenet_v1",["MobileNetV1Model",Ls]],["mobilenet_v2",["MobileNetV2Model",Ns]],["mobilenet_v3",["MobileNetV3Model",Gs]],["mobilenet_v4",["MobileNetV4Model",Ws]],["maskformer",["MaskFormerModel",Wi]]]),Js=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",yt]],["bart",["BartModel",xt]],["mbart",["MBartModel",$t]],["marian",["MarianModel",ga]],["whisper",["WhisperModel",an]],["m2m_100",["M2M100Model",ya]],["blenderbot",["BlenderbotModel",Ft]],["blenderbot-small",["BlenderbotSmallModel",zt]]]),Zs=new Map([["bloom",["BloomModel",br]],["jais",["JAISModel",An]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Vn]],["gpt_bigcode",["GPTBigCodeModel",qn]],["gpt_neo",["GPTNeoModel",On]],["gpt_neox",["GPTNeoXModel",Dn]],["codegen",["CodeGenModel",Hn]],["llama",["LlamaModel",Qn]],["cohere",["CohereModel",Zn]],["gemma",["GemmaModel",nr]],["gemma2",["Gemma2Model",ar]],["openelm",["OpenELMModel",lr]],["qwen2",["Qwen2Model",cr]],["phi",["PhiModel",mr]],["phi3",["Phi3Model",_r]],["mpt",["MptModel",Mr]],["opt",["OPTModel",$r]],["mistral",["MistralModel",ls]],["starcoder2",["Starcoder2Model",cs]],["falcon",["FalconModel",ms]],["stablelm",["StableLmModel",Ss]]]),eo=new Map([["speecht5",["SpeechT5ForSpeechToText",ns]],["whisper",["WhisperForConditionalGeneration",sn]]]),to=new Map([["speecht5",["SpeechT5ForTextToSpeech",rs]]]),no=new Map([["vits",["VitsModel",vs]],["musicgen",["MusicgenForConditionalGeneration",Os]]]),ro=new 
Map([["bert",["BertForSequenceClassification",H]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",Ge]],["convbert",["ConvBertForSequenceClassification",oe]],["camembert",["CamembertForSequenceClassification",ye]],["deberta",["DebertaForSequenceClassification",ke]],["deberta-v2",["DebertaV2ForSequenceClassification",Fe]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ut]],["distilbert",["DistilBertForSequenceClassification",Be]],["roberta",["RobertaForSequenceClassification",Rt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Yt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Ct]],["mobilebert",["MobileBertForSequenceClassification",Xe]],["squeezebert",["SqueezeBertForSequenceClassification",at]]]),io=new Map([["bert",["BertForTokenClassification",X]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",$e]],["deberta-v2",["DebertaV2ForTokenClassification",Ae]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Le]],["roberta",["RobertaForTokenClassification",Nt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),ao=new 
Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",_t]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",Mt]],["mbart",["MBartForConditionalGeneration",St]],["marian",["MarianMTModel",_a]],["m2m_100",["M2M100ForConditionalGeneration",ba]],["blenderbot",["BlenderbotForConditionalGeneration",At]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Ot]]]),so=new Map([["bloom",["BloomForCausalLM",vr]],["gpt2",["GPT2LMHeadModel",En]],["jais",["JAISLMHeadModel",In]],["gptj",["GPTJForCausalLM",jn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Un]],["gpt_neo",["GPTNeoForCausalLM",Bn]],["gpt_neox",["GPTNeoXForCausalLM",Rn]],["codegen",["CodeGenForCausalLM",Xn]],["llama",["LlamaForCausalLM",Yn]],["cohere",["CohereForCausalLM",er]],["gemma",["GemmaForCausalLM",rr]],["gemma2",["Gemma2ForCausalLM",sr]],["openelm",["OpenELMForCausalLM",ur]],["qwen2",["Qwen2ForCausalLM",pr]],["phi",["PhiForCausalLM",fr]],["phi3",["Phi3ForCausalLM",wr]],["mpt",["MptForCausalLM",Tr]],["opt",["OPTForCausalLM",Sr]],["mbart",["MBartForCausalLM",Pt]],["mistral",["MistralForCausalLM",us]],["starcoder2",["Starcoder2ForCausalLM",ps]],["falcon",["FalconForCausalLM",fs]],["trocr",["TrOCRForCausalLM",ss]],["stablelm",["StableLmForCausalLM",Cs]]]),oo=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",je]],["convbert",["ConvBertForMaskedLM",se]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Ee]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ct]],["distilbert",["DistilBertForMaskedLM",Re]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Qt]],["mobilebert",["MobileBertForMaskedLM",He]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),lo=new 
Map([["bert",["BertForQuestionAnswering",K]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",ue]],["camembert",["CamembertForQuestionAnswering",ve]],["deberta",["DebertaForQuestionAnswering",Se]],["deberta-v2",["DebertaV2ForQuestionAnswering",Ie]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Vt]],["xlm",["XLMForQuestionAnswering",Ht]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",Ke]],["squeezebert",["SqueezeBertForQuestionAnswering",st]]]),uo=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),co=new Map([["llava",["LlavaForConditionalGeneration",un]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),po=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),ho=new 
Map([["vit",["ViTForImageClassification",Er]],["pvt",["PvtForImageClassification",Ir]],["vit_msn",["ViTMSNForImageClassification",Dr]],["fastvit",["FastViTForImageClassification",Gr]],["mobilevit",["MobileViTForImageClassification",Xr]],["mobilevitv2",["MobileViTV2ForImageClassification",Yr]],["beit",["BeitForImageClassification",si]],["deit",["DeiTForImageClassification",Mi]],["hiera",["HieraForImageClassification",$i]],["convnext",["ConvNextForImageClassification",ta]],["convnextv2",["ConvNextV2ForImageClassification",ia]],["dinov2",["Dinov2ForImageClassification",oa]],["resnet",["ResNetForImageClassification",Pi]],["swin",["SwinForImageClassification",Ai]],["segformer",["SegformerForImageClassification",Ts]],["efficientnet",["EfficientNetForImageClassification",Fs]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ds]],["mobilenet_v2",["MobileNetV2ForImageClassification",Vs]],["mobilenet_v3",["MobileNetV3ForImageClassification",qs]],["mobilenet_v4",["MobileNetV4ForImageClassification",Hs]]]),mo=new Map([["detr",["DetrForObjectDetection",ui]],["rt_detr",["RTDetrForObjectDetection",fi]],["table-transformer",["TableTransformerForObjectDetection",yi]],["yolos",["YolosForObjectDetection",da]]]),fo=new Map([["owlvit",["OwlViTForObjectDetection",ei]],["owlv2",["Owlv2ForObjectDetection",ri]]]),go=new Map([["detr",["DetrForSegmentation",di]],["clipseg",["CLIPSegForImageSegmentation",Sn]]]),_o=new Map([["segformer",["SegformerForSemanticSegmentation",ks]],["sapiens",["SapiensForSemanticSegmentation",ji]]]),wo=new Map([["detr",["DetrForSegmentation",di]],["maskformer",["MaskFormerForInstanceSegmentation",Hi]]]),yo=new Map([["sam",["SamModel",ha]]]),bo=new Map([["wav2vec2",["Wav2Vec2ForCTC",Ma]],["wav2vec2-bert",["Wav2Vec2BertForCTC",ja]],["unispeech",["UniSpeechForCTC",Ia]],["unispeech-sat",["UniSpeechSatForCTC",La]],["wavlm",["WavLMForCTC",Qa]],["hubert",["HubertForCTC",Wa]]]),vo=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Ta]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Ga]],["unispeech",["UniSpeechForSequenceClassification",za]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Da]],["wavlm",["WavLMForSequenceClassification",Ya]],["hubert",["HubertForSequenceClassification",Ha]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),xo=new Map([["wavlm",["WavLMForXVector",Ja]]]),Mo=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Ra]],["wavlm",["WavLMForAudioFrameClassification",Za]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ka]],["pyannote",["PyAnnoteForAudioFrameClassification",Ca]]]),To=new Map([["vitmatte",["VitMatteForImageMatting",Ur]]]),ko=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Oi]]]),$o=new Map([["dpt",["DPTForDepthEstimation",Di]],["depth_anything",["DepthAnythingForDepthEstimation",Ni]],["glpn",["GLPNForDepthEstimation",Qi]],["sapiens",["SapiensForDepthEstimation",Gi]]]),So=new Map([["sapiens",["SapiensForNormalEstimation",qi]]]),Co=new Map([["clip",["CLIPVisionModelWithProjection",wn]],["siglip",["SiglipVisionModel",xn]]]),Po=[[Ys,w],[Js,y],[Zs,x],[ro,w],[io,w],[ao,b],[eo,b],[so,x],[oo,w],[lo,w],[uo,v],[co,T],[ho,w],[go,w],[wo,w],[_o,w],[To,w],[ko,w],[$o,w],[So,w],[mo,w],[fo,w],[yo,M],[bo,w],[vo,w],[to,b],[no,w],[xo,w],[Mo,w],[Co,w]];for(const[e,t]of Po)for(const[n,r]of e.values())$.set(n,t),C.set(r,n),S.set(n,r);const Eo=[["MusicgenForConditionalGeneration",Os,k],["CLIPTextModelWithProjection",gn,w],["SiglipTextModel",vn,w],["ClapTextModelWithProjection",ws,w],["ClapAudioModelWithProjection",ys,w]];for(const[e,t,n]of Eo)$.set(e,n),C.set(t,e),S.set(e,t);class Fo extends Qs{static MODEL_CLASS_MAPPINGS=Po.map((e=>e[0]));static BASE_IF_FAIL=!0}class Ao extends Qs{static MODEL_CLASS_MAPPINGS=[ro]}class Io extends Qs{static MODEL_CLASS_MAPPINGS=[io]}class zo extends Qs{static MODEL_CLASS_MAPPINGS=[ao]}class Oo extends Qs{static 
MODEL_CLASS_MAPPINGS=[eo]}class Bo extends Qs{static MODEL_CLASS_MAPPINGS=[to]}class Lo extends Qs{static MODEL_CLASS_MAPPINGS=[no]}class Do extends Qs{static MODEL_CLASS_MAPPINGS=[so]}class Ro extends Qs{static MODEL_CLASS_MAPPINGS=[oo]}class No extends Qs{static MODEL_CLASS_MAPPINGS=[lo]}class Vo extends Qs{static MODEL_CLASS_MAPPINGS=[uo]}class jo extends Qs{static MODEL_CLASS_MAPPINGS=[ho]}class Go extends Qs{static MODEL_CLASS_MAPPINGS=[go]}class qo extends Qs{static MODEL_CLASS_MAPPINGS=[_o]}class Uo extends Qs{static MODEL_CLASS_MAPPINGS=[wo]}class Wo extends Qs{static MODEL_CLASS_MAPPINGS=[mo]}class Ho extends Qs{static MODEL_CLASS_MAPPINGS=[fo]}class Xo extends Qs{static MODEL_CLASS_MAPPINGS=[yo]}class Ko extends Qs{static MODEL_CLASS_MAPPINGS=[bo]}class Qo extends Qs{static MODEL_CLASS_MAPPINGS=[vo]}class Yo extends Qs{static MODEL_CLASS_MAPPINGS=[xo]}class Jo extends Qs{static MODEL_CLASS_MAPPINGS=[Mo]}class Zo extends Qs{static MODEL_CLASS_MAPPINGS=[po]}class el extends Qs{static MODEL_CLASS_MAPPINGS=[To]}class tl extends Qs{static MODEL_CLASS_MAPPINGS=[ko]}class nl extends Qs{static MODEL_CLASS_MAPPINGS=[$o]}class rl extends Qs{static MODEL_CLASS_MAPPINGS=[So]}class il extends Qs{static MODEL_CLASS_MAPPINGS=[Co]}class al extends j{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:i=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=i}}class sl extends j{constructor({logits:e}){super(),this.logits=e}}class ol extends j{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class ll extends j{constructor({logits:e}){super(),this.logits=e}}class ul extends j{constructor({logits:e}){super(),this.logits=e}}class dl extends j{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class cl extends j{constructor({logits:e}){super(),this.logits=e}}class pl extends 
j{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class hl extends j{constructor({alphas:e}){super(),this.alphas=e}}class ml extends j{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
175
+ \***********************/(e,t,n)=>{n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ct,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ut,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>ot,AutoModel:()=>Fo,AutoModelForAudioClassification:()=>Qo,AutoModelForAudioFrameClassification:()=>Jo,AutoModelForCTC:()=>Ko,AutoModelForCausalLM:()=>Do,AutoModelForDepthEstimation:()=>nl,AutoModelForDocumentQuestionAnswering:()=>Zo,AutoModelForImageClassification:()=>jo,AutoModelForImageFeatureExtraction:()=>il,AutoModelForImageMatting:()=>el,AutoModelForImageSegmentation:()=>Go,AutoModelForImageToImage:()=>tl,AutoModelForMaskGeneration:()=>Xo,AutoModelForMaskedLM:()=>Ro,AutoModelForNormalEstimation:()=>rl,AutoModelForObjectDetection:()=>Wo,AutoModelForQuestionAnswering:()=>No,AutoModelForSemanticSegmentation:()=>qo,AutoModelForSeq2SeqLM:()=>zo,AutoModelForSequenceClassification:()=>Ao,AutoModelForSpeechSeq2Seq:()=>Oo,AutoModelForTextToSpectrogram:()=>Bo,AutoModelForTextToWaveform:()=>Lo,AutoModelForTokenClassification:()=>Io,AutoModelForUniversalSegmentation:()=>Uo,AutoModelForVision2Seq:()=>Vo,AutoModelForXVector:()=>Yo,AutoModelForZeroShotObjectDetection:()=>Ho,BartForConditionalGeneration:()=>Mt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>vt,BaseModelOutput:()=>G,BeitForImageClassification:()=>si,BeitModel:()=>ai,BeitPreTrainedModel:()=>ii,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>K,BertForSequenceClassification:()=>H,BertForTokenClassification:()=>X,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>At,BlenderbotModel:()=>Ft,BlenderbotPreTrainedModel:()=>Et,BlenderbotSmallForConditionalGeneration:()=>Ot,BlenderbotSmallModel:()=>zt,BlenderbotSmallPreTrainedModel:()=>It,BloomForCausalLM:()=>vr,BloomModel:()=>br,BloomPreTrainedModel:()=>yr,CLIPModel:()=>mn,CLIPPreTrainedModel:()=>hn,CLIPSegForImageSegmentation:()=>Sn,CLIPSegMo
del:()=>$n,CLIPSegPreTrainedModel:()=>kn,CLIPTextModel:()=>fn,CLIPTextModelWithProjection:()=>gn,CLIPVisionModel:()=>_n,CLIPVisionModelWithProjection:()=>wn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ve,CamembertForSequenceClassification:()=>ye,CamembertForTokenClassification:()=>be,CamembertModel:()=>_e,CamembertPreTrainedModel:()=>ge,CausalLMOutput:()=>cl,CausalLMOutputWithPast:()=>pl,ChineseCLIPModel:()=>Tn,ChineseCLIPPreTrainedModel:()=>Mn,ClapAudioModelWithProjection:()=>ys,ClapModel:()=>_s,ClapPreTrainedModel:()=>gs,ClapTextModelWithProjection:()=>ws,CodeGenForCausalLM:()=>Xn,CodeGenModel:()=>Hn,CodeGenPreTrainedModel:()=>Wn,CohereForCausalLM:()=>er,CohereModel:()=>Zn,CoherePreTrainedModel:()=>Jn,ConvBertForMaskedLM:()=>se,ConvBertForQuestionAnswering:()=>ue,ConvBertForSequenceClassification:()=>oe,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>ae,ConvBertPreTrainedModel:()=>ie,ConvNextForImageClassification:()=>ta,ConvNextModel:()=>ea,ConvNextPreTrainedModel:()=>Zi,ConvNextV2ForImageClassification:()=>ia,ConvNextV2Model:()=>ra,ConvNextV2PreTrainedModel:()=>na,DPTForDepthEstimation:()=>Di,DPTModel:()=>Li,DPTPreTrainedModel:()=>Bi,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Se,DebertaForSequenceClassification:()=>ke,DebertaForTokenClassification:()=>$e,DebertaModel:()=>Me,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ee,DebertaV2ForQuestionAnswering:()=>Ie,DebertaV2ForSequenceClassification:()=>Fe,DebertaV2ForTokenClassification:()=>Ae,DebertaV2Model:()=>Pe,DebertaV2PreTrainedModel:()=>Ce,DecisionTransformerModel:()=>Ks,DecisionTransformerPreTrainedModel:()=>Xs,DeiTForImageClassification:()=>Mi,DeiTModel:()=>xi,DeiTPreTrainedModel:()=>vi,DepthAnythingForDepthEstimation:()=>Ni,DepthAnythingPreTrainedModel:()=>Ri,DetrForObjectDetection:()=>ui,DetrForSegmentation:()=>di,DetrModel:()=>li,DetrObjectDetectionOutput:()=>ci,DetrPreTrainedModel:()=>oi,DetrSegmentationOutput:()=>pi,Dinov2ForImageClassification:()=>oa,Din
ov2Model:()=>sa,Dinov2PreTrainedModel:()=>aa,DistilBertForMaskedLM:()=>Re,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Be,DistilBertForTokenClassification:()=>Le,DistilBertModel:()=>Oe,DistilBertPreTrainedModel:()=>ze,DonutSwinModel:()=>Ji,DonutSwinPreTrainedModel:()=>Yi,EfficientNetForImageClassification:()=>Fs,EfficientNetModel:()=>Es,EfficientNetPreTrainedModel:()=>Ps,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>fe,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ce,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>je,EsmForSequenceClassification:()=>Ge,EsmForTokenClassification:()=>qe,EsmModel:()=>Ve,EsmPreTrainedModel:()=>Ne,FalconForCausalLM:()=>fs,FalconModel:()=>ms,FalconPreTrainedModel:()=>hs,FastViTForImageClassification:()=>Gr,FastViTModel:()=>jr,FastViTPreTrainedModel:()=>Vr,Florence2ForConditionalGeneration:()=>pn,Florence2PreTrainedModel:()=>cn,GLPNForDepthEstimation:()=>Qi,GLPNModel:()=>Ki,GLPNPreTrainedModel:()=>Xi,GPT2LMHeadModel:()=>En,GPT2Model:()=>Pn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>Un,GPTBigCodeModel:()=>qn,GPTBigCodePreTrainedModel:()=>Gn,GPTJForCausalLM:()=>jn,GPTJModel:()=>Vn,GPTJPreTrainedModel:()=>Nn,GPTNeoForCausalLM:()=>Bn,GPTNeoModel:()=>On,GPTNeoPreTrainedModel:()=>zn,GPTNeoXForCausalLM:()=>Rn,GPTNeoXModel:()=>Dn,GPTNeoXPreTrainedModel:()=>Ln,Gemma2ForCausalLM:()=>sr,Gemma2Model:()=>ar,Gemma2PreTrainedModel:()=>ir,GemmaForCausalLM:()=>rr,GemmaModel:()=>nr,GemmaPreTrainedModel:()=>tr,GroupViTModel:()=>Nr,GroupViTPreTrainedModel:()=>Rr,HieraForImageClassification:()=>$i,HieraModel:()=>ki,HieraPreTrainedModel:()=>Ti,HubertForCTC:()=>Wa,HubertForSequenceClassification:()=>Ha,HubertModel:()=>Ua,HubertPreTrainedModel:()=>qa,ImageMattingOutput:()=>hl,JAISLMHeadModel:()=>In,JAISModel:()=>An,JAISPreTrainedModel:()=>Fn,LlamaForCausalLM:()=>Yn,LlamaModel:()=>Qn,LlamaPreTrainedModel:()=>Kn,LlavaForConditionalGeneration:()=>un,LlavaPreTraine
dModel:()=>ln,LongT5ForConditionalGeneration:()=>_t,LongT5Model:()=>gt,LongT5PreTrainedModel:()=>ft,M2M100ForConditionalGeneration:()=>ba,M2M100Model:()=>ya,M2M100PreTrainedModel:()=>wa,MBartForCausalLM:()=>Pt,MBartForConditionalGeneration:()=>St,MBartForSequenceClassification:()=>Ct,MBartModel:()=>$t,MBartPreTrainedModel:()=>kt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ye,MPNetPreTrainedModel:()=>Qe,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>yt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>_a,MarianModel:()=>ga,MarianPreTrainedModel:()=>fa,MaskFormerForInstanceSegmentation:()=>Hi,MaskFormerModel:()=>Wi,MaskFormerPreTrainedModel:()=>Ui,MaskedLMOutput:()=>ul,MistralForCausalLM:()=>us,MistralModel:()=>ls,MistralPreTrainedModel:()=>os,MobileBertForMaskedLM:()=>He,MobileBertForQuestionAnswering:()=>Ke,MobileBertForSequenceClassification:()=>Xe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>Ds,MobileNetV1Model:()=>Ls,MobileNetV1PreTrainedModel:()=>Bs,MobileNetV2ForImageClassification:()=>Vs,MobileNetV2Model:()=>Ns,MobileNetV2PreTrainedModel:()=>Rs,MobileNetV3ForImageClassification:()=>qs,MobileNetV3Model:()=>Gs,MobileNetV3PreTrainedModel:()=>js,MobileNetV4ForImageClassification:()=>Hs,MobileNetV4Model:()=>Ws,MobileNetV4PreTrainedModel:()=>Us,MobileViTForImageClassification:()=>Xr,MobileViTModel:()=>Hr,MobileViTPreTrainedModel:()=>Wr,MobileViTV2ForImageClassification:()=>Yr,MobileViTV2Model:()=>Qr,MobileViTV2PreTrainedModel:()=>Kr,ModelOutput:()=>j,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>Tr,MptModel:()=>Mr,MptPreTrainedModel:()=>xr,MusicgenForCausalLM:()=>zs,MusicgenForConditionalGeneration:()=>Os,MusicgenModel:()=>Is,MusicgenPreTrainedModel:()=>As,NomicBertModel:()=>Y,NomicBertPreTrainedModel:()=>Q,OPTForCausalLM:()=>Sr,OPTModel:()=>$r,OPTPreTrainedModel:()=>kr,OpenELMForCausalLM:()=>ur,OpenELMModel:
()=>lr,OpenELMPreTrainedModel:()=>or,OwlViTForObjectDetection:()=>ei,OwlViTModel:()=>Zr,OwlViTPreTrainedModel:()=>Jr,Owlv2ForObjectDetection:()=>ri,Owlv2Model:()=>ni,Owlv2PreTrainedModel:()=>ti,Phi3ForCausalLM:()=>wr,Phi3Model:()=>_r,Phi3PreTrainedModel:()=>gr,PhiForCausalLM:()=>fr,PhiModel:()=>mr,PhiPreTrainedModel:()=>hr,PreTrainedModel:()=>V,PretrainedMixin:()=>Qs,PvtForImageClassification:()=>Ir,PvtModel:()=>Ar,PvtPreTrainedModel:()=>Fr,PyAnnoteForAudioFrameClassification:()=>Ca,PyAnnoteModel:()=>Sa,PyAnnotePreTrainedModel:()=>$a,QuestionAnsweringModelOutput:()=>dl,Qwen2ForCausalLM:()=>pr,Qwen2Model:()=>cr,Qwen2PreTrainedModel:()=>dr,RTDetrForObjectDetection:()=>fi,RTDetrModel:()=>mi,RTDetrObjectDetectionOutput:()=>gi,RTDetrPreTrainedModel:()=>hi,ResNetForImageClassification:()=>Pi,ResNetModel:()=>Ci,ResNetPreTrainedModel:()=>Si,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>re,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Vt,RobertaForSequenceClassification:()=>Rt,RobertaForTokenClassification:()=>Nt,RobertaModel:()=>Lt,RobertaPreTrainedModel:()=>Bt,SamImageSegmentationOutput:()=>ma,SamModel:()=>ha,SamPreTrainedModel:()=>pa,SapiensForDepthEstimation:()=>Gi,SapiensForNormalEstimation:()=>qi,SapiensForSemanticSegmentation:()=>ji,SapiensPreTrainedModel:()=>Vi,SegformerForImageClassification:()=>Ts,SegformerForSemanticSegmentation:()=>ks,SegformerModel:()=>Ms,SegformerPreTrainedModel:()=>xs,Seq2SeqLMOutput:()=>al,SequenceClassifierOutput:()=>sl,SiglipModel:()=>bn,SiglipPreTrainedModel:()=>yn,SiglipTextModel:()=>vn,SiglipVisionModel:()=>xn,SpeechT5ForSpeechToText:()=>ns,SpeechT5ForTextToSpeech:()=>rs,SpeechT5HifiGan:()=>is,SpeechT5Model:()=>ts,SpeechT5PreTrainedModel:()=>es,SqueezeBertForMaskedLM:()=>it,SqueezeBertForQuestionAnswering:()=>st,SqueezeBertForSequenceClassification:()=>at,SqueezeBertModel:()=>rt,Squeez
eBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>Cs,StableLmModel:()=>Ss,StableLmPreTrainedModel:()=>$s,Starcoder2ForCausalLM:()=>ps,Starcoder2Model:()=>cs,Starcoder2PreTrainedModel:()=>ds,Swin2SRForImageSuperResolution:()=>Oi,Swin2SRModel:()=>zi,Swin2SRPreTrainedModel:()=>Ii,SwinForImageClassification:()=>Ai,SwinModel:()=>Fi,SwinPreTrainedModel:()=>Ei,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>yi,TableTransformerModel:()=>wi,TableTransformerObjectDetectionOutput:()=>bi,TableTransformerPreTrainedModel:()=>_i,TokenClassifierOutput:()=>ll,TrOCRForCausalLM:()=>ss,TrOCRPreTrainedModel:()=>as,UniSpeechForCTC:()=>Ia,UniSpeechForSequenceClassification:()=>za,UniSpeechModel:()=>Aa,UniSpeechPreTrainedModel:()=>Fa,UniSpeechSatForAudioFrameClassification:()=>Ra,UniSpeechSatForCTC:()=>La,UniSpeechSatForSequenceClassification:()=>Da,UniSpeechSatModel:()=>Ba,UniSpeechSatPreTrainedModel:()=>Oa,ViTForImageClassification:()=>Er,ViTMAEModel:()=>Or,ViTMAEPreTrainedModel:()=>zr,ViTMSNForImageClassification:()=>Dr,ViTMSNModel:()=>Lr,ViTMSNPreTrainedModel:()=>Br,ViTModel:()=>Pr,ViTPreTrainedModel:()=>Cr,VisionEncoderDecoderModel:()=>on,VitMatteForImageMatting:()=>Ur,VitMattePreTrainedModel:()=>qr,VitsModel:()=>vs,VitsModelOutput:()=>ml,VitsPreTrainedModel:()=>bs,Wav2Vec2BertForCTC:()=>ja,Wav2Vec2BertForSequenceClassification:()=>Ga,Wav2Vec2BertModel:()=>Va,Wav2Vec2BertPreTrainedModel:()=>Na,Wav2Vec2ForAudioFrameClassification:()=>ka,Wav2Vec2ForCTC:()=>Ma,Wav2Vec2ForSequenceClassification:()=>Ta,Wav2Vec2Model:()=>xa,Wav2Vec2PreTrainedModel:()=>va,WavLMForAudioFrameClassification:()=>Za,WavLMForCTC:()=>Qa,WavLMForSequenceClassification:()=>Ya,WavLMForXVector:()=>Ja,WavLMModel:()=>Ka,WavLMPreTrainedModel:()=>Xa,WeSpeakerResNetModel:()=>Ea,WeSpeakerResNetPreTrainedModel:()=>Pa,WhisperForConditionalGeneration:()=>sn,WhisperModel:()=>an,WhisperPreTrainedModel:()=>rn,XLMForQuestionAnswering:()=>Ht,XLMForSequenceClassificat
/**
 * Create one inference session per entry of `t`.
 *
 * For every `[sessionName, fileName]` pair the model file (and any external
 * data files) is fetched through `l.getModelFile`, session options are derived
 * from `n` and the model's `"transformers.js_config"`, and the session is
 * created with `i.createInferenceSession`.
 *
 * @param {string} e Model id / path forwarded to `l.getModelFile`.
 * @param {Record<string, string>} t Mapping of session name -> ONNX file name (without suffix/extension).
 * @param {Object} n Loading options (`config`, `device`, `dtype`, `subfolder`,
 *   `session_options`, `use_external_data_format`, ...).
 * @returns {Promise<Record<string, any>>} Mapping of session name -> created session.
 * @throws {Error} On an unknown dtype, on fp16 + WebGPU without fp16 support, or when
 *   external data is requested while `f.apis.IS_NODE_ENV` is true. Download failures
 *   (including external-data downloads) reject the returned promise.
 */
async function P(e, t, n) {
  // Resolves the model bytes and the session options for a single ONNX file.
  const resolveSessionInputs = async (fileName) => {
    const customConfig = n.config?.["transformers.js_config"] ?? {};

    // --- device -----------------------------------------------------------
    let device = n.device ?? customConfig.device;
    if (device && typeof device !== "string") {
      // Per-file device mapping.
      if (device.hasOwnProperty(fileName)) {
        device = device[fileName];
      } else {
        console.warn(`device not specified for "${fileName}". Using the default device.`);
        device = null;
      }
    }
    const selectedDevice = device ?? (f.apis.IS_NODE_ENV ? "cpu" : "wasm");
    const executionProviders = (0, i.deviceToExecutionProviders)(selectedDevice);

    // --- dtype ------------------------------------------------------------
    let dtype = n.dtype ?? customConfig.dtype;
    if (typeof dtype !== "string") {
      if (dtype && dtype.hasOwnProperty(fileName)) {
        dtype = dtype[fileName];
      } else {
        dtype = a.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? a.DATA_TYPES.fp32;
        console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
      }
    }
    if (!a.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(dtype)) {
      throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(a.DATA_TYPES).join(", ")}`);
    }
    if (dtype === a.DATA_TYPES.fp16 && selectedDevice === "webgpu" && !(await (0, a.isWebGpuFp16Supported)())) {
      throw new Error(`The device (${selectedDevice}) does not support fp16.`);
    }

    // --- session options --------------------------------------------------
    const suffix = a.DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
    const modelPath = `${n.subfolder ?? ""}/${fileName}${suffix}.onnx`;

    // Shallow copy so the caller's `session_options` object is never mutated.
    const sessionOptions = { ...n.session_options };
    sessionOptions.executionProviders ??= executionProviders;

    const freeDimensionOverrides = customConfig.free_dimension_overrides;
    if (freeDimensionOverrides) {
      sessionOptions.freeDimensionOverrides ??= freeDimensionOverrides;
    } else if (selectedDevice.startsWith("webnn") && !sessionOptions.freeDimensionOverrides) {
      console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". When `free_dimension_overrides` is not set, you may experience significant performance degradation.');
    }

    // --- external data ----------------------------------------------------
    const externalFlag = n.use_external_data_format;
    const wantsExternalData = Boolean(
      externalFlag &&
        (externalFlag === true ||
          (typeof externalFlag === "object" &&
            externalFlag.hasOwnProperty(fileName) &&
            externalFlag[fileName] === true)),
    );
    // Validate before any download starts so no request is left dangling.
    if (wantsExternalData && f.apis.IS_NODE_ENV) {
      throw new Error("External data format is not yet supported in Node.js");
    }

    const bufferPromise = (0, l.getModelFile)(e, modelPath, true, n);

    let externalDataPromises = [];
    if (wantsExternalData) {
      const dataFileName = `${fileName}${suffix}.onnx_data`;
      const dataPath = `${n.subfolder ?? ""}/${dataFileName}`;
      // Plain async call (no `new Promise(async ...)` wrapper) so that a failed
      // download rejects instead of leaving a promise that never settles.
      externalDataPromises.push(
        (async () => ({
          path: dataFileName,
          data: await (0, l.getModelFile)(e, dataPath, true, n),
        }))(),
      );
    } else if (sessionOptions.externalData !== undefined) {
      externalDataPromises = sessionOptions.externalData.map(async (entry) => {
        // String `data` is treated as a path that still has to be fetched.
        if (typeof entry.data === "string") {
          const data = await (0, l.getModelFile)(e, entry.data, true, n);
          return { ...entry, data };
        }
        return entry;
      });
    }

    // --- WebGPU: keep decoder key/value outputs on the GPU -----------------
    if (selectedDevice === "webgpu") {
      const shapes = (0, r.getKeyValueShapes)(n.config, { prefix: "present" });
      if (Object.keys(shapes).length > 0 && !(0, i.isONNXProxy)()) {
        const preferredOutputLocation = {};
        for (const key in shapes) {
          if (!key.includes("encoder")) {
            preferredOutputLocation[key] = "gpu-buffer";
          }
        }
        sessionOptions.preferredOutputLocation = preferredOutputLocation;
      }
    }

    // Await everything together so every in-flight download has a handler.
    const [buffer, externalData] = await Promise.all([
      bufferPromise,
      Promise.all(externalDataPromises),
    ]);
    if (externalDataPromises.length > 0) {
      sessionOptions.externalData = externalData;
    }

    return { buffer, session_options: sessionOptions };
  };

  const entries = await Promise.all(
    Object.keys(t).map(async (sessionName) => {
      const { buffer, session_options } = await resolveSessionInputs(t[sessionName]);
      const session = await (0, i.createInferenceSession)(buffer, session_options);
      return [sessionName, session];
    }),
  );
  return Object.fromEntries(entries);
}
/**
 * Turn a flat or 2-D array of integer-like values into an int64 tensor.
 *
 * - A `c.Tensor` is returned as-is.
 * - A flat array becomes a tensor of shape `[1, length]`.
 * - An array of arrays becomes `[rows, cols]`; every row must have the
 *   same length as the first one.
 *
 * @param {any} e Tensor, flat array or array of arrays.
 * @returns {any} A `c.Tensor` with dtype "int64".
 * @throws {Error} When `e` is empty or its rows differ in length.
 */
function A(e) {
  // Already a tensor: hand it back untouched.
  if (e instanceof c.Tensor) {
    return e;
  }

  if (e.length === 0) {
    throw Error("items must be non-empty");
  }

  const toBigInt64 = (values) => BigInt64Array.from(values.map((value) => BigInt(value)));

  // Flat input is treated as a single-row batch.
  if (!Array.isArray(e[0])) {
    return new c.Tensor("int64", toBigInt64(e), [1, e.length]);
  }

  const columns = e[0].length;
  const isRectangular = e.every((row) => row.length === columns);
  if (!isRectangular) {
    throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");
  }

  return new c.Tensor("int64", toBigInt64(e.flat()), [e.length, columns]);
}
/**
 * Trim the model inputs for the next decoding step when cached key/values
 * are present. `n` is modified in place and returned.
 *
 * The cached length is read from the second-to-last dimension of the first
 * entry in `n.past_key_values`.
 *
 * @param {any} e Model instance (only `e.config` is read).
 * @param {any} t Unused.
 * @param {any} n Model inputs (`input_ids`, optional `attention_mask`, optional `past_key_values`).
 * @param {any} r Unused.
 * @returns {any} The same `n` object.
 * @throws {Error} When image tokens are present but `config.num_image_tokens` is not set.
 */
function D(e, t, n, r) {
  const cache = n.past_key_values;
  if (!cache) {
    return n;
  }

  const pastLength = Object.values(cache)[0].dims.at(-2);
  const { input_ids: inputIds, attention_mask: attentionMask } = n;
  const sequenceLength = inputIds.dims[1];

  // Attention mask longer than the ids: inputs are left exactly as given.
  if (attentionMask && attentionMask.dims[1] > sequenceLength) {
    return n;
  }

  // The ids still contain already-cached positions: drop the cached prefix.
  if (pastLength < sequenceLength) {
    n.input_ids = inputIds.slice(null, [pastLength, null]);
    return n;
  }

  const imageTokenIndex = e.config.image_token_index;
  if (imageTokenIndex == null) {
    return n;
  }
  // Loose equality is intentional: the ids may be BigInt while the index is a number.
  const containsImageToken = inputIds.data.some((token) => token == imageTokenIndex);
  if (!containsImageToken) {
    return n;
  }

  const numImageTokens = e.config.num_image_tokens;
  if (!numImageTokens) {
    throw new Error("`num_image_tokens` is missing in the model configuration.");
  }

  const remaining = sequenceLength - (pastLength - numImageTokens);
  n.input_ids = inputIds.slice(null, [-remaining, null]);
  n.attention_mask = (0, c.ones)([1, pastLength + remaining]);
  return n;
}
/**
 * Base class for pretrained models (exported as `PreTrainedModel`).
 *
 * An instance wraps a model config plus a dictionary of inference sessions and
 * dispatches `forward` / generation helpers according to the model type that is
 * registered for the concrete class in the module-level maps `C` (class -> name)
 * and `$` (name -> numeric model type).
 */
class V extends s.Callable {
  /** Key of the primary model input. */
  main_input_name = "input_ids";

  /** Input names that `_prepare_model_inputs` picks out of the generation kwargs. */
  forward_params = ["input_ids", "attention_mask"];

  /**
   * @param {Object} config Model configuration.
   * @param {Record<string, any>} sessions Inference sessions keyed by name.
   */
  constructor(config, sessions) {
    super();
    this.config = config;
    this.sessions = sessions;

    const modelName = C.get(this.constructor);
    const modelType = $.get(modelName);

    this.can_generate = false;
    this._forward = null;
    this._prepare_inputs_for_generation = null;

    // Select the forward / input-preparation functions for this model type.
    switch (modelType) {
      case x:
        this.can_generate = true;
        this._forward = B;
        this._prepare_inputs_for_generation = D;
        break;
      case b:
      case v:
      case k:
        this.can_generate = true;
        this._forward = z;
        this._prepare_inputs_for_generation = R;
        break;
      case y:
        this._forward = z;
        break;
      case T:
        this.can_generate = true;
        this._forward = L;
        this._prepare_inputs_for_generation = N;
        break;
      default:
        // Anything else is run through the encoder-style forward pass.
        this._forward = O;
    }

    if (this.can_generate) {
      this.forward_params.push("past_key_values");
    }

    this.custom_config = this.config["transformers.js_config"] ?? {};
  }

  /**
   * Dispose every session that exposes `handler.dispose`.
   * @returns {Promise<any[]>} Results of all dispose calls.
   */
  async dispose() {
    const pending = [];
    for (const session of Object.values(this.sessions)) {
      if (session?.handler?.dispose) {
        pending.push(session.handler.dispose());
      }
    }
    return await Promise.all(pending);
  }

  /**
   * Load the config and the sessions required by this class' model type and
   * instantiate the class with them.
   *
   * @param {string} pretrained_model_name_or_path Model id / path.
   * @param {Object} [options] Loading options; all of them are forwarded to
   *   `r.AutoConfig.from_pretrained`, `P` and `l.getModelJSON`.
   * @returns {Promise<V>} A new instance of the class this method was called on.
   */
  static async from_pretrained(
    pretrained_model_name_or_path,
    {
      progress_callback = null,
      config = null,
      cache_dir = null,
      local_files_only = false,
      revision = "main",
      model_file_name = null,
      subfolder = "onnx",
      device = null,
      dtype = null,
      use_external_data_format = null,
      session_options = {},
    } = {},
  ) {
    const options = {
      progress_callback,
      config,
      cache_dir,
      local_files_only,
      revision,
      model_file_name,
      subfolder,
      device,
      dtype,
      use_external_data_format,
      session_options,
    };

    const modelName = C.get(this);
    const modelType = $.get(modelName);

    config = options.config = await r.AutoConfig.from_pretrained(pretrained_model_name_or_path, options);

    // generation_config.json is optional (third argument `false`).
    const loadGenerationConfig = () =>
      (0, l.getModelJSON)(pretrained_model_name_or_path, "generation_config.json", false, options);
    const loadSessions = (names) => P(pretrained_model_name_or_path, names, options);

    let info;
    if (modelType === x) {
      info = await Promise.all([
        loadSessions({ model: options.model_file_name ?? "model" }),
        loadGenerationConfig(),
      ]);
    } else if (modelType === b || modelType === v) {
      info = await Promise.all([
        loadSessions({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
        loadGenerationConfig(),
      ]);
    } else if (modelType === M) {
      info = await Promise.all([
        loadSessions({ model: "vision_encoder", prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder" }),
      ]);
    } else if (modelType === y) {
      info = await Promise.all([
        loadSessions({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
      ]);
    } else if (modelType === T) {
      const names = {
        embed_tokens: "embed_tokens",
        vision_encoder: "vision_encoder",
        decoder_model_merged: "decoder_model_merged",
      };
      if (config.is_encoder_decoder) {
        names.model = "encoder_model";
      }
      info = await Promise.all([loadSessions(names), loadGenerationConfig()]);
    } else if (modelType === k) {
      info = await Promise.all([
        loadSessions({
          model: "text_encoder",
          decoder_model_merged: "decoder_model_merged",
          encodec_decode: "encodec_decode",
        }),
        loadGenerationConfig(),
      ]);
    } else {
      if (modelType !== w) {
        console.warn(`Model type for '${modelName ?? config?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`);
      }
      info = await Promise.all([loadSessions({ model: options.model_file_name ?? "model" })]);
    }

    return new this(config, ...info);
  }

  /**
   * Run the model; delegates to `forward`.
   * @param {Object} model_inputs
   * @returns {Promise<Object>}
   */
  async _call(model_inputs) {
    return await this.forward(model_inputs);
  }

  /**
   * Forward pass using the function selected in the constructor.
   * @param {Object} model_inputs
   * @returns {Promise<Object>}
   */
  async forward(model_inputs) {
    return await this._forward(this, model_inputs);
  }

  /**
   * Build the list of logits warpers (temperature, top-k, top-p) enabled by the config.
   * @param {Object} generation_config
   * @returns {any} A `LogitsProcessorList`.
   */
  _get_logits_warper(generation_config) {
    const warpers = new u.LogitsProcessorList();

    if (generation_config.temperature !== null && generation_config.temperature !== 1) {
      warpers.push(new u.TemperatureLogitsWarper(generation_config.temperature));
    }
    if (generation_config.top_k !== null && generation_config.top_k !== 0) {
      warpers.push(new u.TopKLogitsWarper(generation_config.top_k));
    }
    if (generation_config.top_p !== null && generation_config.top_p < 1) {
      warpers.push(new u.TopPLogitsWarper(generation_config.top_p));
    }

    return warpers;
  }

  /**
   * Build the list of logits processors enabled by the generation config,
   * followed by any user-supplied processors.
   *
   * @param {Object} generation_config
   * @param {number} input_ids_seq_length Length of the prompt (last dim of the input ids).
   * @param {any} [logits_processor=null] Extra processors appended at the end.
   * @returns {any} A `LogitsProcessorList`.
   */
  _get_logits_processor(generation_config, input_ids_seq_length, logits_processor = null) {
    const processors = new u.LogitsProcessorList();

    if (generation_config.repetition_penalty !== null && generation_config.repetition_penalty !== 1) {
      processors.push(new u.RepetitionPenaltyLogitsProcessor(generation_config.repetition_penalty));
    }
    if (generation_config.no_repeat_ngram_size !== null && generation_config.no_repeat_ngram_size > 0) {
      processors.push(new u.NoRepeatNGramLogitsProcessor(generation_config.no_repeat_ngram_size));
    }
    if (generation_config.bad_words_ids !== null) {
      processors.push(new u.NoBadWordsLogitsProcessor(generation_config.bad_words_ids, generation_config.eos_token_id));
    }
    if (
      generation_config.min_length !== null &&
      generation_config.eos_token_id !== null &&
      generation_config.min_length > 0
    ) {
      processors.push(new u.MinLengthLogitsProcessor(generation_config.min_length, generation_config.eos_token_id));
    }
    if (
      generation_config.min_new_tokens !== null &&
      generation_config.eos_token_id !== null &&
      generation_config.min_new_tokens > 0
    ) {
      processors.push(
        new u.MinNewTokensLengthLogitsProcessor(
          input_ids_seq_length,
          generation_config.min_new_tokens,
          generation_config.eos_token_id,
        ),
      );
    }
    if (generation_config.forced_bos_token_id !== null) {
      processors.push(new u.ForcedBOSTokenLogitsProcessor(generation_config.forced_bos_token_id));
    }
    if (generation_config.forced_eos_token_id !== null) {
      processors.push(
        new u.ForcedEOSTokenLogitsProcessor(generation_config.max_length, generation_config.forced_eos_token_id),
      );
    }
    if (generation_config.begin_suppress_tokens !== null) {
      // Shift by one when a forced BOS token will occupy the first generated position.
      const begin_index =
        input_ids_seq_length > 1 || generation_config.forced_bos_token_id === null
          ? input_ids_seq_length
          : input_ids_seq_length + 1;
      processors.push(new u.SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, begin_index));
    }
    if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
      processors.push(new u.ClassifierFreeGuidanceLogitsProcessor(generation_config.guidance_scale));
    }
    if (logits_processor !== null) {
      processors.extend(logits_processor);
    }

    return processors;
  }

  /**
   * Merge the generation configuration from (in increasing priority) the model
   * config, `this.generation_config`, the explicit `generation_config` and `kwargs`.
   *
   * @param {Object|null} generation_config
   * @param {Object} kwargs Only keys that already exist on the merged config are applied.
   * @param {Function} [cls=d.GenerationConfig] Config class to instantiate.
   * @returns {Object} The merged generation config.
   */
  _prepare_generation_config(generation_config, kwargs, cls = d.GenerationConfig) {
    const base = { ...this.config };
    // Nested sub-configs are flattened onto the top level.
    for (const key of ["decoder", "generator", "text_config"]) {
      if (key in base) {
        Object.assign(base, base[key]);
      }
    }

    const merged = new cls(base);
    if ("generation_config" in this) {
      Object.assign(merged, this.generation_config);
    }
    if (generation_config) {
      Object.assign(merged, generation_config);
    }
    if (kwargs) {
      Object.assign(merged, (0, o.pick)(kwargs, Object.getOwnPropertyNames(merged)));
    }
    return merged;
  }

  /**
   * Build the stopping criteria (max length, EOS token, user-supplied).
   * @param {Object} generation_config
   * @param {any} [stopping_criteria=null] Extra criteria appended at the end.
   * @returns {any} A `StoppingCriteriaList`.
   */
  _get_stopping_criteria(generation_config, stopping_criteria = null) {
    const criteria = new h.StoppingCriteriaList();

    if (generation_config.max_length !== null) {
      criteria.push(
        new h.MaxLengthCriteria(generation_config.max_length, this.config.max_position_embeddings ?? null),
      );
    }
    if (generation_config.eos_token_id !== null) {
      criteria.push(new h.EosTokenCriteria(generation_config.eos_token_id));
    }
    if (stopping_criteria) {
      criteria.extend(stopping_criteria);
    }
    return criteria;
  }

  /**
   * Throw if this class cannot generate, suggesting compatible classes when the
   * module-level mappings (`so`, `uo`, `ao`, `eo`) know the model type.
   * @throws {Error} When `can_generate` is false.
   */
  _validate_model_class() {
    if (this.can_generate) {
      return;
    }

    const mappings = [so, uo, ao, eo];
    const modelName = C.get(this.constructor);
    const compatible = new Set();
    const modelType = this.config.model_type;

    for (const mapping of mappings) {
      const supported = mapping.get(modelType);
      if (supported) {
        compatible.add(supported[0]);
      }
    }

    let message = `The current model class (${modelName}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;
    if (compatible.size > 0) {
      message += ` Please use the following class instead: ${[...compatible].join(", ")}`;
    }
    throw Error(message);
  }

  /**
   * Delegate to the input-preparation function selected in the constructor.
   * @param {...any} args
   * @returns {Object} Model inputs for the next step.
   */
  prepare_inputs_for_generation(...args) {
    return this._prepare_inputs_for_generation(this, ...args);
  }

  /**
   * Update `model_inputs` in place after a generation step.
   *
   * @param {Object} params
   * @param {Array<Array<bigint>>} params.generated_input_ids Newly generated token per batch item.
   * @param {Object} params.outputs Outputs of the last forward pass.
   * @param {Object} params.model_inputs Inputs to update.
   * @param {boolean} params.is_encoder_decoder
   * @returns {Object} The same `model_inputs` object.
   */
  _update_model_kwargs_for_generation({ generated_input_ids, outputs, model_inputs, is_encoder_decoder }) {
    model_inputs.past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values);

    // Only the newly generated token is fed back on the next step.
    model_inputs.input_ids = new c.Tensor("int64", generated_input_ids.flat(), [generated_input_ids.length, 1]);

    if (!is_encoder_decoder) {
      // Extend the attention mask by one position for the new token.
      model_inputs.attention_mask = (0, c.cat)(
        [model_inputs.attention_mask, (0, c.ones)([model_inputs.attention_mask.dims[0], 1])],
        1,
      );
    }

    // Cleared so that it is recomputed by the forward function.
    model_inputs.position_ids = null;
    return model_inputs;
  }

  /**
   * Pick the forward parameters out of `model_kwargs` and resolve the main input.
   *
   * @param {Object} params
   * @param {any} params.inputs Main input, if passed positionally.
   * @param {any} [params.bos_token_id] Unused.
   * @param {Object} params.model_kwargs
   * @returns {{inputs_tensor: any, model_inputs: Object, model_input_name: string}}
   * @throws {Error} When the main input is supplied both as `inputs` and by name.
   */
  _prepare_model_inputs({ inputs, bos_token_id, model_kwargs }) {
    const model_inputs = (0, o.pick)(model_kwargs, this.forward_params);
    const input_name = this.main_input_name;

    if (input_name in model_inputs) {
      if (inputs) {
        throw new Error(
          `\`inputs\` were passed alongside \`${input_name}\`, which is not allowed. ` +
            `Make sure to pass either \`inputs\` or \`${input_name}\`, but not both.`,
        );
      }
    } else {
      model_inputs[input_name] = inputs;
    }

    return { inputs_tensor: model_inputs[input_name], model_inputs, model_input_name: input_name };
  }

  /**
   * Run the encoder once and store its output as `encoder_outputs`.
   *
   * @param {Object} params
   * @param {any} params.inputs_tensor Unused.
   * @param {Object} params.model_inputs
   * @param {string} params.model_input_name Unused.
   * @param {Object} params.generation_config
   * @returns {Promise<Object>} Model inputs including `encoder_outputs`.
   * @throws {Error} When encoder and decoder batch sizes cannot be reconciled.
   */
  async _prepare_encoder_decoder_kwargs_for_generation({
    inputs_tensor,
    model_inputs,
    model_input_name,
    generation_config,
  }) {
    if (
      this.sessions.model.inputNames.includes("inputs_embeds") &&
      !model_inputs.inputs_embeds &&
      "_prepare_inputs_embeds" in this
    ) {
      // The encoder consumes embeddings: replace ids / pixel values with them.
      const { input_ids, pixel_values, attention_mask, ...rest } = model_inputs;
      const prepared = await this._prepare_inputs_embeds(model_inputs);
      model_inputs = {
        ...rest,
        ...(0, o.pick)(prepared, ["inputs_embeds", "attention_mask"]),
      };
    }

    let { last_hidden_state } = await O(this, model_inputs);

    if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
      // Classifier-free guidance: append an all-zero batch of hidden states (and mask).
      last_hidden_state = (0, c.cat)([last_hidden_state, (0, c.full_like)(last_hidden_state, 0)], 0);
      if ("attention_mask" in model_inputs) {
        model_inputs.attention_mask = (0, c.cat)(
          [model_inputs.attention_mask, (0, c.zeros_like)(model_inputs.attention_mask)],
          0,
        );
      }
    } else if (model_inputs.decoder_input_ids) {
      const decoder_batch_size = A(model_inputs.decoder_input_ids).dims[0];
      if (decoder_batch_size !== last_hidden_state.dims[0]) {
        if (last_hidden_state.dims[0] !== 1) {
          throw new Error(`The encoder outputs have a different batch size (${last_hidden_state.dims[0]}) than the decoder inputs (${decoder_batch_size}).`);
        }
        // A single encoder output is repeated for every decoder sequence.
        last_hidden_state = (0, c.cat)(
          Array.from({ length: decoder_batch_size }, () => last_hidden_state),
          0,
        );
      }
    }

    model_inputs.encoder_outputs = last_hidden_state;
    return model_inputs;
  }

  /**
   * Build the initial decoder input ids for encoder-decoder generation.
   *
   * Note: `decoder_attention_mask` is written onto the passed-in `model_kwargs`
   * object, not onto the returned `model_inputs` (which is a copy taken before).
   *
   * @param {Object} params
   * @param {number} params.batch_size
   * @param {string} params.model_input_name Unused.
   * @param {Object} params.model_kwargs
   * @param {number|number[]|null} params.decoder_start_token_id Falls back to `bos_token_id` when nullish.
   * @param {number} params.bos_token_id
   * @param {Object} params.generation_config Unused.
   * @returns {{input_ids: any, model_inputs: Object}}
   * @throws {Error} When an array `decoder_start_token_id` does not match the batch size.
   */
  _prepare_decoder_input_ids_for_generation({
    batch_size,
    model_input_name,
    model_kwargs,
    decoder_start_token_id,
    bos_token_id,
    generation_config,
  }) {
    let { decoder_input_ids, ...model_inputs } = model_kwargs;

    if (decoder_input_ids) {
      // A single sequence is repeated for every batch item.
      if (!Array.isArray(decoder_input_ids[0])) {
        const sequence = decoder_input_ids;
        decoder_input_ids = Array.from({ length: batch_size }, () => sequence);
      }
    } else {
      decoder_start_token_id ??= bos_token_id;

      if (this.config.model_type === "musicgen") {
        // One start token per codebook and batch item.
        decoder_input_ids = Array.from(
          { length: batch_size * this.config.decoder.num_codebooks },
          () => [decoder_start_token_id],
        );
      } else if (Array.isArray(decoder_start_token_id)) {
        if (decoder_start_token_id.length !== batch_size) {
          throw new Error(`\`decoder_start_token_id\` expected to have length ${batch_size} but got ${decoder_start_token_id.length}`);
        }
        decoder_input_ids = decoder_start_token_id;
      } else {
        decoder_input_ids = Array.from({ length: batch_size }, () => [decoder_start_token_id]);
      }
    }

    decoder_input_ids = A(decoder_input_ids);
    model_kwargs.decoder_attention_mask = (0, c.ones_like)(decoder_input_ids);

    return { input_ids: decoder_input_ids, model_inputs };
  }

  /**
   * Generate token sequences.
   *
   * @param {Object} options
   * @param {any} [options.inputs=null] Main model input.
   * @param {Object} [options.generation_config=null]
   * @param {any} [options.logits_processor=null]
   * @param {any} [options.stopping_criteria=null]
   * @param {any} [options.streamer=null] Object with `put` / `end` that receives tokens as they are produced.
   * @param {...any} [options.kwargs] Further model inputs / generation config overrides.
   * @returns {Promise<any>} The sequences tensor, or an object with `sequences`,
   *   `past_key_values` and collected attentions when `return_dict_in_generate` is set.
   * @throws {Error} When the model class cannot generate.
   */
  async generate({
    inputs = null,
    generation_config = null,
    logits_processor = null,
    stopping_criteria = null,
    streamer = null,
    ...kwargs
  }) {
    this._validate_model_class();

    generation_config = this._prepare_generation_config(generation_config, kwargs);

    let { inputs_tensor, model_inputs, model_input_name } = this._prepare_model_inputs({
      inputs,
      model_kwargs: kwargs,
    });

    const is_encoder_decoder = this.config.is_encoder_decoder;

    if (is_encoder_decoder && !("encoder_outputs" in model_inputs)) {
      model_inputs = await this._prepare_encoder_decoder_kwargs_for_generation({
        inputs_tensor,
        model_inputs,
        model_input_name,
        generation_config,
      });
    }

    let input_ids;
    if (is_encoder_decoder) {
      ({ input_ids, model_inputs } = this._prepare_decoder_input_ids_for_generation({
        batch_size: model_inputs[model_input_name].dims.at(0),
        model_input_name,
        model_kwargs: model_inputs,
        decoder_start_token_id: generation_config.decoder_start_token_id,
        bos_token_id: generation_config.bos_token_id,
        generation_config,
      }));
    } else {
      input_ids = model_inputs[model_input_name];
    }

    const input_ids_length = input_ids.dims.at(-1);
    if (generation_config.max_new_tokens !== null) {
      generation_config.max_length = input_ids_length + generation_config.max_new_tokens;
    }

    const prepared_logits_processor = this._get_logits_processor(
      generation_config,
      input_ids_length,
      logits_processor,
    );
    const prepared_stopping_criteria = this._get_stopping_criteria(generation_config, stopping_criteria);

    const num_inputs = model_inputs[model_input_name].dims.at(0);
    const sampler = m.LogitsSampler.getSampler(generation_config);

    // Accumulated per-sequence scores; currently not part of the return value.
    const scores = new Array(num_inputs).fill(0);
    const all_input_ids = input_ids.tolist();

    if (streamer) {
      streamer.put(all_input_ids);
    }

    let outputs;
    const attentions = {};
    for (;;) {
      model_inputs = this.prepare_inputs_for_generation(all_input_ids, model_inputs, generation_config);
      outputs = await this.forward(model_inputs);

      if (generation_config.output_attentions && generation_config.return_dict_in_generate) {
        const step_attentions = this.getAttentions(outputs);
        for (const key in step_attentions) {
          if (!(key in attentions)) {
            attentions[key] = [];
          }
          attentions[key].push(step_attentions[key]);
        }
      }

      // Only the logits of the last position are used to pick the next token.
      const next_tokens_scores = prepared_logits_processor(
        all_input_ids,
        outputs.logits.slice(null, -1, null),
      );

      const generated_input_ids = [];
      for (let batch_idx = 0; batch_idx < next_tokens_scores.dims.at(0); ++batch_idx) {
        const batch_logits = next_tokens_scores[batch_idx];
        const sampled = await sampler(batch_logits);

        // Only the first sampled candidate is used.
        for (const [token_id, log_prob] of sampled) {
          const token = BigInt(token_id);
          scores[batch_idx] += log_prob;
          all_input_ids[batch_idx].push(token);
          generated_input_ids.push([token]);
          break;
        }
      }

      if (streamer) {
        streamer.put(generated_input_ids);
      }

      const finished = prepared_stopping_criteria(all_input_ids);
      if (finished.every((done) => done)) {
        break;
      }

      model_inputs = this._update_model_kwargs_for_generation({
        generated_input_ids,
        outputs,
        model_inputs,
        is_encoder_decoder,
      });
    }

    if (streamer) {
      streamer.end();
    }

    const past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values, true);
    const sequences = new c.Tensor("int64", all_input_ids.flat(), [
      all_input_ids.length,
      all_input_ids[0].length,
    ]);

    if (generation_config.return_dict_in_generate) {
      return { sequences, past_key_values, ...attentions };
    }

    // The outputs are not handed to the caller: release any GPU-resident tensors.
    for (const tensor of Object.values(outputs)) {
      if (tensor.location === "gpu-buffer") {
        tensor.dispose();
      }
    }
    return sequences;
  }

  /**
   * Convert `present*` decoder outputs into `past_key_values*` inputs for the next step.
   *
   * Encoder entries are carried over from `pastKeyValues` when available; previous
   * entries that live in a GPU buffer and are being replaced are disposed.
   *
   * @param {Object} decoderResults Outputs of the last forward pass.
   * @param {Object} pastKeyValues Key/values used for that pass (may be nullish).
   * @param {boolean} [disposeEncoderPKVs=false] Also dispose previous encoder entries.
   * @returns {Object} Null-prototype object of key/value tensors.
   */
  getPastKeyValues(decoderResults, pastKeyValues, disposeEncoderPKVs = false) {
    const pkvs = Object.create(null);

    for (const name in decoderResults) {
      if (!name.startsWith("present")) {
        continue;
      }

      const newName = name.replace("present", "past_key_values");
      const isEncoderPkv = name.includes("encoder");

      // Encoder key/values are reused from the previous step when present.
      pkvs[newName] = isEncoderPkv && pastKeyValues ? pastKeyValues[newName] : decoderResults[name];

      if (pastKeyValues && (!isEncoderPkv || disposeEncoderPKVs)) {
        const previous = pastKeyValues[newName];
        if (previous.location === "gpu-buffer") {
          previous.dispose();
        }
      }
    }
    return pkvs;
  }

  /**
   * Group outputs whose names start with `cross_attentions`, `encoder_attentions`
   * or `decoder_attentions`.
   * @param {Object} model_output
   * @returns {Object} Arrays of attention tensors keyed by prefix.
   */
  getAttentions(model_output) {
    const attentions = {};

    for (const prefix of ["cross_attentions", "encoder_attentions", "decoder_attentions"]) {
      for (const name in model_output) {
        if (name.startsWith(prefix)) {
          if (!(prefix in attentions)) {
            attentions[prefix] = [];
          }
          attentions[prefix].push(model_output[name]);
        }
      }
    }
    return attentions;
  }

  /**
   * Add key/value inputs to `decoderFeeds`: either the supplied ones, or empty
   * tensors with the shapes reported by `r.getKeyValueShapes(this.config)`.
   * @param {Object} decoderFeeds Modified in place.
   * @param {Object|null} pastKeyValues
   */
  addPastKeyValues(decoderFeeds, pastKeyValues) {
    if (pastKeyValues) {
      Object.assign(decoderFeeds, pastKeyValues);
      return;
    }

    const dtype = this.custom_config.kv_cache_dtype ?? "float32";
    // float16 data is backed by a Uint16Array; otherwise an empty plain array is used.
    const empty = dtype === "float16" ? new Uint16Array() : [];
    const shapes = (0, r.getKeyValueShapes)(this.config);

    for (const name in shapes) {
      decoderFeeds[name] = new c.Tensor(dtype, empty, shapes[name]);
    }
  }

  /**
   * Run the `vision_encoder` session and return its `image_features`.
   * Sets `config.num_image_tokens` from the features when it is not configured.
   * @param {Object} params
   * @param {any} params.pixel_values
   * @returns {Promise<any>}
   */
  async encode_image({ pixel_values }) {
    const features = (await E(this.sessions.vision_encoder, { pixel_values })).image_features;

    if (!this.config.num_image_tokens) {
      console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`);
      this.config.num_image_tokens = features.dims[1];
    }
    return features;
  }

  /**
   * Run the `embed_tokens` session and return its `inputs_embeds`.
   * @param {Object} params
   * @param {any} params.input_ids
   * @returns {Promise<any>}
   */
  async encode_text({ input_ids }) {
    return (await E(this.sessions.embed_tokens, { input_ids })).inputs_embeds;
  }
}
/**
 * Model head whose call result is wrapped in `sl`
 * (exported by this module as `SequenceClassifierOutput`).
 */
class oe extends ie {
  /**
   * Run the parent `_call` and wrap its result.
   * @param {Object} e Model inputs.
   * @returns {Promise<any>} An `sl` instance built from the raw outputs.
   */
  async _call(e) {
    const rawOutputs = await super._call(e);
    return new sl(rawOutputs);
  }
}
/**
 * Base class of the T5 models (exported as `T5PreTrainedModel`).
 * Widens the accepted forward parameters and keeps the generation config
 * that is passed as the third constructor argument.
 */
class pt extends V {
  forward_params = [
    "input_ids",
    "attention_mask",
    "encoder_outputs",
    "decoder_input_ids",
    "decoder_attention_mask",
    "past_key_values",
  ];

  /**
   * @param {Object} config Model configuration.
   * @param {Record<string, any>} sessions Inference sessions.
   * @param {Object} generation_config Generation configuration stored on the instance.
   */
  constructor(config, sessions, generation_config) {
    super(config, sessions);
    this.generation_config = generation_config;
  }
}
/**
 * Whisper model with a language-modelling head
 * (exported as `WhisperForConditionalGeneration`).
 */
class sn extends rn{
  /**
   * Same as the parent implementation, but instantiates
   * `g.WhisperGenerationConfig` as the config class.
   */
  _prepare_generation_config(e,t){return super._prepare_generation_config(e,t,g.WhisperGenerationConfig)}
  /**
   * Build the initial decoder token ids from the generation config `e`:
   * the decoder start token, then (multilingual only) the language and task
   * tokens, then optionally the "no timestamps" token.
   *
   * @param {Object} e Generation config.
   * @returns {Array} Token ids with null/undefined entries removed.
   * @throws {Error} If `language` or `task` is given while `is_multilingual` is falsy.
   */
  _retrieve_init_tokens(e){
    const t=[e.decoder_start_token_id];
    let n=e.language;
    const r=e.task;
    if(e.is_multilingual){
      n||(console.warn("No language specified - defaulting to English (en)."),n="en");
      // Language tokens are looked up in the form `<|xx|>`.
      const i=`<|${(0,_.whisper_language_to_code)(n)}|>`;
      t.push(e.lang_to_id[i]),
      t.push(e.task_to_id[r??"transcribe"])
    }else if(n||r)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");
    // Append the no-timestamps token when timestamps are off, or drop it when they are on.
    // Ids that could not be resolved (null/undefined) are filtered out of the result.
    return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))
  }
  /**
   * Generate with Whisper-specific setup, then delegate to the parent `generate`.
   *
   * - `decoder_input_ids` default to `_retrieve_init_tokens`.
   * - Adds `WhisperTimeStampLogitsProcessor` when `return_timestamps` is set and
   *   `SuppressTokensAtBeginLogitsProcessor` when `begin_suppress_tokens` is set.
   * - With `return_token_timestamps`, forces `output_attentions` and
   *   `return_dict_in_generate` and attaches `token_timestamps` to the result.
   *
   * @returns {Promise<any>} Whatever the parent `generate` returns (plus `token_timestamps` when requested).
   * @throws {Error} If `return_token_timestamps` is set but the config has no `alignment_heads`.
   */
  async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:r=null,...i}){
    t=this._prepare_generation_config(t,i);
    const a=i.decoder_input_ids??this._retrieve_init_tokens(t);
    if(
      t.return_timestamps&&(n??=new u.LogitsProcessorList,n.push(new u.WhisperTimeStampLogitsProcessor(t,a))),
      // NOTE(review): `a.length` assumes `decoder_input_ids` is array-like — confirm for caller-supplied values.
      t.begin_suppress_tokens&&(n??=new u.LogitsProcessorList,n.push(new u.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,a.length))),
      t.return_token_timestamps
    ){
      if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");
      "translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),
      t.output_attentions=!0,
      t.return_dict_in_generate=!0
    }
    // NOTE: `stopping_criteria` is destructured above but not forwarded to the parent call.
    const s=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:a,...i});
    return t.return_token_timestamps&&(s.token_timestamps=this._extract_token_timestamps(s,t.alignment_heads,t.num_frames)),s
  }
  /**
   * Compute per-token timestamps from the cross-attention weights collected
   * during generation, using median filtering and dynamic time warping.
   *
   * @param {Object} e Generation outputs; must contain `cross_attentions` and `sequences`.
   * @param {Array} t Alignment heads as `[layer, head]` pairs.
   * @param {number|null} [n=null] Number of frames; when set, the last axis is sliced to `[0, n]`.
   * @param {number} [r=.02] Factor applied to the DTW indices (presumably seconds per frame — confirm).
   * @returns {any} float32 tensor with the dims of `e.sequences`.
   * @throws {Error} If cross attentions are missing or a layer index is out of bounds.
   */
  _extract_token_timestamps(e,t,n=null,r=.02){
    if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");
    null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");
    let i=this.config.median_filter_width;
    void 0===i&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),i=7);
    const a=e.cross_attentions,
    // One tensor per decoder layer: the per-step attentions concatenated along axis 2.
    s=Array.from({length:this.config.decoder_layers},((e,t)=>(0,c.cat)(a.map((e=>e[t])),2))),
    // Select the alignment heads, stack them, and swap the first two axes.
    l=(0,c.stack)(t.map((([e,t])=>{if(e>=s.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${s.length}).`);return n?s[e].slice(null,t,null,[0,n]):s[e].slice(null,t)}))).transpose(1,0,2,3),
    [u,d]=(0,c.std_mean)(l,-2,0,!0),
    h=l.clone();
    // Normalise every row in place ((x - mean) / std), then median-filter it.
    for(let e=0;e<h.dims[0];++e){
      const t=h[e];
      for(let n=0;n<t.dims[0];++n){
        const r=t[n],a=u[e][n][0].data,s=d[e][n][0].data;
        for(let e=0;e<r.dims[0];++e){
          let t=r[e].data;
          for(let e=0;e<t.length;++e)t[e]=(t[e]-s[e])/a[e];
          t.set((0,p.medianFilter)(t,i))
        }
      }
    }
    // NOTE(review): `m` holds a single tensor, so `m[e]` is undefined for e > 0 —
    // batches larger than one look unsupported here; confirm.
    const m=[(0,c.mean)(h,1)],
    f=e.sequences.dims,
    g=new c.Tensor("float32",new Float32Array(f[0]*f[1]),f);
    for(let e=0;e<f[0];++e){
      const t=m[e].neg().squeeze_(0),
      [n,i]=(0,p.dynamic_time_warping)(t.tolist()),
      // A non-zero difference between consecutive entries of `n` marks a jump.
      a=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),
      s=(0,o.mergeArrays)([1],a).map((e=>!!e)),
      l=[];
      for(let e=0;e<s.length;++e)s[e]&&l.push(i[e]*r);
      // Written starting at offset 1, so position 0 keeps its initial 0.
      g[e].data.set(l,1)
    }
    return g
  }
}
i=this.config.image_token_index,a=n.tolist().map((e=>e.findIndex((e=>e==i)))),s=a.every((e=>-1===e)),o=a.every((e=>-1!==e));if(!s&&!o)throw new Error("Every input should contain either 0 or 1 image token.");if(s)return{inputs_embeds:e,attention_mask:r};const l=[],u=[];for(let n=0;n<a.length;++n){const i=a[n],s=e[n],o=t[n],d=r[n];l.push((0,c.cat)([s.slice([0,i]),o,s.slice([i+1,s.dims[0]])],0)),u.push((0,c.cat)([d.slice([0,i]),(0,c.ones)([o.dims[0]]),d.slice([i+1,d.dims[0]])],0))}return{inputs_embeds:(0,c.stack)(l,0),attention_mask:(0,c.stack)(u,0)}}}class dn extends un{}class cn extends V{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class pn extends cn{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:r}){return{inputs_embeds:(0,c.cat)([t,e],1),attention_mask:(0,c.cat)([(0,c.ones)(t.dims.slice(0,2)),r],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:r}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let i,a;return e&&(i=await this.encode_text({input_ids:e})),t&&(a=await this.encode_image({pixel_values:t})),i&&a?({inputs_embeds:n,attention_mask:r}=this._merge_input_ids_with_image_features({inputs_embeds:i,image_features:a,input_ids:e,attention_mask:r})):n=i||a,{inputs_embeds:n,attention_mask:r}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:r,decoder_attention_mask:i,encoder_outputs:a,past_key_values:s,inputs_embeds:o,decoder_inputs_embeds:l}){if(o||({inputs_embeds:o,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:o,attention_mask:n})),!a){let{last_hidden_state:e}=await O(this,{inputs_embeds:o,attention_mask:n});a=e}if(!l){if(!r)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:r})}const u={inputs_embeds:l,attention_mask:i,encoder_attention_mask:n,encoder_hidden_states:a,past_key_values:s};return await B(this,u,!0)}}class hn extends V{}class mn extends hn{}class fn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class gn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class _n extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class wn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class yn extends V{}class bn extends yn{}class vn extends yn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class xn extends hn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mn extends V{}class Tn extends Mn{}class kn extends V{}class $n extends kn{}class Sn extends kn{}class Cn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Pn extends Cn{}class En extends Cn{}class Fn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Fn{}class In extends Fn{}class zn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class On extends zn{}class Bn extends zn{}class Ln extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Dn extends Ln{}class Rn extends Ln{}class Nn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Vn extends Nn{}class jn extends Nn{}class Gn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class qn extends Gn{}class Un extends Gn{}class Wn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Hn extends 
Wn{}class Xn extends Wn{}class Kn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Kn{}class Yn extends Kn{}class Jn extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Zn extends Jn{}class er extends Jn{}class tr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class nr extends tr{}class rr extends tr{}class ir extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ar extends ir{}class sr extends ir{}class or extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class lr extends or{}class ur extends or{}class dr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class cr extends dr{}class pr extends dr{}class hr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class mr extends hr{}class fr extends hr{}class gr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class _r extends gr{}class wr extends gr{}class yr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class br extends yr{}class vr extends yr{}class xr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mr extends xr{}class Tr extends xr{}class kr extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class $r extends kr{}class Sr extends kr{}class Cr extends V{}class Pr extends Cr{}class Er extends Cr{async _call(e){return new sl(await super._call(e))}}class Fr extends V{}class Ar extends Fr{}class Ir extends Fr{async _call(e){return new sl(await super._call(e))}}class zr extends V{}class Or extends zr{}class Br extends V{}class Lr extends Br{}class Dr extends Br{async _call(e){return new sl(await super._call(e))}}class Rr extends V{}class Nr extends Rr{}class Vr extends V{}class jr extends Vr{}class Gr extends Vr{async _call(e){return new sl(await super._call(e))}}class qr extends V{}class Ur extends qr{async _call(e){return new hl(await super._call(e))}}class Wr extends V{}class Hr extends Wr{}class Xr extends 
Wr{async _call(e){return new sl(await super._call(e))}}class Kr extends V{}class Qr extends Kr{}class Yr extends Kr{async _call(e){return new sl(await super._call(e))}}class Jr extends V{}class Zr extends Jr{}class ei extends Jr{}class ti extends V{}class ni extends ti{}class ri extends ti{}class ii extends V{}class ai extends ii{}class si extends ii{async _call(e){return new sl(await super._call(e))}}class oi extends V{}class li extends oi{}class ui extends oi{async _call(e){return new ci(await super._call(e))}}class di extends oi{async _call(e){return new pi(await super._call(e))}}class ci extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class pi extends j{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class hi extends V{}class mi extends hi{}class fi extends hi{async _call(e){return new gi(await super._call(e))}}class gi extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class _i extends V{}class wi extends _i{}class yi extends _i{async _call(e){return new bi(await super._call(e))}}class bi extends ci{}class vi extends V{}class xi extends vi{}class Mi extends vi{async _call(e){return new sl(await super._call(e))}}class Ti extends V{}class ki extends Ti{}class $i extends Ti{async _call(e){return new sl(await super._call(e))}}class Si extends V{}class Ci extends Si{}class Pi extends Si{async _call(e){return new sl(await super._call(e))}}class Ei extends V{}class Fi extends Ei{}class Ai extends Ei{async _call(e){return new sl(await super._call(e))}}class Ii extends V{}class zi extends Ii{}class Oi extends Ii{}class Bi extends V{}class Li extends Bi{}class Di extends Bi{}class Ri extends V{}class Ni extends Ri{}class Vi extends V{}class ji extends Vi{}class Gi extends Vi{}class qi extends Vi{}class Ui extends V{}class Wi extends Ui{}class Hi extends Ui{}class Xi extends V{}class Ki extends Xi{}class Qi extends Xi{}class Yi extends 
V{}class Ji extends Yi{}class Zi extends V{}class ea extends Zi{}class ta extends Zi{async _call(e){return new sl(await super._call(e))}}class na extends V{}class ra extends na{}class ia extends na{async _call(e){return new sl(await super._call(e))}}class aa extends V{}class sa extends aa{}class oa extends aa{async _call(e){return new sl(await super._call(e))}}class la extends V{}class ua extends la{}class da extends la{async _call(e){return new ca(await super._call(e))}}class ca extends j{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class pa extends V{}class ha extends pa{async get_image_embeddings({pixel_values:e}){return await O(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new c.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await E(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new ma(await super._call(e))}}class ma extends j{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class fa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ga extends fa{}class _a extends fa{}class wa extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ya extends wa{}class ba extends wa{}class va extends V{}class xa extends va{}class Ma extends va{async _call(e){return new cl(await super._call(e))}}class Ta extends va{async _call(e){return new sl(await super._call(e))}}class ka extends va{async _call(e){return new ll(await super._call(e))}}class $a extends V{}class Sa extends $a{}class Ca extends 
$a{async _call(e){return new ll(await super._call(e))}}class Pa extends V{}class Ea extends Pa{}class Fa extends V{}class Aa extends Fa{}class Ia extends Fa{async _call(e){return new cl(await super._call(e))}}class za extends Fa{async _call(e){return new sl(await super._call(e))}}class Oa extends V{}class Ba extends Oa{}class La extends Oa{async _call(e){return new cl(await super._call(e))}}class Da extends Oa{async _call(e){return new sl(await super._call(e))}}class Ra extends Oa{async _call(e){return new ll(await super._call(e))}}class Na extends V{}class Va extends Na{}class ja extends Na{async _call(e){return new cl(await super._call(e))}}class Ga extends Na{async _call(e){return new sl(await super._call(e))}}class qa extends V{}class Ua extends va{}class Wa extends va{async _call(e){return new cl(await super._call(e))}}class Ha extends va{async _call(e){return new sl(await super._call(e))}}class Xa extends V{}class Ka extends Xa{}class Qa extends Xa{async _call(e){return new cl(await super._call(e))}}class Ya extends Xa{async _call(e){return new sl(await super._call(e))}}class Ja extends Xa{async _call(e){return new ol(await super._call(e))}}class Za extends Xa{async _call(e){return new ll(await super._call(e))}}class es extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ts extends es{}class ns extends es{}class rs extends es{async generate_speech(e,t,{threshold:n=.5,minlenratio:r=0,maxlenratio:i=20,vocoder:a=null}={}){const s={input_ids:e},{encoder_outputs:o,encoder_attention_mask:l}=await O(this,s),u=o.dims[1]/this.config.reduction_factor,d=Math.floor(u*i),p=Math.floor(u*r),h=this.config.num_mel_bins;let m=[],f=null,g=null,_=0;for(;;){++_;const e=I(!!g);let r;r=g?g.output_sequence_out:new c.Tensor("float32",new Float32Array(h),[1,1,h]);let i={use_cache_branch:e,output_sequence:r,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:o};this.addPastKeyValues(i,f),g=await 
E(this.sessions.decoder_model_merged,i),f=this.getPastKeyValues(g,f);const{prob:a,spectrum:s}=g;if(m.push(s),_>=p&&(Array.from(a.data).filter((e=>e>=n)).length>0||_>=d))break}const w=(0,c.cat)(m),{waveform:y}=await E(a.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:y}}}class is extends V{main_input_name="spectrogram"}class as extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ss extends as{}class os extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ls extends os{}class us extends os{}class ds extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class cs extends ds{}class ps extends ds{}class hs extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class ms extends hs{}class fs extends hs{}class gs extends V{}class _s extends gs{}class ws extends gs{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class ys extends gs{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class bs extends V{}class vs extends bs{async _call(e){return new ml(await super._call(e))}}class xs extends V{}class Ms extends xs{}class Ts extends xs{}class ks extends xs{}class $s extends V{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ss extends $s{}class Cs extends $s{}class Ps extends V{}class Es extends Ps{}class Fs extends Ps{async _call(e){return new sl(await super._call(e))}}class As extends V{}class Is extends As{}class zs extends As{}class Os extends V{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,r=this.config.decoder.num_codebooks,i=n-r;let a=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const 
s=t%n-Math.floor(t/n)%r;s>0&&s<=i&&(e.data[a++]=e.data[t])}const s=Math.floor(t/r),o=a/(s*r);return new c.Tensor(e.type,e.data.slice(0,a),[s,r,o])}
/**
 * Deep-clones the input id rows `e`, overwrites entry `[row][col]` with
 * `BigInt(pad_token_id)` wherever `row % num_codebooks >= col`, duplicates the
 * rows when `n.guidance_scale` is non-null and greater than 1, and then
 * delegates to the parent implementation.
 */
prepare_inputs_for_generation(e,t,n){let r=structuredClone(e);for(let e=0;e<r.length;++e)for(let t=0;t<r[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(r[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(r=r.concat(r));return super.prepare_inputs_for_generation(r,t,n)}
/**
 * Runs the parent `generate`, filters the result through
 * `_apply_and_filter_by_delay_pattern_mask`, adds a leading axis, and feeds
 * it to the `encodec_decode` session as `audio_codes`.
 * @returns {Promise<*>} The session's `audio_values` output.
 */
async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:r}=await E(this.sessions.encodec_decode,{audio_codes:n});return r}}class Bs extends V{}class Ls extends Bs{}class Ds extends Bs{async _call(e){return new sl(await super._call(e))}}class Rs extends V{}class Ns extends Rs{}class Vs extends Rs{async _call(e){return new sl(await super._call(e))}}class js extends V{}class Gs extends js{}class qs extends js{async _call(e){return new sl(await super._call(e))}}class Us extends V{}class Ws extends Us{}class Hs extends Us{async _call(e){return new sl(await super._call(e))}}class Xs extends V{}class Ks extends Xs{}
/**
 * Base for the Auto* loader classes declared later in this file. Subclasses
 * set `MODEL_CLASS_MAPPINGS` to an array of Maps keyed by `config.model_type`,
 * and may set `BASE_IF_FAIL` to fall back to the generic base class `V`.
 */
class Qs{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;
/**
 * Loads the config via `r.AutoConfig.from_pretrained`, then searches each
 * mapping in order for `config.model_type` and delegates to the matching
 * entry's class (`entry[1].from_pretrained`).
 *
 * @param {*} e Model identifier, forwarded unchanged to the config and model loaders.
 * @param {Object} [options] Loader options; collected into one object that is
 *   passed to both `AutoConfig.from_pretrained` and the model class.
 * @returns {Promise<*>} The instantiated model.
 * @throws {Error} If `MODEL_CLASS_MAPPINGS` is unset, or if no mapping has
 *   the model type and `BASE_IF_FAIL` is false.
 */
static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:l="onnx",device:u=null,dtype:d=null,use_external_data_format:c=null,session_options:p={}}={}){const h={progress_callback:t,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:l,device:u,dtype:d,use_external_data_format:c,session_options:p};if(h.config=await r.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const n=t.get(h.config.model_type);if(n)return await n[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown 
model class "${h.config.model_type}", attempting to construct from base class.`),await V.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}
/* Registry tables: each Map goes from a `config.model_type` string to an
   `[exported class name, class]` pair, which is the shape that
   `Qs.from_pretrained` above reads via `entry[1]`. */
const Ys=new Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",Y]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ce]],["esm",["EsmModel",Ve]],["convbert",["ConvBertModel",ae]],["camembert",["CamembertModel",_e]],["deberta",["DebertaModel",Me]],["deberta-v2",["DebertaV2Model",Pe]],["mpnet",["MPNetModel",Ye]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Oe]],["roberta",["RobertaModel",Lt]],["xlm",["XLMModel",Gt]],["xlm-roberta",["XLMRobertaModel",Kt]],["clap",["ClapModel",_s]],["clip",["CLIPModel",mn]],["clipseg",["CLIPSegModel",$n]],["chinese_clip",["ChineseCLIPModel",Tn]],["siglip",["SiglipModel",bn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",rt]],["wav2vec2",["Wav2Vec2Model",xa]],["wav2vec2-bert",["Wav2Vec2BertModel",Va]],["unispeech",["UniSpeechModel",Aa]],["unispeech-sat",["UniSpeechSatModel",Ba]],["hubert",["HubertModel",Ua]],["wavlm",["WavLMModel",Ka]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",vs]],["pyannote",["PyAnnoteModel",Sa]],["wespeaker-resnet",["WeSpeakerResNetModel",Ea]],["detr",["DetrModel",li]],["rt_detr",["RTDetrModel",mi]],["table-transformer",["TableTransformerModel",wi]],["vit",["ViTModel",Pr]],["pvt",["PvtModel",Ar]],["vit_msn",["ViTMSNModel",Lr]],["vit_mae",["ViTMAEModel",Or]],["groupvit",["GroupViTModel",Nr]],["fastvit",["FastViTModel",jr]],["mobilevit",["MobileViTModel",Hr]],["mobilevitv2",["MobileViTV2Model",Qr]],["owlvit",["OwlViTModel",Zr]],["owlv2",["Owlv2Model",ni]],["beit",["BeitModel",ai]],["deit",["DeiTModel",xi]],["hiera",["HieraModel",ki]],["convnext",["ConvNextModel",ea]],["convnextv2",["ConvNextV2Model",ra]],["dinov2",["Dinov2Model",sa]],["resnet",["ResNetModel",Ci]],["swin",["SwinModel",Fi]],["swin2sr",["Swin2SRModel",zi]],["donut-swin",["DonutSwinModel",Ji]
],["yolos",["YolosModel",ua]],["dpt",["DPTModel",Li]],["glpn",["GLPNModel",Ki]],["hifigan",["SpeechT5HifiGan",is]],["efficientnet",["EfficientNetModel",Es]],["decision_transformer",["DecisionTransformerModel",Ks]],["mobilenet_v1",["MobileNetV1Model",Ls]],["mobilenet_v2",["MobileNetV2Model",Ns]],["mobilenet_v3",["MobileNetV3Model",Gs]],["mobilenet_v4",["MobileNetV4Model",Ws]],["maskformer",["MaskFormerModel",Wi]]]),Js=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",gt]],["mt5",["MT5Model",yt]],["bart",["BartModel",xt]],["mbart",["MBartModel",$t]],["marian",["MarianModel",ga]],["whisper",["WhisperModel",an]],["m2m_100",["M2M100Model",ya]],["blenderbot",["BlenderbotModel",Ft]],["blenderbot-small",["BlenderbotSmallModel",zt]]]),Zs=new Map([["bloom",["BloomModel",br]],["jais",["JAISModel",An]],["gpt2",["GPT2Model",Pn]],["gptj",["GPTJModel",Vn]],["gpt_bigcode",["GPTBigCodeModel",qn]],["gpt_neo",["GPTNeoModel",On]],["gpt_neox",["GPTNeoXModel",Dn]],["codegen",["CodeGenModel",Hn]],["llama",["LlamaModel",Qn]],["cohere",["CohereModel",Zn]],["gemma",["GemmaModel",nr]],["gemma2",["Gemma2Model",ar]],["openelm",["OpenELMModel",lr]],["qwen2",["Qwen2Model",cr]],["phi",["PhiModel",mr]],["phi3",["Phi3Model",_r]],["mpt",["MptModel",Mr]],["opt",["OPTModel",$r]],["mistral",["MistralModel",ls]],["starcoder2",["Starcoder2Model",cs]],["falcon",["FalconModel",ms]],["stablelm",["StableLmModel",Ss]]]),eo=new Map([["speecht5",["SpeechT5ForSpeechToText",ns]],["whisper",["WhisperForConditionalGeneration",sn]]]),to=new Map([["speecht5",["SpeechT5ForTextToSpeech",rs]]]),no=new Map([["vits",["VitsModel",vs]],["musicgen",["MusicgenForConditionalGeneration",Os]]]),ro=new 
Map([["bert",["BertForSequenceClassification",H]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",Ge]],["convbert",["ConvBertForSequenceClassification",oe]],["camembert",["CamembertForSequenceClassification",ye]],["deberta",["DebertaForSequenceClassification",ke]],["deberta-v2",["DebertaV2ForSequenceClassification",Fe]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ut]],["distilbert",["DistilBertForSequenceClassification",Be]],["roberta",["RobertaForSequenceClassification",Rt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Yt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Ct]],["mobilebert",["MobileBertForSequenceClassification",Xe]],["squeezebert",["SqueezeBertForSequenceClassification",at]]]),io=new Map([["bert",["BertForTokenClassification",X]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",$e]],["deberta-v2",["DebertaV2ForTokenClassification",Ae]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Le]],["roberta",["RobertaForTokenClassification",Nt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),ao=new 
Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",_t]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",Mt]],["mbart",["MBartForConditionalGeneration",St]],["marian",["MarianMTModel",_a]],["m2m_100",["M2M100ForConditionalGeneration",ba]],["blenderbot",["BlenderbotForConditionalGeneration",At]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Ot]]]),so=new Map([["bloom",["BloomForCausalLM",vr]],["gpt2",["GPT2LMHeadModel",En]],["jais",["JAISLMHeadModel",In]],["gptj",["GPTJForCausalLM",jn]],["gpt_bigcode",["GPTBigCodeForCausalLM",Un]],["gpt_neo",["GPTNeoForCausalLM",Bn]],["gpt_neox",["GPTNeoXForCausalLM",Rn]],["codegen",["CodeGenForCausalLM",Xn]],["llama",["LlamaForCausalLM",Yn]],["cohere",["CohereForCausalLM",er]],["gemma",["GemmaForCausalLM",rr]],["gemma2",["Gemma2ForCausalLM",sr]],["openelm",["OpenELMForCausalLM",ur]],["qwen2",["Qwen2ForCausalLM",pr]],["phi",["PhiForCausalLM",fr]],["phi3",["Phi3ForCausalLM",wr]],["mpt",["MptForCausalLM",Tr]],["opt",["OPTForCausalLM",Sr]],["mbart",["MBartForCausalLM",Pt]],["mistral",["MistralForCausalLM",us]],["starcoder2",["Starcoder2ForCausalLM",ps]],["falcon",["FalconForCausalLM",fs]],["trocr",["TrOCRForCausalLM",ss]],["stablelm",["StableLmForCausalLM",Cs]]]),oo=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",je]],["convbert",["ConvBertForMaskedLM",se]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Ee]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ct]],["distilbert",["DistilBertForMaskedLM",Re]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Qt]],["mobilebert",["MobileBertForMaskedLM",He]],["squeezebert",["SqueezeBertForMaskedLM",it]]]),lo=new 
Map([["bert",["BertForQuestionAnswering",K]],["roformer",["RoFormerForQuestionAnswering",re]],["electra",["ElectraForQuestionAnswering",fe]],["convbert",["ConvBertForQuestionAnswering",ue]],["camembert",["CamembertForQuestionAnswering",ve]],["deberta",["DebertaForQuestionAnswering",Se]],["deberta-v2",["DebertaV2ForQuestionAnswering",Ie]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Vt]],["xlm",["XLMForQuestionAnswering",Ht]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",Ke]],["squeezebert",["SqueezeBertForQuestionAnswering",st]]]),uo=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),co=new Map([["llava",["LlavaForConditionalGeneration",un]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",pn]]]),po=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",on]]]),ho=new 
Map([["vit",["ViTForImageClassification",Er]],["pvt",["PvtForImageClassification",Ir]],["vit_msn",["ViTMSNForImageClassification",Dr]],["fastvit",["FastViTForImageClassification",Gr]],["mobilevit",["MobileViTForImageClassification",Xr]],["mobilevitv2",["MobileViTV2ForImageClassification",Yr]],["beit",["BeitForImageClassification",si]],["deit",["DeiTForImageClassification",Mi]],["hiera",["HieraForImageClassification",$i]],["convnext",["ConvNextForImageClassification",ta]],["convnextv2",["ConvNextV2ForImageClassification",ia]],["dinov2",["Dinov2ForImageClassification",oa]],["resnet",["ResNetForImageClassification",Pi]],["swin",["SwinForImageClassification",Ai]],["segformer",["SegformerForImageClassification",Ts]],["efficientnet",["EfficientNetForImageClassification",Fs]],["mobilenet_v1",["MobileNetV1ForImageClassification",Ds]],["mobilenet_v2",["MobileNetV2ForImageClassification",Vs]],["mobilenet_v3",["MobileNetV3ForImageClassification",qs]],["mobilenet_v4",["MobileNetV4ForImageClassification",Hs]]]),mo=new Map([["detr",["DetrForObjectDetection",ui]],["rt_detr",["RTDetrForObjectDetection",fi]],["table-transformer",["TableTransformerForObjectDetection",yi]],["yolos",["YolosForObjectDetection",da]]]),fo=new Map([["owlvit",["OwlViTForObjectDetection",ei]],["owlv2",["Owlv2ForObjectDetection",ri]]]),go=new Map([["detr",["DetrForSegmentation",di]],["clipseg",["CLIPSegForImageSegmentation",Sn]]]),_o=new Map([["segformer",["SegformerForSemanticSegmentation",ks]],["sapiens",["SapiensForSemanticSegmentation",ji]]]),wo=new Map([["detr",["DetrForSegmentation",di]],["maskformer",["MaskFormerForInstanceSegmentation",Hi]]]),yo=new Map([["sam",["SamModel",ha]]]),bo=new Map([["wav2vec2",["Wav2Vec2ForCTC",Ma]],["wav2vec2-bert",["Wav2Vec2BertForCTC",ja]],["unispeech",["UniSpeechForCTC",Ia]],["unispeech-sat",["UniSpeechSatForCTC",La]],["wavlm",["WavLMForCTC",Qa]],["hubert",["HubertForCTC",Wa]]]),vo=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Ta]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Ga]],["unispeech",["UniSpeechForSequenceClassification",za]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Da]],["wavlm",["WavLMForSequenceClassification",Ya]],["hubert",["HubertForSequenceClassification",Ha]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),xo=new Map([["wavlm",["WavLMForXVector",Ja]]]),Mo=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",Ra]],["wavlm",["WavLMForAudioFrameClassification",Za]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",ka]],["pyannote",["PyAnnoteForAudioFrameClassification",Ca]]]),To=new Map([["vitmatte",["VitMatteForImageMatting",Ur]]]),ko=new Map([["swin2sr",["Swin2SRForImageSuperResolution",Oi]]]),$o=new Map([["dpt",["DPTForDepthEstimation",Di]],["depth_anything",["DepthAnythingForDepthEstimation",Ni]],["glpn",["GLPNForDepthEstimation",Qi]],["sapiens",["SapiensForDepthEstimation",Gi]]]),So=new Map([["sapiens",["SapiensForNormalEstimation",qi]]]),Co=new Map([["clip",["CLIPVisionModelWithProjection",wn]],["siglip",["SiglipVisionModel",xn]]]),Po=[[Ys,w],[Js,y],[Zs,x],[ro,w],[io,w],[ao,b],[eo,b],[so,x],[oo,w],[lo,w],[uo,v],[co,T],[ho,w],[go,w],[wo,w],[_o,w],[To,w],[ko,w],[$o,w],[So,w],[mo,w],[fo,w],[yo,M],[bo,w],[vo,w],[to,b],[no,w],[xo,w],[Mo,w],[Co,w]];for(const[e,t]of Po)for(const[n,r]of e.values())$.set(n,t),C.set(r,n),S.set(n,r);const Eo=[["MusicgenForConditionalGeneration",Os,k],["CLIPTextModelWithProjection",gn,w],["SiglipTextModel",vn,w],["ClapTextModelWithProjection",ws,w],["ClapAudioModelWithProjection",ys,w]];for(const[e,t,n]of Eo)$.set(e,n),C.set(t,e),S.set(e,t);class Fo extends Qs{static MODEL_CLASS_MAPPINGS=Po.map((e=>e[0]));static BASE_IF_FAIL=!0}class Ao extends Qs{static MODEL_CLASS_MAPPINGS=[ro]}class Io extends Qs{static MODEL_CLASS_MAPPINGS=[io]}class zo extends Qs{static MODEL_CLASS_MAPPINGS=[ao]}class Oo extends Qs{static 
MODEL_CLASS_MAPPINGS=[eo]}class Bo extends Qs{static MODEL_CLASS_MAPPINGS=[to]}class Lo extends Qs{static MODEL_CLASS_MAPPINGS=[no]}class Do extends Qs{static MODEL_CLASS_MAPPINGS=[so]}class Ro extends Qs{static MODEL_CLASS_MAPPINGS=[oo]}class No extends Qs{static MODEL_CLASS_MAPPINGS=[lo]}class Vo extends Qs{static MODEL_CLASS_MAPPINGS=[uo]}class jo extends Qs{static MODEL_CLASS_MAPPINGS=[ho]}class Go extends Qs{static MODEL_CLASS_MAPPINGS=[go]}class qo extends Qs{static MODEL_CLASS_MAPPINGS=[_o]}class Uo extends Qs{static MODEL_CLASS_MAPPINGS=[wo]}class Wo extends Qs{static MODEL_CLASS_MAPPINGS=[mo]}class Ho extends Qs{static MODEL_CLASS_MAPPINGS=[fo]}class Xo extends Qs{static MODEL_CLASS_MAPPINGS=[yo]}class Ko extends Qs{static MODEL_CLASS_MAPPINGS=[bo]}class Qo extends Qs{static MODEL_CLASS_MAPPINGS=[vo]}class Yo extends Qs{static MODEL_CLASS_MAPPINGS=[xo]}class Jo extends Qs{static MODEL_CLASS_MAPPINGS=[Mo]}class Zo extends Qs{static MODEL_CLASS_MAPPINGS=[po]}class el extends Qs{static MODEL_CLASS_MAPPINGS=[To]}class tl extends Qs{static MODEL_CLASS_MAPPINGS=[ko]}class nl extends Qs{static MODEL_CLASS_MAPPINGS=[$o]}class rl extends Qs{static MODEL_CLASS_MAPPINGS=[So]}class il extends Qs{static MODEL_CLASS_MAPPINGS=[Co]}class al extends j{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:r=null,cross_attentions:i=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=r,this.cross_attentions=i}}class sl extends j{constructor({logits:e}){super(),this.logits=e}}class ol extends j{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class ll extends j{constructor({logits:e}){super(),this.logits=e}}class ul extends j{constructor({logits:e}){super(),this.logits=e}}class dl extends j{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class cl extends j{constructor({logits:e}){super(),this.logits=e}}class pl extends 
j{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class hl extends j{constructor({alphas:e}){super(),this.alphas=e}}class ml extends j{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
176
176
  /*!**********************************************!*\
177
177
  !*** ./src/models/whisper/common_whisper.js ***!
178
178
  \**********************************************/(e,t,n)=>{n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>i,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>a,whisper_language_to_code:()=>s});const r=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],i=new Map(r),a=new 
Map([...r.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function s(e){e=e.toLowerCase();let t=a.get(e);if(void 0===t){if(!i.has(e)){const t=2===e.length?i.keys():i.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -190,7 +190,7 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,P,E,F,A,I,z,O,B=Object.d
190
190
  \***************************/(e,t,n)=>{n.r(t),n.d(t,{ASTFeatureExtractor:()=>ae,AutoProcessor:()=>we,BeitFeatureExtractor:()=>H,BitImageProcessor:()=>$,CLIPFeatureExtractor:()=>C,CLIPImageProcessor:()=>P,ChineseCLIPFeatureExtractor:()=>E,ClapFeatureExtractor:()=>se,ConvNextFeatureExtractor:()=>A,ConvNextImageProcessor:()=>I,DPTFeatureExtractor:()=>T,DPTImageProcessor:()=>k,DeiTFeatureExtractor:()=>W,DetrFeatureExtractor:()=>Q,DonutFeatureExtractor:()=>X,EfficientNetImageProcessor:()=>B,FeatureExtractor:()=>y,Florence2Processor:()=>_e,GLPNFeatureExtractor:()=>S,ImageFeatureExtractor:()=>b,MaskFormerFeatureExtractor:()=>Y,MobileNetV1FeatureExtractor:()=>L,MobileNetV2FeatureExtractor:()=>D,MobileNetV3FeatureExtractor:()=>R,MobileNetV4FeatureExtractor:()=>N,MobileViTFeatureExtractor:()=>V,MobileViTImageProcessor:()=>j,NougatImageProcessor:()=>K,OwlViTFeatureExtractor:()=>G,OwlViTProcessor:()=>ge,Owlv2ImageProcessor:()=>q,Processor:()=>de,PvtImageProcessor:()=>M,PyAnnoteFeatureExtractor:()=>oe,PyAnnoteProcessor:()=>me,RTDetrImageProcessor:()=>U,SamImageProcessor:()=>Z,SamProcessor:()=>ce,SapiensFeatureExtractor:()=>v,SeamlessM4TFeatureExtractor:()=>ie,SegformerFeatureExtractor:()=>x,SiglipImageProcessor:()=>F,SpeechT5FeatureExtractor:()=>ue,SpeechT5Processor:()=>fe,Swin2SRImageProcessor:()=>ee,ViTFeatureExtractor:()=>z,ViTImageProcessor:()=>O,VitMatteImageProcessor:()=>te,Wav2Vec2FeatureExtractor:()=>re,Wav2Vec2ProcessorWithLM:()=>he,WeSpeakerFeatureExtractor:()=>le,WhisperFeatureExtractor:()=>ne,WhisperProcessor:()=>pe,YolosFeatureExtractor:()=>J});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(n(/*! ./utils/image.js */"./src/utils/image.js"),n(/*! 
./utils/audio.js */"./src/utils/audio.js"));function u([e,t,n,r]){return[e-n/2,t-r/2,e+n/2,t+r/2]}function d(e,t=.5,n=null,r=!1){const i=e.logits,a=e.pred_boxes,[o,l,d]=i.dims;if(null!==n&&n.length!==o)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let c=[];for(let e=0;e<o;++e){let o=null!==n?n[e]:null,p={boxes:[],classes:[],scores:[]},h=i[e],m=a[e];for(let e=0;e<l;++e){let n,i=h[e],a=[];if(r){n=i.sigmoid().data;for(let e=0;e<n.length;++e)n[e]>t&&a.push(e)}else{let e=(0,s.max)(i.data)[1];if(e===d-1)continue;if(n=(0,s.softmax)(i.data),n[e]<t)continue;a.push(e)}for(const t of a){let r=m[e].data;r=u(r),null!==o&&(r=r.map(((e,t)=>e*o[(t+1)%2]))),p.boxes.push(r),p.classes.push(t),p.scores.push(n[t])}}c.push(p)}return c}function c(e,t=null){const n=e.logits,r=n.dims[0];if(null!==t&&t.length!==r)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const i=[];for(let e=0;e<r;++e){const r=null!==t?t[e]:null;let a=n[e];null!==r&&(a=(0,o.interpolate)(a,r,"bilinear",!1));const[s,l]=r??a.dims.slice(-2),u=new o.Tensor("int32",new Int32Array(s*l),[s,l]),d=a[0].data,c=u.data;for(let e=1;e<a.dims[0];++e){const t=a[e].data;for(let n=0;n<t.length;++n)t[n]>d[n]&&(d[n]=t[n],c[n]=e)}const p=new Array(a.dims[0]);for(let e=0;e<c.length;++e){const t=c[e];p[t]=t}const h=p.filter((e=>void 0!==e));i.push({segmentation:u,labels:h})}return i}function p(e,t,n,r){const i=[],a=[],o=[];for(let l=0;l<e.dims[0];++l){const u=e[l],d=t[l],c=(0,s.max)(u.data)[1];if(c===r)continue;const p=(0,s.softmax)(u.data)[c];p>n&&(i.push(d),a.push(p),o.push(c))}return[i,a,o]}function h(e,t,n,r=.5,i=.8){const a=[];let s=0,o=0;const l=t[n].data;for(let t=0;t<e.length;++t)e[t]===n&&(a.push(t),++s),l[t]>=r&&++o;let u=s>0&&o>0;if(u){u=s/o>i}return[u,a]}function m(e,t,n,r,i,a=null,s=null){const[l,u]=s??e[0].dims,d=new o.Tensor("int32",new Int32Array(l*u),[l,u]),c=[];if(null!==s)for(let 
t=0;t<e.length;++t)e[t]=(0,o.interpolate)(e[t],s,"bilinear",!1);const p=new Int32Array(e[0].data.length),m=new Float32Array(e[0].data.length);for(let n=0;n<e.length;++n){let r=t[n];const i=e[n].data;for(let e=0;e<i.length;++e)i[e]*=r,i[e]>m[e]&&(p[e]=n,m[e]=i[e])}let f=0;const g=d.data;for(let a=0;a<n.length;++a){const s=n[a],[o,l]=h(p,e,a,r,i);if(o){++f;for(const e of l)g[e]=f;c.push({id:f,label_id:s,score:t[a]})}}return[d,c]}function f(e,t=.5,n=.5,r=.8,i=null,a=null){null===i&&(console.warn("`label_ids_to_fuse` unset. No instance will be fused."),i=new Set);const s=e.class_queries_logits??e.logits,l=(e.masks_queries_logits??e.pred_masks).sigmoid();let[u,d,c]=s.dims;if(c-=1,null!==a&&a.length!==u)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let h=[];for(let e=0;e<u;++e){let u=null!==a?a[e]:null,d=s[e],f=l[e],[g,_,w]=p(d,f,t,c);if(0===w.length){let[e,t]=u??f.dims.slice(-2),n=new o.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);h.push({segmentation:n,segments_info:[]});continue}let[y,b]=m(g,_,w,n,r,i,u);h.push({segmentation:y,segments_info:b})}return h}function g(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function _(e,t,n=0,r=null){const i=e/t;let a=(0,s.bankers_round)(i)*t;return null!==r&&a>r&&(a=Math.floor(i)*t),a<n&&(a=Math.ceil(i)*t),a}function w([e,t],n){return[Math.max(Math.floor(e/n),1)*n,Math.max(Math.floor(t/n),1)*n]}class y extends r.Callable{constructor(e){super(),this.config=e}}class b extends y{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,n=2){const r=e.height,i=e.width,a=t.height,s=t.width;let o=Math.min(r,a),l=Math.min(i,s);return o===r&&l===i?e:(r>i?l=Math.floor(i*o/r):i>r&&(o=Math.floor(r*l/i)),await e.resize(l,o,{resample:n}))}async crop_margin(e,t=200){const n=e.clone().grayscale(),r=(0,s.min)(n.data)[0],i=(0,s.max)(n.data)[0]-r;if(0===i)return e;const a=t/255;let o=n.width,l=n.height,u=0,d=0;const c=n.data;for(let e=0;e<n.height;++e){const t=e*n.width;for(let s=0;s<n.width;++s)(c[t+s]-r)/i<a&&(o=Math.min(o,s),l=Math.min(l,e),u=Math.max(u,s),d=Math.max(d,e))}return e=await 
e.crop([o,l,u,d])}pad_image(e,t,n,{mode:r="constant",center:a=!1,constant_values:s=0}={}){const[o,l,u]=t;let d,c;if("number"==typeof n?(d=n,c=n):(d=n.width,c=n.height),d!==l||c!==o){const n=new Float32Array(d*c*u);if(Array.isArray(s))for(let e=0;e<n.length;++e)n[e]=s[e%u];else 0!==s&&n.fill(s);const[p,h]=a?[Math.floor((d-l)/2),Math.floor((c-o)/2)]:[0,0];for(let t=0;t<o;++t){const r=(t+h)*d,i=t*l;for(let t=0;t<l;++t){const a=(r+t+p)*u,s=(i+t)*u;for(let t=0;t<u;++t)n[a+t]=e[s+t]}}if("symmetric"===r){if(a)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=o-1,r=l-1;for(let a=0;a<c;++a){const s=a*d,c=(0,i.calculateReflectOffset)(a,t)*l;for(let t=0;t<d;++t){if(a<o&&t<l)continue;const d=(s+t)*u,p=(c+(0,i.calculateReflectOffset)(t,r))*u;for(let t=0;t<u;++t)n[d+t]=e[p+t]}}}e=n,t=[c,d,u]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[n,r]=e.size;let i,a;if(this.do_thumbnail){const{height:e,width:n}=t;i=Math.min(e,n)}else Number.isInteger(t)?(i=t,a=this.config.max_size??i):void 0!==t&&(i=t.shortest_edge,a=t.longest_edge);if(void 0!==i||void 0!==a){const e=void 0===i?1:Math.max(i/n,i/r),t=n*e,s=r*e,o=void 0===a?1:Math.min(a/t,a/s);let l=Math.floor(Number((t*o).toFixed(2))),u=Math.floor(Number((s*o).toFixed(2)));return void 0!==this.size_divisibility&&([l,u]=w([l,u],this.size_divisibility)),[l,u]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,i=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let t=i/r,a=e/n;Math.abs(1-a)<Math.abs(1-t)?t=a:a=t,i=_(t*r,this.config.ensure_multiple_of),e=_(a*n,this.config.ensure_multiple_of)}return[e,i]}if(void 0!==this.size_divisibility)return w([n,r],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,n]=this.get_resize_output_image_size(e,this.size);return await 
e.resize(t,n,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:n=null,do_convert_rgb:r=null,do_convert_grayscale:i=null,do_flip_channel_order:a=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[s,l]=e.size;if(r??this.do_convert_rgb?e=e.rgb():i&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,n;Number.isInteger(this.crop_size)?(t=this.crop_size,n=this.crop_size):(t=this.crop_size.width,n=this.crop_size.height),e=await e.center_crop(t,n)}const u=[e.height,e.width];let d=Float32Array.from(e.data),c=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let n=this.image_std;if(Array.isArray(this.image_std)||(n=new Array(e.channels).fill(t)),t.length!==e.channels||n.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${n.length}) must match the number of channels in the image (${e.channels}).`);for(let r=0;r<d.length;r+=e.channels)for(let i=0;i<e.channels;++i)d[r+i]=(d[r+i]-t[i])/n[i]}if(n??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,c]=t}else if(this.size_divisibility){const[e,t]=w([c[1],c[0]],this.size_divisibility);[d,c]=this.pad_image(d,c,{width:e,height:t})}if(a??this.do_flip_channel_order){if(3!==c[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,s],reshaped_input_size:u,pixel_values:new o.Tensor("float32",d,c).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const n=await 
Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,o.stack)(n.map((e=>e.pixel_values)),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class v extends b{post_process_semantic_segmentation(...e){return c(...e)}}class x extends b{post_process_semantic_segmentation(...e){return c(...e)}}class M extends b{}class T extends b{}class k extends T{}class $ extends b{}class S extends b{}class C extends b{}class P extends C{}class E extends b{}class F extends b{}class A extends b{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const n=Math.floor(t/this.crop_pct),[r,i]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(r,i,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class I extends A{}class z extends b{}class O extends b{}class B extends b{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class L extends b{}class D extends b{}class R extends b{}class N extends b{}class V extends b{}class j extends V{}class G extends b{post_process_object_detection(...e){return d(...e)}}class q extends G{}class U extends b{post_process_object_detection(...e){return d(...e)}}class W extends b{}class H extends b{}class X extends b{pad_image(e,t,n,r={}){const[i,a,s]=t;let o=this.image_mean;Array.isArray(this.image_mean)||(o=new Array(s).fill(o));let l=this.image_std;Array.isArray(l)||(l=new Array(s).fill(o));const u=o.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,n,{center:!0,constant_values:u,...r})}}class K extends X{}class Q extends b{async _call(e){const t=await 
super._call(e),n=[t.pixel_values.dims[0],64,64],r=(0,o.full)(n,1n);return{...t,pixel_mask:r}}post_process_object_detection(...e){return d(...e)}post_process_panoptic_segmentation(...e){return f(...e)}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class Y extends b{post_process_panoptic_segmentation(...e){return f(...e)}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class J extends b{post_process_object_detection(...e){return d(...e)}}class Z extends b{reshape_input_points(e,t,n,r=!1){e=structuredClone(e);let a=(0,i.calculateDimensions)(e);if(3===a.length)r||(a=[1,...a]),e=[e];else if(4!==a.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let r=0;r<e.length;++r){let i=t[r],a=n[r],s=[a[0]/i[0],a[1]/i[1]];for(let t=0;t<e[r].length;++t)for(let n=0;n<e[r][t].length;++n)for(let i=0;i<e[r][t][n].length;++i)e[r][t][n][i]*=s[i%2]}return new o.Tensor("float32",Float32Array.from(e.flat(1/0)),a)}add_input_labels(e,t){let n=(0,i.calculateDimensions)(e);if(2===n.length)n=[1,...n],e=[e];else if(3!==n.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(n.some(((e,n)=>e!==t.dims[n])))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new o.Tensor("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:t=null,input_labels:n=null,input_boxes:r=null}={}){const i=await super._call(e);if(t&&(i.input_points=this.reshape_input_points(t,i.original_sizes,i.reshaped_input_sizes)),n){if(!i.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");i.input_labels=this.add_input_labels(n,i.input_points)}return r&&(i.input_boxes=this.reshape_input_points(r,i.original_sizes,i.reshaped_input_sizes,!0)),i}async 
post_process_masks(e,t,n,{mask_threshold:r=0,binarize:i=!0,pad_size:a=null}={}){const s=[],l=[(a=a??this.pad_size).height,a.width];for(let a=0;a<t.length;++a){const u=t[a],d=n[a];let c=await(0,o.interpolate_4d)(e[a],{mode:"bilinear",size:l});if(c=c.slice(null,null,[0,d[0]],[0,d[1]]),c=await(0,o.interpolate_4d)(c,{mode:"bilinear",size:u}),i){const e=c.data,t=new Uint8Array(e.length);for(let n=0;n<e.length;++n)e[n]>r&&(t[n]=1);c=new o.Tensor("bool",t,c.dims)}s.push(c)}return s}generate_crop_boxes(e,t,{crop_n_layers:n=0,overlap_ratio:r=512/1500,points_per_crop:i=32,crop_n_points_downscale_factor:a=1}={}){}}class ee extends b{pad_image(e,t,n,r={}){const[i,a,s]=t;return super.pad_image(e,t,{width:a+(n-a%n)%n,height:i+(n-i%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...r})}}class te extends b{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const n=await Promise.all(e.map((e=>this.preprocess(e)))),r=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,o.stack)(n.map(((e,t)=>(0,o.cat)([e.pixel_values,r[t].pixel_values],0))),0),original_sizes:n.map((e=>e.original_size)),reshaped_input_sizes:n.map((e=>e.reshaped_input_size))}}}class ne extends y{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=t.data,r=(0,s.max)(n)[0];for(let e=0;e<n.length;++e)n[e]=(Math.max(n[e],r-8)+4)/4;return t}async _call(e){let t;g(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. 
If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class re extends y{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,n=e.reduce(((e,n)=>e+(n-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(n+1e-7)))}async _call(e){g(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const n=[1,t.length];return{input_values:new o.Tensor("float32",t,n),attention_mask:new o.Tensor("int64",new BigInt64Array(t.length).fill(1n),n)}}}class ie extends y{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:r=!0,return_attention_mask:i=!0}={}){g(e,"SeamlessM4TFeatureExtractor");let a,s=await this._extract_fbank_features(e,this.config.max_length);if(r){const[e,t]=s.dims,n=s.data;for(let r=0;r<t;++r){let i=0;for(let a=0;a<e;++a)i+=n[a*t+r];const a=i/e;let s=0;for(let i=0;i<e;++i)s+=(n[i*t+r]-a)**2;s/=e-1;const o=Math.sqrt(s+1e-7);for(let i=0;i<e;++i){const e=i*t+r;n[e]=(n[e]-a)/o}}}if(t){const[e,t]=s.dims,r=s.data,l=e%n;if(l>0){const n=new Float32Array(t*(e+l));n.set(r),n.fill(this.config.padding_value,r.length);const u=e+l;s=new o.Tensor(s.type,n,[u,t]),i&&(a=new 
o.Tensor("int64",new BigInt64Array(u),[1,u]),a.data.fill(1n,0,e))}}const[l,u]=s.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const c=s.view(1,Math.floor(l/d),u*d),p={input_features:c};if(i){const e=c.dims[1],t=new BigInt64Array(e);if(a){const e=a.data;for(let n=1,r=0;n<l;n+=d,++r)t[r]=e[n]}else t.fill(1n);p.attention_mask=new o.Tensor("int64",t,[1,e])}return p}}class ae extends y{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async _extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){g(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,n=t.data;for(let t=0;t<n.length;++t)n[t]=(n[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class se extends y{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,n,r){let i,a=!1;const s=e.length-t;if(s>0){if("rand_trunc"!==n)throw new Error(`Truncation strategy "${n}" not implemented`);{a=!0;const 
n=Math.floor(Math.random()*(s+1));e=e.subarray(n,n+t),i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(s<0){let n=new Float64Array(t);if(n.set(e),"repeat"===r)for(let r=e.length;r<t;r+=e.length)n.set(e.subarray(0,Math.min(e.length,t-r)),r);else if("repeatpad"===r)for(let t=e.length;t<-s;t+=e.length)n.set(e,t);e=n}if("fusion"===n)throw new Error(`Truncation strategy "${n}" not implemented`);i=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return i.unsqueeze_(0)}async _extract_fbank_features(e,t,n=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:n,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){g(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class oe extends y{async _call(e){g(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new o.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const n=t/this.samples_to_frames(t)/this.config.sampling_rate,r=[];for(const t of e.tolist()){const e=[];let i=-1;for(let n=0;n<t.length;++n){const r=(0,s.softmax)(t[n]),[a,o]=(0,s.max)(r),[l,u]=[n,n+1];o!==i?(i=o,e.push({id:o,start:l,end:u,score:a})):(e.at(-1).end=u,e.at(-1).score+=a)}r.push(e.map((({id:e,start:t,end:r,score:i})=>({id:e,start:t*n,end:r*n,confidence:i/(r-t)}))))}return r}}class le extends y{constructor(e){super(e);const t=this.config.sampling_rate,n=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let 
e=0;e<n.length;++e)n[e].push(0);this.mel_filters=n,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){g(e,"WeSpeakerFeatureExtractor");const t=(await this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,n=t.data,[r,i,a]=t.dims;for(let t=0;t<r;++t){const r=t*i*a,s=t*a;for(let t=0;t<i;++t){const i=r+t*a;for(let t=0;t<a;++t)n[i+t]-=e[s+t]}}}return{input_features:t}}}class ue extends y{}class de extends r.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class ce extends de{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class pe extends de{async _call(e){return await this.feature_extractor(e)}}class he extends de{async _call(e){return await this.feature_extractor(e)}}class me extends de{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class fe extends de{async _call(e){return await this.feature_extractor(e)}}class ge extends de{}class _e extends de{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:n,task_prompts_with_input:r}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(n??{})),this.task_prompts_with_input=new 
Map(Object.entries(r??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const n of e)if(this.task_prompts_without_inputs.has(n))t.push(this.task_prompts_without_inputs.get(n));else{for(const[e,r]of this.task_prompts_with_input)if(n.includes(e)){t.push(r.replaceAll("{input}",n).replaceAll(e,""));break}t.length!==e.length&&t.push(n)}return t}post_process_generation(e,t,n){const r=this.tasks_answer_post_processing_type.get(t)??"pure_text";let i;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),r){case"pure_text":i=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const a="ocr"===r?"quad_boxes":"bboxes",s=e.matchAll(this.regexes[a]),o=[],l=[];for(const[e,t,...r]of s)o.push(t?t.trim():o.at(-1)??""),l.push(r.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*n[t%2])));i={labels:o,[a]:l};break;default:throw new Error(`Task "${t}" (of type "${r}") not yet implemented.`)}return{[t]:i}}}class we{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:b,WhisperFeatureExtractor:ne,ViTFeatureExtractor:z,MobileViTFeatureExtractor:V,MobileViTImageProcessor:j,MobileNetV1FeatureExtractor:L,MobileNetV2FeatureExtractor:D,MobileNetV3FeatureExtractor:R,MobileNetV4FeatureExtractor:N,OwlViTFeatureExtractor:G,Owlv2ImageProcessor:q,CLIPFeatureExtractor:C,CLIPImageProcessor:P,Florence2Processor:_e,ChineseCLIPFeatureExtractor:E,SiglipImageProcessor:F,ConvNextFeatureExtractor:A,ConvNextImageProcessor:I,SegformerFeatureExtractor:x,SapiensFeatureExtractor:v,BitImageProcessor:$,DPTImageProcessor:k,DPTFeatureExtractor:T,PvtImageProcessor:M,GLPNFeatureExtractor:S,BeitFeatureExtractor:H,DeiTFeatureExtractor:W,DetrFeatureExtractor:Q,RTDetrImageProcessor:U,MaskFormerFeatureExtractor:Y,YolosFeatureExtractor:J,DonutFeatureExtractor:X,NougatImageProcessor:K,EfficientNetImageProcessor:B,ViTImageProcessor:O,VitMatteImageProcessor:te,SamImageProcessor:Z,Swin2SRImageProcessor:ee,Wav2Vec2FeatureExtractor:re,SeamlessM4TFeatureExtractor:ie,SpeechT5FeatureExtractor:ue,ASTFeatureExtractor:ae,ClapFeatureExtractor:se,PyAnnoteFeatureExtractor:oe,WeSpeakerFeatureExtractor:le};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:pe,Wav2Vec2ProcessorWithLM:he,PyAnnoteProcessor:me,SamProcessor:ce,SpeechT5Processor:fe,OwlViTProcessor:ge,Florence2Processor:_e};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:s="main"}={}){let o=n??await(0,a.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:s}),l=o.feature_extractor_type??o.image_processor_type,u=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!u){if(void 0===o.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),u=b}return new(this.PROCESSOR_CLASS_MAPPING[o.processor_class]??de)(new 
u(o))}}},"./src/tokenizers.js":
191
191
  /*!***************************!*\
192
192
  !*** ./src/tokenizers.js ***!
193
- \***************************/(e,t,n)=>{n.r(t),n.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Be,BertTokenizer:()=>ve,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>ot,BloomTokenizer:()=>Ne,CLIPTokenizer:()=>rt,CamembertTokenizer:()=>Fe,CodeGenTokenizer:()=>nt,CodeLlamaTokenizer:()=>Ge,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>Ce,DebertaTokenizer:()=>ke,DebertaV2Tokenizer:()=>$e,DistilBertTokenizer:()=>Ee,ElectraTokenizer:()=>Ie,EsmTokenizer:()=>Xe,FalconTokenizer:()=>We,GPT2Tokenizer:()=>Oe,GPTNeoXTokenizer:()=>He,GemmaTokenizer:()=>Qe,Grok1Tokenizer:()=>Ye,HerbertTokenizer:()=>Se,LlamaTokenizer:()=>je,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Le,MPNetTokenizer:()=>Ue,MarianTokenizer:()=>at,MobileBertTokenizer:()=>Me,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>be,Qwen2Tokenizer:()=>Ke,RoFormerTokenizer:()=>Pe,RobertaTokenizer:()=>Re,SiglipTokenizer:()=>it,SpeechT5Tokenizer:()=>ut,SqueezeBertTokenizer:()=>Te,T5Tokenizer:()=>ze,TokenizerModel:()=>M,VitsTokenizer:()=>ct,Wav2Vec2CTCTokenizer:()=>st,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>qe,XLMTokenizer:()=>Ae,is_chinese_char:()=>w});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=n(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),u=n(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),c=n(/*! 
/**
 * Body of a regex character class that matches punctuation: every Unicode
 * punctuation code point (`\p{P}`) plus the ASCII symbol ranges
 * U+0021-002F, U+003A-0040, U+005B-0060 and U+007B-007E.
 * It is interpolated between `[` and `]` by other patterns in this module,
 * so it must remain a bare class body.
 */
const y = "\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E";

/**
 * Matches strings that consist solely of punctuation characters.
 *
 * The pattern is anchored and only ever used through `.test()`, so it must not
 * carry the `g` flag: a global regex remembers `lastIndex` after a successful
 * match, which makes the next `.test()` start past position 0, fail against
 * the `^` anchor and wrongly report `false` for every second punctuation
 * token in a row.
 */
const b = new RegExp(`^[${y}]+$`, "u");

/**
 * Literal substitutions applied to a regex source string before it is
 * compiled. The single entry rewrites an inline case-insensitive group into
 * an equivalent group that spells out both letter cases explicitly.
 */
const v = new Map([
  [
    "(?i:'s|'t|'re|'ve|'m|'ll|'d)",
    "(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))",
  ],
]);
/**
 * WordPiece tokenizer model.
 *
 * Splits every incoming word into the longest vocabulary entries it can find,
 * scanning left to right. Pieces after the first are looked up with
 * `config.continuing_subword_prefix` prepended. A word that cannot be fully
 * covered, or that is longer than `max_input_chars_per_word` characters,
 * becomes a single unknown token.
 */
class T extends M {
  /**
   * @param {Object} config Model section of the tokenizer configuration.
   * Reads `vocab`, `unk_token` and the optional `max_input_chars_per_word`
   * (defaults to 100).
   */
  constructor(config) {
    super(config);

    this.tokens_to_ids = m(config.vocab);
    this.unk_token_id = this.tokens_to_ids.get(config.unk_token);
    this.unk_token = config.unk_token;
    this.max_input_chars_per_word = config.max_input_chars_per_word ?? 100;

    // Inverse lookup: position = id, value = token text.
    this.vocab = new Array(this.tokens_to_ids.size);
    this.tokens_to_ids.forEach((id, token) => {
      this.vocab[id] = token;
    });
  }

  /**
   * @param {string[]} tokens Pre-tokenized words.
   * @returns {string[]} Word pieces, in order.
   */
  encode(tokens) {
    const output = [];

    for (const word of tokens) {
      // Spread so that length and slicing work on code points, not UTF-16 units.
      const chars = [...word];
      if (chars.length > this.max_input_chars_per_word) {
        output.push(this.unk_token);
        continue;
      }

      const pieces = [];
      let start = 0;
      let unknown = false;

      while (start < chars.length && !unknown) {
        let match = null;
        let end = chars.length;

        // Shrink the candidate from the right until it is in the vocabulary.
        for (; end > start; --end) {
          let candidate = chars.slice(start, end).join("");
          if (start > 0) {
            candidate = this.config.continuing_subword_prefix + candidate;
          }
          if (this.tokens_to_ids.has(candidate)) {
            match = candidate;
            break;
          }
        }

        if (match === null) {
          unknown = true;
        } else {
          pieces.push(match);
          start = end;
        }
      }

      if (unknown) {
        output.push(this.unk_token);
      } else {
        output.push(...pieces);
      }
    }

    return output;
  }
}
/**
 * Byte -> character table covering all 256 byte values.
 *
 * Bytes whose char codes fall in "!".."~", "¡".."¬" or "®".."ÿ" map to the
 * character with that same code. Every other byte is assigned, in ascending
 * byte order, the next unused char code starting at 256.
 */
const $ = (() => {
  // Inclusive list of char codes between two single-character strings.
  const codesBetween = (first, last) => {
    const codes = [];
    for (let code = first.charCodeAt(0); code <= last.charCodeAt(0); ++code) {
      codes.push(code);
    }
    return codes;
  };

  const bytes = [
    ...codesBetween("!", "~"),
    ...codesBetween("¡", "¬"),
    ...codesBetween("®", "ÿ"),
  ];
  const charCodes = [...bytes];

  let shifted = 0;
  for (let byte = 0; byte < 256; ++byte) {
    if (bytes.includes(byte)) {
      continue;
    }
    bytes.push(byte);
    charCodes.push(256 + shifted);
    shifted += 1;
  }

  const table = {};
  bytes.forEach((byte, position) => {
    table[byte] = String.fromCharCode(charCodes[position]);
  });
  return table;
})();

/** Inverse of `$`, produced by the shared `reverseDictionary` helper. */
const S = (0, i.reverseDictionary)($);
/**
 * Vocabulary-only tokenizer model: tokens are passed through unchanged and
 * only the token <-> id tables and the bos/eos/pad/unk tokens are set up.
 */
class P extends M {
  /**
   * @param {Object} config Model configuration. `config.vocab` is either the
   * vocabulary itself or, when `moreConfig.target_lang` is set, an object of
   * vocabularies keyed by language.
   * @param {Object} moreConfig Supplies `target_lang` and the
   * `bos_token` / `eos_token` / `pad_token` / `unk_token` strings.
   */
  constructor(config, moreConfig) {
    super(config);

    const vocabulary = moreConfig.target_lang
      ? config.vocab[moreConfig.target_lang]
      : config.vocab;
    this.tokens_to_ids = m(vocabulary);

    // Store each special token together with its id (undefined if absent).
    for (const kind of ["bos", "eos", "pad", "unk"]) {
      const token = moreConfig[`${kind}_token`];
      this[`${kind}_token`] = token;
      this[`${kind}_token_id`] = this.tokens_to_ids.get(token);
    }

    // Inverse lookup: position = id, value = token text.
    this.vocab = new Array(this.tokens_to_ids.size);
    this.tokens_to_ids.forEach((id, token) => {
      this.vocab[id] = token;
    });
  }

  /**
   * @param {string[]} tokens
   * @returns {string[]} The same array, untouched.
   */
  encode(tokens) {
    return tokens;
  }
}
/**
 * BERT-style normalizer. Depending on its config it cleans control
 * characters, pads CJK characters with spaces, lowercases and strips accents.
 */
class N extends E {
  /**
   * Surrounds every character accepted by `w` with single spaces.
   *
   * Iterates by code point rather than by UTF-16 code unit: `w` accepts
   * ranges above U+FFFF, and those can never be seen when the two halves of a
   * surrogate pair are inspected separately.
   *
   * @param {string} text
   * @returns {string}
   */
  _tokenize_chinese_chars(text) {
    const output = [];
    for (const char of text) {
      if (w(char.codePointAt(0))) {
        output.push(" ", char, " ");
      } else {
        output.push(char);
      }
    }
    return output.join("");
  }

  /**
   * Decomposes the text (NFD) and removes the combining marks U+0300-U+036F.
   * @param {string} text
   * @returns {string}
   */
  stripAccents(text) {
    return text.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
  }

  /**
   * Tells whether a single character is a control character. Tab, newline
   * and carriage return are deliberately not counted as control characters.
   *
   * The categories are grouped in one character class so that both anchors
   * apply to all of them; as a bare alternation, `^` and `$` would each bind
   * to one branch only.
   *
   * @param {string} char A single character.
   * @returns {boolean}
   */
  _is_control(char) {
    switch (char) {
      case "\t":
      case "\n":
      case "\r":
        return false;
      default:
        return /^[\p{Cc}\p{Cf}\p{Co}\p{Cs}]$/u.test(char);
    }
  }

  /**
   * Drops NUL, U+FFFD and control characters, and turns every whitespace
   * character into a plain space.
   * @param {string} text
   * @returns {string}
   */
  _clean_text(text) {
    const output = [];
    for (const char of text) {
      const codePoint = char.codePointAt(0);
      if (codePoint === 0 || codePoint === 0xfffd || this._is_control(char)) {
        continue;
      }
      output.push(/^\s$/.test(char) ? " " : char);
    }
    return output.join("");
  }

  /**
   * Applies the configured steps in order: clean_text, handle_chinese_chars,
   * then lowercase / strip_accents. When lowercasing, accents are stripped
   * unless `strip_accents` is explicitly `false`; otherwise they are stripped
   * only when `strip_accents` is truthy.
   * @param {string} text
   * @returns {string}
   */
  normalize(text) {
    if (this.config.clean_text) {
      text = this._clean_text(text);
    }
    if (this.config.handle_chinese_chars) {
      text = this._tokenize_chinese_chars(text);
    }
    if (this.config.lowercase) {
      text = text.toLowerCase();
      if (this.config.strip_accents !== false) {
        text = this.stripAccents(text);
      }
    } else if (this.config.strip_accents) {
      text = this.stripAccents(text);
    }
    return text;
  }
}
/**
 * Pre-tokenizer that cuts text around matches of a configured pattern.
 *
 * With `config.invert` set, the matches themselves are the output. Otherwise
 * both the matches and the text between them are kept, in their original
 * order.
 */
class q extends V {
  /**
   * @param {Object} config Reads `pattern` and `invert`.
   */
  constructor(config) {
    super();
    this.config = config;
    this.pattern = h(this.config.pattern, this.config.invert);
  }

  /**
   * @param {string} text
   * @param {Object} [options] Unused by this pre-tokenizer.
   * @returns {string[]} Pieces of `text`; empty when no pattern could be built.
   */
  pre_tokenize_text(text, options) {
    if (this.pattern === null) {
      return [];
    }
    if (this.config.invert) {
      return text.match(this.pattern) || [];
    }

    const pieces = [];
    let cursor = 0;
    for (const hit of text.matchAll(this.pattern)) {
      const matched = hit[0];
      // Text between the previous match and this one.
      if (cursor < hit.index) {
        pieces.push(text.slice(cursor, hit.index));
      }
      // Zero-length matches contribute nothing.
      if (matched.length > 0) {
        pieces.push(matched);
      }
      cursor = hit.index + matched.length;
    }
    // Trailing text after the last match.
    if (cursor < text.length) {
      pieces.push(text.slice(cursor));
    }
    return pieces;
  }
}
/**
 * Post-processor driven by a template: a list of items that are either a
 * `SpecialToken` to insert or a `Sequence` placeholder ("A" for the first
 * input, "B" for the second).
 */
class Q extends H {
  /**
   * @param {Object} config Reads `single` (template for one sequence) and
   * `pair` (template for two sequences).
   */
  constructor(config) {
    super(config);
    this.single = config.single;
    this.pair = config.pair;
  }

  /**
   * @param {string[]} tokens First sequence.
   * @param {string[]|null} [tokens_pair=null] Second sequence; selects the
   * `pair` template when given.
   * @param {Object} [options]
   * @param {boolean} [options.add_special_tokens=true] When false, the
   * template's special tokens are left out.
   * @returns {{tokens: string[], token_type_ids: number[]}}
   */
  post_process(tokens, tokens_pair = null, { add_special_tokens = true } = {}) {
    const template = tokens_pair === null ? this.single : this.pair;

    let processed = [];
    let types = [];

    for (const item of template) {
      if ("SpecialToken" in item) {
        if (add_special_tokens) {
          processed.push(item.SpecialToken.id);
          types.push(item.SpecialToken.type_id);
        }
        continue;
      }
      if (!("Sequence" in item)) {
        continue;
      }

      let sequence;
      if (item.Sequence.id === "A") {
        sequence = tokens;
      } else if (item.Sequence.id === "B") {
        sequence = tokens_pair;
      } else {
        continue;
      }

      processed = (0, i.mergeArrays)(processed, sequence);
      // One type id per token of the sequence just appended.
      types = (0, i.mergeArrays)(
        types,
        new Array(sequence.length).fill(item.Sequence.type_id),
      );
    }

    return { tokens: processed, token_type_ids: types };
  }
}
/**
 * Decoder step that trims a given character from the edges of every token:
 * at most `start` occurrences from the front and at most `stop` occurrences
 * from the back.
 */
class re extends Z {
  /**
   * @param {Object} config Reads `content` (the character to remove),
   * `start` and `stop` (maximum counts per side).
   */
  constructor(config) {
    super(config);
    this.content = this.config.content;
    this.start = this.config.start;
    this.stop = this.config.stop;
  }

  /**
   * @param {string[]} tokens
   * @returns {string[]} Tokens with the leading/trailing characters removed.
   */
  decode_chain(tokens) {
    return tokens.map((token) => {
      // Advance past leading occurrences, bounded by `start`.
      let from = 0;
      while (from < this.start && token[from] === this.content) {
        from += 1;
      }

      // Walk back over trailing occurrences, bounded by `stop`.
      let to = token.length;
      while (token.length - to < this.stop && token[to - 1] === this.content) {
        to -= 1;
      }

      return token.slice(from, to);
    });
  }
}
/**
 * Metaspace pre-tokenizer: replaces spaces with a replacement string and
 * optionally prepends that string to the text.
 */
class de extends V {
  /**
   * @param {Object} config Reads `add_prefix_space`, `replacement`, the
   * optional `str_rep` (falls back to `replacement`) and the optional
   * `prepend_scheme` (defaults to "always").
   */
  constructor(config) {
    super();
    this.addPrefixSpace = config.add_prefix_space;
    this.replacement = config.replacement;
    this.strRep = config.str_rep || this.replacement;
    this.prepend_scheme = config.prepend_scheme ?? "always";
  }

  /**
   * @param {string} text
   * @param {Object} [options]
   * @param {number} [options.section_index] Index of the text section being
   * processed; only section 0 is prefixed under the "first" scheme.
   * @returns {string[]} A one-element array holding the transformed text.
   */
  pre_tokenize_text(text, { section_index = undefined } = {}) {
    let normalized = text.replaceAll(" ", this.strRep);

    if (this.addPrefixSpace && !normalized.startsWith(this.replacement)) {
      const scheme = this.prepend_scheme;
      const shouldPrepend =
        scheme === "always" || (scheme === "first" && section_index === 0);
      if (shouldPrepend) {
        normalized = this.strRep + normalized;
      }
    }

    return [normalized];
  }
}
/**
 * Sets the length of every array stored on `item` to `length`, in place.
 *
 * @param {Object<string, any[]>} item Object whose own enumerable values are arrays.
 * @param {number} length New length for each array.
 * @returns {void}
 */
function ye(item, length) {
  Object.values(item).forEach((sequence) => {
    sequence.length = length;
  });
}
RegExp(this.added_tokens.slice().sort(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,i.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:n}of this.chat_template){if("string"!=typeof t||"string"!=typeof n)throw new Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=n}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){return new this(...await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}))}_call(e,{text_pair:t=null,add_special_tokens:n=!0,padding:r=!1,truncation:i=null,max_length:a=null,return_tensor:l=!0,return_token_type_ids:u=null}={}){const d=Array.isArray(e);let c;if(d){if(0===e.length)throw Error("text array must be 
non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");c=e.map(((e,r)=>this._encode_plus(e,{text_pair:t[r],add_special_tokens:n,return_token_type_ids:u})))}else c=e.map((e=>this._encode_plus(e,{add_special_tokens:n,return_token_type_ids:u})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");c=[this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:u})]}if(null===a?a="max_length"===r?this.model_max_length:(0,s.max)(c.map((e=>e.input_ids.length)))[0]:i||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),a=Math.min(a,this.model_max_length??1/0),r||i)for(let e=0;e<c.length;++e)c[e].input_ids.length!==a&&(c[e].input_ids.length>a?i&&ye(c[e],a):r&&we(c[e],a,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!r||!i)&&c.some((e=>{for(const t of Object.keys(e))if(e[t].length!==c[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[c.length,c[0].input_ids.length];for(const t of Object.keys(c[0]))p[t]=new o.Tensor("int64",BigInt64Array.from(c.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(c[0]))p[e]=c.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" 
")),this.do_lowercase_and_remove_accent&&(e=function(e){return _(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const n=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(n)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){const{tokens:i,token_type_ids:a}=this._tokenize_helper(e,{pair:t,add_special_tokens:n}),s=this.model.convert_tokens_to_ids(i),o={input_ids:s,attention_mask:new Array(s.length).fill(1)};return(r??this.return_token_type_ids)&&a&&(o.token_type_ids=a),o}_tokenize_helper(e,{pair:t=null,add_special_tokens:n=!1}={}){const r=this._encode_text(e),a=this._encode_text(t);return this.post_processor?this.post_processor(r,a,{add_special_tokens:n}):{tokens:(0,i.mergeArrays)(r??[],a??[])}}tokenize(e,{pair:t=null,add_special_tokens:n=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:n}).tokens}encode(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:r}).input_ids}batch_decode(e,t={}){return e instanceof o.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof o.Tensor&&(e=f(e)),!Array.isArray(e)||0===e.length||!(0,i.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:n=null}){let r=this.model.convert_ids_to_tokens(e);t&&(r=r.filter((e=>!this.special_tokens.includes(e))));let i=this.decoder?this.decoder(r):r.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(n??this.clean_up_tokenization_spaces)&&(i=g(i)),i}get_chat_template({chat_template:e=null,tools:t=null}={}){if(this.chat_template&&"object"==typeof this.chat_template){const 
n=this.chat_template;if(null!==e&&Object.hasOwn(n,e))e=n[e];else if(null===e)if(null!==t&&"tool_use"in n)e=n.tool_use;else{if(!("default"in n))throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(n).sort()}.`);e=n.default}}else if(null===e){if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");e=this.chat_template}return e}apply_chat_template(e,{tools:t=null,documents:n=null,chat_template:r=null,add_generation_prompt:i=!1,tokenize:a=!0,padding:s=!1,truncation:o=!1,max_length:l=null,return_tensor:d=!0,return_dict:c=!1,tokenizer_kwargs:p={},...h}={}){if("string"!=typeof(r=this.get_chat_template({chat_template:r,tools:t})))throw Error("chat_template must be a string, but got "+typeof r);let m=this._compiled_template_cache.get(r);void 0===m&&(m=new u.Template(r),this._compiled_template_cache.set(r,m));const f=Object.create(null);for(const e of _e){const t=this.getToken(e);t&&(f[e]=t)}const g=m.render({messages:e,add_generation_prompt:i,tools:t,documents:n,...f,...h});if(a){const e=this._call(g,{add_special_tokens:!1,padding:s,truncation:o,max_length:l,return_tensor:d,...p});return c?e:e.input_ids}return g}}class ve extends be{return_token_type_ids=!0}class xe extends be{return_token_type_ids=!0}class Me extends be{return_token_type_ids=!0}class Te extends be{return_token_type_ids=!0}class ke extends be{return_token_type_ids=!0}class $e extends be{return_token_type_ids=!0}class Se extends be{return_token_type_ids=!0}class Ce extends be{return_token_type_ids=!0}class Pe extends be{return_token_type_ids=!0}class Ee 
/**
 * Shared implementation of `_build_translation_inputs` for multilingual
 * tokenizers.
 *
 * Validates the requested languages and, when a source language is given,
 * rewrites the first language token of the single-sequence post-processor
 * template. It then stores the target-language token id on
 * `generation_kwargs.forced_bos_token_id` and encodes the inputs.
 *
 * @param {Object} self Tokenizer exposing `language_codes`, `languageRegex`,
 * `lang_to_token`, `post_processor`, `model` and `_call`.
 * @param {any} raw_inputs Forwarded to `self._call`.
 * @param {Object} tokenizer_options Forwarded to `self._call`.
 * @param {Object} generation_kwargs Reads `src_lang` / `tgt_lang`; receives
 * `forced_bos_token_id`.
 * @returns {any} Whatever `self._call` returns.
 * @throws {Error} On a malformed tokenizer or an unknown language code.
 */
function Je(self, raw_inputs, tokenizer_options, generation_kwargs) {
  const hasLanguageCodes =
    "language_codes" in self && Array.isArray(self.language_codes);
  if (!hasLanguageCodes) {
    throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");
  }

  const hasLanguageRegex =
    "languageRegex" in self && self.languageRegex instanceof RegExp;
  if (!hasLanguageRegex) {
    throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");
  }

  const hasLangToToken =
    "lang_to_token" in self && typeof self.lang_to_token === "function";
  if (!hasLangToToken) {
    throw new Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");
  }

  const src_lang = generation_kwargs.src_lang;
  const tgt_lang = generation_kwargs.tgt_lang;

  if (!self.language_codes.includes(tgt_lang)) {
    throw new Error(`Target language code "${tgt_lang}" is not valid. Must be one of: {${self.language_codes.join(", ")}}`);
  }

  if (src_lang !== undefined) {
    if (!self.language_codes.includes(src_lang)) {
      throw new Error(`Source language code "${src_lang}" is not valid. Must be one of: {${self.language_codes.join(", ")}}`);
    }

    // Only the first language token of the template is replaced.
    for (const item of self.post_processor.config.single) {
      if (!("SpecialToken" in item)) {
        continue;
      }
      if (!self.languageRegex.test(item.SpecialToken.id)) {
        continue;
      }
      item.SpecialToken.id = self.lang_to_token(src_lang);
      break;
    }
  }

  const targetToken = self.lang_to_token(tgt_lang);
  generation_kwargs.forced_bos_token_id =
    self.model.convert_tokens_to_ids([targetToken])[0];

  return self._call(raw_inputs, tokenizer_options);
}
e=this.decode([_]),n=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==n){if(null!==a&&n!==a&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);c.text=t,u.push(c),m=[],x=[],c=l()}a=c.language=n}}else if(_>=h){const e=(_-h)*r+p,t=(0,s.round)(e,2);if(null!==y&&_>=y)g=!0;else if(g||m.length>0&&_<v)g=!1;else if(null===c.timestamp[0])c.timestamp[0]=t;else if(t===c.timestamp[0]);else{c.timestamp[1]=t,m.push(x),o&&f.push(M);const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c),m=[],x=[],f=[],M=[],c=l()}}else if(x.push(_),o){let e,t=(0,s.round)(i[n]+p,2);if(n+1<i.length){e=(0,s.round)(i[n+1]+p,2);const a=this.decode([_]);b.test(a)&&(e=(0,s.round)(Math.min(t+r,e),2))}else e=null;M.push([t,e])}}if("stride"in n){const[e,t,r]=n.stride;p+=e-r}x.length>0?(m.push(x),o&&f.push(M)):m.every((e=>0===e.length))&&(c=l(),m=[],x=[],f=[],M=[])}if(m.length>0){if(i&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c)}let y=Object.create(null);const v=u.map((e=>e.text)).join("");if(t||n){for(let e=0;e<u.length;++e){const r=u[e];t||delete r.timestamp,n||delete r.language}if(o){const e=[];for(const t of u)for(const n of t.words)e.push(n);y={chunks:e}}else y={chunks:u}}return[v,y]}findLongestCommonSequence(e,t=null){let n=e[0],r=n.length,i=[];const a=Array.isArray(t)&&t.length>0;let s=a?[]:null,o=a?t[0]:null;for(let l=1;l<e.length;++l){const u=e[l];let d=0,c=[r,r,0,0];const p=u.length;for(let e=1;e<r+p;++e){const i=Math.max(0,r-e),s=Math.min(r,r+p-e),h=n.slice(i,s),m=Math.max(0,e-r),f=Math.min(p,e),g=u.slice(m,f);if(h.length!==g.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let _;_=a?h.filter(((e,n)=>e===g[n]&&o[i+n]<=t[l][m+n])).length:h.filter(((e,t)=>e===g[t])).length;const w=_/e+e/1e4;_>1&&w>d&&(d=w,c=[i,s,m,f])}const[h,m,f,g]=c,_=Math.floor((m+h)/2),w=Math.floor((g+f)/2);i.push(...n.slice(0,_)),n=u.slice(w),r=n.length,a&&(s.push(...o.slice(0,_)),o=t[l].slice(w))}return i.push(...n),a?(s.push(...o),[i,s]):[i,[]]}collateWordTimestamps(e,t,n){const[r,i,a]=this.combineTokensIntoWords(e,n),s=[];for(let e=0;e<r.length;++e){const n=a[e];s.push({text:r[e],timestamp:[t[n.at(0)][0],t[n.at(-1)][1]]})}return s}combineTokensIntoWords(e,t,n="\"'“¡¿([{-",r="\"'.。,,!!??::”)]}、"){let i,a,s;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[i,a,s]=this.splitTokensOnUnicode(e):[i,a,s]=this.splitTokensOnSpaces(e),this.mergePunctuations(i,a,s,n,r)}decode(e,t){let n;return t?.decode_with_timestamps?(e instanceof o.Tensor&&(e=f(e)),n=this.decodeWithTimestamps(e,t)):n=super.decode(e,t),n}decodeWithTimestamps(e,t){const 
n=t?.time_precision??.02,r=Array.from(this.all_special_ids).at(-1)+1;let i=[[]];for(let t of e)if(t=Number(t),t>=r){const e=((t-r)*n).toFixed(2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return i=i.map((e=>"string"==typeof e?e:super.decode(e,t))),i.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),n=[],r=[],i=[];let a=[],s=[],o=0;for(let l=0;l<e.length;++l){const u=e[l];a.push(u),s.push(l);const d=this.decode(a,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[o+d.indexOf("�")]||(n.push(d),r.push(a),i.push(s),a=[],s=[],o+=d.length)}return[n,r,i]}splitTokensOnSpaces(e){const[t,n,r]=this.splitTokensOnUnicode(e),i=[],a=[],s=[],o=new RegExp(`^[${y}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],u=n[e],d=r[e],c=u[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=o.test(h);if(c||p||m||0===i.length)i.push(l),a.push(u),s.push(d);else{const e=i.length-1;i[e]+=l,a[e].push(...u),s[e].push(...d)}}return[i,a,s]}mergePunctuations(e,t,n,r,a){const s=structuredClone(e),o=structuredClone(t),l=structuredClone(n);let u=s.length-2,d=s.length-1;for(;u>=0;)s[u].startsWith(" ")&&r.includes(s[u].trim())?(s[d]=s[u]+s[d],o[d]=(0,i.mergeArrays)(o[u],o[d]),l[d]=(0,i.mergeArrays)(l[u],l[d]),s[u]="",o[u]=[],l[u]=[]):d=u,--u;for(u=0,d=1;d<s.length;)!s[u].endsWith(" ")&&a.includes(s[d])?(s[u]+=s[d],o[u]=(0,i.mergeArrays)(o[u],o[d]),l[u]=(0,i.mergeArrays)(l[u],l[d]),s[d]="",o[d]=[],l[d]=[]):u=d,++d;return[s.filter((e=>e)),o.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:n=!0}={}){const r=[];if(e){const t=(0,d.whisper_language_to_code)(e),n=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===n)throw new Error(`Unable to find language "${t}" in model vocabulary. 
Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(n)}else r.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}else r.push(null);if(n){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}return r.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class nt extends be{}class rt extends be{}class it extends be{}class at extends be{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...n]=e.trim().split(this.languageRegex);if(0===n.length)return super._encode_text(t);if(2===n.length){const[e,t]=n;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,i.mergeArrays)([e],super._encode_text(t))}}}class st extends be{}class ot extends be{}class lt extends be{}class ut extends be{}class dt extends be{}class ct extends be{constructor(e,t){super(e,t),this.decoder=new ue({})}}class pt extends be{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:ze,DistilBertTokenizer:Ee,CamembertTokenizer:Fe,DebertaTokenizer:ke,DebertaV2Tokenizer:$e,BertTokenizer:ve,HerbertTokenizer:Se,ConvBertTokenizer:Ce,RoFormerTokenizer:Pe,XLMTokenizer:Ae,ElectraTokenizer:Ie,MobileBertTokenizer:Me,SqueezeBertTokenizer:Te,AlbertTokenizer:xe,GPT2Tokenizer:Oe,BartTokenizer:Be,MBartTokenizer:Le,MBart50Tokenizer:De,RobertaTokenizer:Re,WhisperTokenizer:tt,CodeGenTokenizer:nt,CLIPTokenizer:rt,SiglipTokenizer:it,MarianTokenizer:at,BloomTokenizer:Ne,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:je,CodeLlamaTokenizer:Ge,XLMRobertaTokenizer:qe,MPNetTokenizer:Ue,FalconTokenizer:We,GPTNeoXTokenizer:He,EsmTokenizer:Xe,Wav2Vec2CTCTokenizer:st,BlenderbotTokenizer:ot,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ut,NougatTokenizer:dt,VitsTokenizer:ct,Qwen2Tokenizer:Ke,GemmaTokenizer:Qe,Grok1Tokenizer:Ye,CohereTokenizer:pt,PreTrainedTokenizer:be};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){const[o,l]=await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}),u=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=be),new d(o,l)}}},"./src/utils/audio.js":
193
+ \***************************/(e,t,n)=>{n.r(t),n.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Be,BertTokenizer:()=>ve,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>ot,BloomTokenizer:()=>Ne,CLIPTokenizer:()=>rt,CamembertTokenizer:()=>Fe,CodeGenTokenizer:()=>nt,CodeLlamaTokenizer:()=>Ge,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>Ce,DebertaTokenizer:()=>ke,DebertaV2Tokenizer:()=>$e,DistilBertTokenizer:()=>Ee,ElectraTokenizer:()=>Ie,EsmTokenizer:()=>Xe,FalconTokenizer:()=>We,GPT2Tokenizer:()=>Oe,GPTNeoXTokenizer:()=>He,GemmaTokenizer:()=>Qe,Grok1Tokenizer:()=>Ye,HerbertTokenizer:()=>Se,LlamaTokenizer:()=>je,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Le,MPNetTokenizer:()=>Ue,MarianTokenizer:()=>at,MobileBertTokenizer:()=>Me,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>be,Qwen2Tokenizer:()=>Ke,RoFormerTokenizer:()=>Pe,RobertaTokenizer:()=>Re,SiglipTokenizer:()=>it,SpeechT5Tokenizer:()=>ut,SqueezeBertTokenizer:()=>Te,T5Tokenizer:()=>ze,TokenizerModel:()=>M,VitsTokenizer:()=>ct,Wav2Vec2CTCTokenizer:()=>st,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>qe,XLMTokenizer:()=>Ae,is_chinese_char:()=>w});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),i=n(/*! ./utils/core.js */"./src/utils/core.js"),a=n(/*! ./utils/hub.js */"./src/utils/hub.js"),s=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=n(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),u=n(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),c=n(/*! 
./utils/constants.js */"./src/utils/constants.js");async function p(e,t){const n=await Promise.all([(0,a.getModelJSON)(e,"tokenizer.json",!0,t),(0,a.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(n[1].legacy=t.legacy),n}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,n]of v)t=t.replaceAll(e,n);return new RegExp(t,"gu")}if(void 0!==e.String){const n=(0,i.escapeRegExp)(e.String);return new RegExp(t?n:`(${n})`,"gu")}return console.warn("Unknown pattern type:",e),null}function m(e){return new Map(Object.entries(e))}function f(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function g(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function _(e){return e.replace(/\p{M}/gu,"")}function w(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const y="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",b=new RegExp(`^[${y}]+$`,"gu"),v=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class M extends r.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 
0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new T(e);case"Unigram":return new k(e,...t);case"BPE":return new C(e);default:if(e.vocab)return new P(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){return e=this.encode(e),this.fuse_unk&&(e=function(e,t,n){const r=[];let i=0;for(;i<e.length;)if(r.push(e[i]),(t.get(e[i])??n)===n)for(;++i<e.length&&(t.get(e[i])??n)===n;)t.get(r.at(-1))!==n&&(r[r.length-1]+=e[i]);else++i;return r}(e,this.tokens_to_ids,this.unk_token_id)),e}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class T extends M{constructor(e){super(e),this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const n of e){const e=[...n];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,i=0;const a=[];for(;i<e.length;){let t=e.length,n=null;for(;i<t;){let r=e.slice(i,t).join("");if(i>0&&(r=this.config.continuing_subword_prefix+r),this.tokens_to_ids.has(r)){n=r;break}--t}if(null===n){r=!0;break}a.push(n),i=t}r?t.push(this.unk_token):t.push(...a)}return t}}class k extends M{constructor(e,t){super(e);const n=e.vocab.length;this.vocab=new Array(n),this.scores=new Array(n);for(let t=0;t<n;++t){const n=e.vocab[t];this.vocab[t]=n[0],this.scores[t]=n[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.minScore=(0,s.min)(this.scores)[0],this.unk_score=this.minScore-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.chars;let n=0;for(;n<t.length;){let r=!1;const a=[],s=t.slice(n).join(""),o=this.trie.commonPrefixSearch(s);for(const t of o){a.push(t);const s=this.tokens_to_ids.get(t),o=this.scores[s],l=(0,i.len)(t);e.insert(n,l,o,s),r||1!==l||(r=!0)}r||e.insert(n,1,this.unk_score,this.unk_token_id),n+=1}}tokenize(e){const t=new l.TokenLattice(e,this.bos_token_id,this.eos_token_id);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const n of e){const e=this.tokenize(n);t.push(...e)}return t}}const $=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let n=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+n),n+=1);const r=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,r[t]])))})(),S=(0,i.reverseDictionary)($);class C extends M{constructor(e){super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;this.bpe_ranks=new 
Map(e.merges.map(((e,t)=>[e,t]))),this.merges=e.merges.map((e=>e.split(this.BPE_SPLIT_TOKEN))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const n=Array.from(e);this.end_of_word_suffix&&(n[n.length-1]+=this.end_of_word_suffix);let r=[];if(n.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:n[0],bias:0,prev:null,next:null},i=t;for(let t=1;t<n.length;++t){const r={bias:t/n.length,token:n[t],prev:i,next:null};i.next=r,this._add_node(e,i),i=r}for(;!e.isEmpty();){const n=e.pop();if(n.deleted||!n.next||n.next.deleted)continue;if(n.deleted=!0,n.next.deleted=!0,n.prev){const e={...n.prev};n.prev.deleted=!0,n.prev=e,e.prev?e.prev.next=e:t=e}const r={token:n.token+n.next.token,bias:n.bias,prev:n.prev,next:n.next.next};r.prev?(r.prev.next=r,this._add_node(e,r.prev)):t=r,r.next&&(r.next.prev=r,this._add_node(e,r))}for(let e=t;null!==e;e=e.next)r.push(e.token)}else r=n;if(this.continuing_subword_suffix)for(let e=0;e<r.length-1;++e)r[e]+=this.continuing_subword_suffix;return this.cache.set(e,r),r}_add_node(e,t){const n=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==n&&(t.score=n+t.bias,e.push(t))}encode(e){const t=[];for(const n of e){if(this.ignore_merges&&this.tokens_to_ids.has(n)){t.push(n);continue}const e=this.bpe(n);for(const n of e)if(this.tokens_to_ids.has(n))t.push(n);else if(this.byte_fallback){const e=Array.from(this.text_encoder.encode(n)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`));e.every((e=>this.tokens_to_ids.has(e)))?t.push(...e):t.push(this.unk_token)}else t.push(this.unk_token)}return t}}class P extends 
M{constructor(e,t){super(e),this.tokens_to_ids=m(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class E extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new pe(e);case"Sequence":return new R(e);case"Replace":return new F(e);case"NFC":return new A(e);case"NFKC":return new I(e);case"NFKD":return new z(e);case"Strip":return new O(e);case"StripAccents":return new B(e);case"Lowercase":return new L(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class F extends E{normalize(e){const t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class A extends E{normalize(e){return e=e.normalize("NFC")}}class I extends E{normalize(e){return e=e.normalize("NFKC")}}class z extends E{normalize(e){return e=e.normalize("NFKD")}}class O extends E{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class B extends E{normalize(e){return e=_(e)}}class L extends E{normalize(e){return e=e.toLowerCase()}}class D extends E{normalize(e){return e=this.config.prepend+e}}class R extends E{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>E.fromConfig(e)))}normalize(e){return 
this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class N extends E{_tokenize_chinese_chars(e){const t=[];for(let n=0;n<e.length;++n){const r=e[n];w(r.charCodeAt(0))?(t.push(" "),t.push(r),t.push(" ")):t.push(r)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/\p{Mn}/gu,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const n of e){const e=n.charCodeAt(0);0===e||65533===e||this._is_control(n)||(/^\s$/.test(n)?t.push(" "):t.push(n))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class V extends r.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new j(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new fe(e);case"Metaspace":return new de(e);case"ByteLevel":return new G(e);case"Split":return new q(e);case"Punctuation":return new U(e);case"Digits":return new W(e);case"Replace":return new ge(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class j extends V{constructor(e){super(),this.pattern=new RegExp(`[^\\s${y}]+|[${y}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class G extends V{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| 
?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=$,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class q extends V{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const n=[];let r=0;for(const i of e.matchAll(t)){const t=i[0];r<i.index&&n.push(e.slice(r,i.index)),t.length>0&&n.push(t),r=i.index+t.length}return r<e.length&&n.push(e.slice(r)),n}(e,this.pattern)}}class U extends V{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${y}]+|[${y}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class W extends V{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class H extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Q(e);case"ByteLevel":return new Y(e);case"RobertaProcessing":return new K(e);case"BertProcessing":return new X(e);case"Sequence":return new J(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class X extends H{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:n=!0}={}){n&&(e=(0,i.mergeArrays)([this.cls],e,[this.sep]));let r=new Array(e.length).fill(0);if(null!==t){const a=n&&this instanceof K?[this.sep]:[],s=n?[this.sep]:[];e=(0,i.mergeArrays)(e,a,t,s),r=(0,i.mergeArrays)(r,new 
Array(t.length+a.length+s.length).fill(1))}return{tokens:e,token_type_ids:r}}}class K extends X{}class Q extends H{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:n=!0}={}){const r=null===t?this.single:this.pair;let a=[],s=[];for(const o of r)"SpecialToken"in o?n&&(a.push(o.SpecialToken.id),s.push(o.SpecialToken.type_id)):"Sequence"in o&&("A"===o.Sequence.id?(a=(0,i.mergeArrays)(a,e),s=(0,i.mergeArrays)(s,new Array(e.length).fill(o.Sequence.type_id))):"B"===o.Sequence.id&&(a=(0,i.mergeArrays)(a,t),s=(0,i.mergeArrays)(s,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:a,token_type_ids:s}}}class Y extends H{post_process(e,t=null){return t&&(e=(0,i.mergeArrays)(e,t)),{tokens:e}}}class J extends H{constructor(e){super(e),this.processors=e.processors.map((e=>H.fromConfig(e)))}post_process(e,t=null,n={}){let r;for(const i of this.processors)if(i instanceof Y){if(e=i.post_process(e).tokens,t){t=i.post_process(t).tokens}}else{const a=i.post_process(e,t,n);e=a.tokens,r=a.token_type_ids}return{tokens:e,token_type_ids:r}}}class Z extends r.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new ie(e);case"Metaspace":return new ce(e);case"ByteLevel":return new ae(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new ne(e);case"Strip":return new re(e);case"Sequence":return new oe(e);case"CTC":return new se(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends Z{decode_chain(e){const t=h(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends 
Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let n=[];for(const r of e){let e=null;if(6===r.length&&r.startsWith("<0x")&&r.endsWith(">")){const t=parseInt(r.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)n.push(e);else{if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}t.push(r)}}if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}return t}}class ne extends Z{decode_chain(e){return[e.join("")]}}class re extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let n=0;n<this.start&&e[n]===this.content;++n)t=n+1;let n=e.length;for(let t=0;t<this.stop;++t){const r=e.length-t-1;if(e[r]!==this.content)break;n=r}return e.slice(t,n)}))}}class ie extends Z{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=g(e)),e)))}}class ae extends Z{constructor(e){super(e),this.byte_decoder=S,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),n=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(n)}decode_chain(e){const t=[];let n=[];for(const r of e)void 0!==this.added_tokens.find((e=>e.content===r))?(n.length>0&&(t.push(this.convert_tokens_to_string(n)),n=[]),t.push(r)):n.push(r);return n.length>0&&t.push(this.convert_tokens_to_string(n)),t}}class se extends Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let n=1;n<e.length;++n)e[n]!==t.at(-1)&&t.push(e[n]);let n=t.filter((e=>e!==this.pad_token)).join("");return 
this.cleanup&&(n=g(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class oe extends Z{constructor(e){super(e),this.decoders=e.decoders.map((e=>Z.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,n)=>t.replaceAll(this.suffix,n===e.length-1?"":" ")))}}class ue extends Z{decode_chain(e){let t="";for(let n=1;n<e.length;n+=2)t+=e[n];return[t]}}class de extends V{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let n=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!n.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(n=this.strRep+n),[n]}}class ce extends Z{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let n=0;n<e.length;++n){let r=e[n].replaceAll(this.replacement," ");this.addPrefixSpace&&0==n&&r.startsWith(" ")&&(r=r.substring(1)),t.push(r)}return t}}class pe extends E{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends V{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>V.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,n)=>n.pre_tokenize(e,t)),[e])}}class me extends V{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class fe extends 
V{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class ge extends V{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const _e=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function we(e,t,n,r){for(const a of Object.keys(e)){const s=t-e[a].length,o=n(a),l=new Array(s).fill(o);e[a]="right"===r?(0,i.mergeArrays)(e[a],l):(0,i.mergeArrays)(l,e[a])}}function ye(e,t){for(const n of Object.keys(e))e[n].length=t}class be extends r.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=E.fromConfig(e.normalizer),this.pre_tokenizer=V.fromConfig(e.pre_tokenizer),this.model=M.fromConfig(e.model,t),this.post_processor=H.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new 
RegExp(this.added_tokens.slice().sort(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,i.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:n}of this.chat_template){if("string"!=typeof t||"string"!=typeof n)throw new Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=n}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){return new this(...await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}))}_call(e,{text_pair:t=null,add_special_tokens:n=!0,padding:r=!1,truncation:i=null,max_length:a=null,return_tensor:l=!0,return_token_type_ids:u=null}={}){const d=Array.isArray(e);let c;if(d){if(0===e.length)throw Error("text array must be 
non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");c=e.map(((e,r)=>this._encode_plus(e,{text_pair:t[r],add_special_tokens:n,return_token_type_ids:u})))}else c=e.map((e=>this._encode_plus(e,{add_special_tokens:n,return_token_type_ids:u})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");c=[this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:u})]}if(null===a?a="max_length"===r?this.model_max_length:(0,s.max)(c.map((e=>e.input_ids.length)))[0]:i||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),a=Math.min(a,this.model_max_length??1/0),r||i)for(let e=0;e<c.length;++e)c[e].input_ids.length!==a&&(c[e].input_ids.length>a?i&&ye(c[e],a):r&&we(c[e],a,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!r||!i)&&c.some((e=>{for(const t of Object.keys(e))if(e[t].length!==c[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[c.length,c[0].input_ids.length];for(const t of Object.keys(c[0]))p[t]=new o.Tensor("int64",BigInt64Array.from(c.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(c[0]))p[e]=c.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" 
")),this.do_lowercase_and_remove_accent&&(e=function(e){return _(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const n=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(n)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){const{tokens:i,token_type_ids:a}=this._tokenize_helper(e,{pair:t,add_special_tokens:n}),s=this.model.convert_tokens_to_ids(i),o={input_ids:s,attention_mask:new Array(s.length).fill(1)};return(r??this.return_token_type_ids)&&a&&(o.token_type_ids=a),o}_tokenize_helper(e,{pair:t=null,add_special_tokens:n=!1}={}){const r=this._encode_text(e),a=this._encode_text(t);return this.post_processor?this.post_processor(r,a,{add_special_tokens:n}):{tokens:(0,i.mergeArrays)(r??[],a??[])}}tokenize(e,{pair:t=null,add_special_tokens:n=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:n}).tokens}encode(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:r}).input_ids}batch_decode(e,t={}){return e instanceof o.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof o.Tensor&&(e=f(e)),!Array.isArray(e)||0===e.length||!(0,i.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:n=null}){let r=this.model.convert_ids_to_tokens(e);t&&(r=r.filter((e=>!this.special_tokens.includes(e))));let i=this.decoder?this.decoder(r):r.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(n??this.clean_up_tokenization_spaces)&&(i=g(i)),i}get_chat_template({chat_template:e=null,tools:t=null}={}){if(this.chat_template&&"object"==typeof this.chat_template){const 
n=this.chat_template;if(null!==e&&Object.hasOwn(n,e))e=n[e];else if(null===e)if(null!==t&&"tool_use"in n)e=n.tool_use;else{if(!("default"in n))throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(n).sort()}.`);e=n.default}}else if(null===e){if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");e=this.chat_template}return e}apply_chat_template(e,{tools:t=null,documents:n=null,chat_template:r=null,add_generation_prompt:i=!1,tokenize:a=!0,padding:s=!1,truncation:o=!1,max_length:l=null,return_tensor:d=!0,return_dict:c=!1,tokenizer_kwargs:p={},...h}={}){if("string"!=typeof(r=this.get_chat_template({chat_template:r,tools:t})))throw Error("chat_template must be a string, but got "+typeof r);let m=this._compiled_template_cache.get(r);void 0===m&&(m=new u.Template(r),this._compiled_template_cache.set(r,m));const f=Object.create(null);for(const e of _e){const t=this.getToken(e);t&&(f[e]=t)}const g=m.render({messages:e,add_generation_prompt:i,tools:t,documents:n,...f,...h});if(a){const e=this._call(g,{add_special_tokens:!1,padding:s,truncation:o,max_length:l,return_tensor:d,...p});return c?e:e.input_ids}return g}}class ve extends be{return_token_type_ids=!0}class xe extends be{return_token_type_ids=!0}class Me extends be{return_token_type_ids=!0}class Te extends be{return_token_type_ids=!0}class ke extends be{return_token_type_ids=!0}class $e extends be{return_token_type_ids=!0}class Se extends be{return_token_type_ids=!0}class Ce extends be{return_token_type_ids=!0}class Pe extends be{return_token_type_ids=!0}class Ee 
extends be{}class Fe extends be{}class Ae extends be{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class Ie extends be{return_token_type_ids=!0}class ze extends be{}class Oe extends be{}class Be extends be{}class Le extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class De extends Le{}class Re extends be{}class Ne extends be{constructor(e,t){const n=".,!?…。,、।۔،",r=e.pre_tokenizer?.pretokenizers[0]?.pattern;r&&r.Regex===` ?[^(\\s|[${n}])]+`&&(r.Regex=` ?[^\\s${n}]+`),super(e,t)}}const Ve="▁";class je extends be{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Ve,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Ve+e.replaceAll(Ve," "));return t.length>1&&t[0]===Ve&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class Ge extends be{}class qe extends be{}class Ue extends be{}class We extends be{}class He extends be{}class Xe extends be{}class Ke extends be{}class Qe extends be{}class Ye extends be{}function Je(e,t,n,r){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it 
should be a function.");const i=r.src_lang,a=r.tgt_lang;if(!e.language_codes.includes(a))throw new Error(`Target language code "${a}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==i){if(!e.language_codes.includes(i))throw new Error(`Source language code "${i}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(i);break}}return r.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(a)])[0],e._call(t,n)}class Ze extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class et extends be{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,n){return Je(this,e,t,n)}}class tt extends be{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:n=!1,time_precision:r=null,force_full_sequences:i=!0}={}){if(null===r)throw Error("Must specify time_precision");let a=null;const o="word"===t;function l(){return{language:a,timestamp:[null,null],text:""}}const u=[];let c=l(),p=0;const h=this.timestamp_begin;let m=[],f=[],g=!1,_=null;const w=new Set(this.all_special_ids);for(const n of e){const e=n.tokens,i=o?n.token_timestamps:null;let y=null,v=h;if("stride"in n){const[t,i,a]=n.stride;if(p-=i,_=t-a,i&&(v=i/r+h),a)for(let t=e.length-1;t>=0;--t){const n=Number(e[t]);if(n>=h){if(null!==y&&(n-h)*r<_)break;y=n}}}let x=[],M=[];for(let n=0;n<e.length;++n){const _=Number(e[n]);if(w.has(_)){const 
e=this.decode([_]),n=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==n){if(null!==a&&n!==a&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);c.text=t,u.push(c),m=[],x=[],c=l()}a=c.language=n}}else if(_>=h){const e=(_-h)*r+p,t=(0,s.round)(e,2);if(null!==y&&_>=y)g=!0;else if(g||m.length>0&&_<v)g=!1;else if(null===c.timestamp[0])c.timestamp[0]=t;else if(t===c.timestamp[0]);else{c.timestamp[1]=t,m.push(x),o&&f.push(M);const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c),m=[],x=[],f=[],M=[],c=l()}}else if(x.push(_),o){let e,t=(0,s.round)(i[n]+p,2);if(n+1<i.length){e=(0,s.round)(i[n+1]+p,2);const a=this.decode([_]);b.test(a)&&(e=(0,s.round)(Math.min(t+r,e),2))}else e=null;M.push([t,e])}}if("stride"in n){const[e,t,r]=n.stride;p+=e-r}x.length>0?(m.push(x),o&&f.push(M)):m.every((e=>0===e.length))&&(c=l(),m=[],x=[],f=[],M=[])}if(m.length>0){if(i&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,a)),u.push(c)}let y=Object.create(null);const v=u.map((e=>e.text)).join("");if(t||n){for(let e=0;e<u.length;++e){const r=u[e];t||delete r.timestamp,n||delete r.language}if(o){const e=[];for(const t of u)for(const n of t.words)e.push(n);y={chunks:e}}else y={chunks:u}}return[v,y]}findLongestCommonSequence(e,t=null){let n=e[0],r=n.length,i=[];const a=Array.isArray(t)&&t.length>0;let s=a?[]:null,o=a?t[0]:null;for(let l=1;l<e.length;++l){const u=e[l];let d=0,c=[r,r,0,0];const p=u.length;for(let e=1;e<r+p;++e){const i=Math.max(0,r-e),s=Math.min(r,r+p-e),h=n.slice(i,s),m=Math.max(0,e-r),f=Math.min(p,e),g=u.slice(m,f);if(h.length!==g.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let _;_=a?h.filter(((e,n)=>e===g[n]&&o[i+n]<=t[l][m+n])).length:h.filter(((e,t)=>e===g[t])).length;const w=_/e+e/1e4;_>1&&w>d&&(d=w,c=[i,s,m,f])}const[h,m,f,g]=c,_=Math.floor((m+h)/2),w=Math.floor((g+f)/2);i.push(...n.slice(0,_)),n=u.slice(w),r=n.length,a&&(s.push(...o.slice(0,_)),o=t[l].slice(w))}return i.push(...n),a?(s.push(...o),[i,s]):[i,[]]}collateWordTimestamps(e,t,n){const[r,i,a]=this.combineTokensIntoWords(e,n),s=[];for(let e=0;e<r.length;++e){const n=a[e];s.push({text:r[e],timestamp:[t[n.at(0)][0],t[n.at(-1)][1]]})}return s}combineTokensIntoWords(e,t,n="\"'“¡¿([{-",r="\"'.。,,!!??::”)]}、"){let i,a,s;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[i,a,s]=this.splitTokensOnUnicode(e):[i,a,s]=this.splitTokensOnSpaces(e),this.mergePunctuations(i,a,s,n,r)}decode(e,t){let n;return t?.decode_with_timestamps?(e instanceof o.Tensor&&(e=f(e)),n=this.decodeWithTimestamps(e,t)):n=super.decode(e,t),n}decodeWithTimestamps(e,t){const 
n=t?.time_precision??.02,r=Array.from(this.all_special_ids).at(-1)+1;let i=[[]];for(let t of e)if(t=Number(t),t>=r){const e=((t-r)*n).toFixed(2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return i=i.map((e=>"string"==typeof e?e:super.decode(e,t))),i.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),n=[],r=[],i=[];let a=[],s=[],o=0;for(let l=0;l<e.length;++l){const u=e[l];a.push(u),s.push(l);const d=this.decode(a,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[o+d.indexOf("�")]||(n.push(d),r.push(a),i.push(s),a=[],s=[],o+=d.length)}return[n,r,i]}splitTokensOnSpaces(e){const[t,n,r]=this.splitTokensOnUnicode(e),i=[],a=[],s=[],o=new RegExp(`^[${y}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],u=n[e],d=r[e],c=u[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=o.test(h);if(c||p||m||0===i.length)i.push(l),a.push(u),s.push(d);else{const e=i.length-1;i[e]+=l,a[e].push(...u),s[e].push(...d)}}return[i,a,s]}mergePunctuations(e,t,n,r,a){const s=structuredClone(e),o=structuredClone(t),l=structuredClone(n);let u=s.length-2,d=s.length-1;for(;u>=0;)s[u].startsWith(" ")&&r.includes(s[u].trim())?(s[d]=s[u]+s[d],o[d]=(0,i.mergeArrays)(o[u],o[d]),l[d]=(0,i.mergeArrays)(l[u],l[d]),s[u]="",o[u]=[],l[u]=[]):d=u,--u;for(u=0,d=1;d<s.length;)!s[u].endsWith(" ")&&a.includes(s[d])?(s[u]+=s[d],o[u]=(0,i.mergeArrays)(o[u],o[d]),l[u]=(0,i.mergeArrays)(l[u],l[d]),s[d]="",o[d]=[],l[d]=[]):u=d,++d;return[s.filter((e=>e)),o.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:n=!0}={}){const r=[];if(e){const t=(0,d.whisper_language_to_code)(e),n=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===n)throw new Error(`Unable to find language "${t}" in model vocabulary. 
Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(n)}else r.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}else r.push(null);if(n){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${c.GITHUB_ISSUE_URL}.`);r.push(e)}return r.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class nt extends be{}class rt extends be{}class it extends be{}class at extends be{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...n]=e.trim().split(this.languageRegex);if(0===n.length)return super._encode_text(t);if(2===n.length){const[e,t]=n;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,i.mergeArrays)([e],super._encode_text(t))}}}class st extends be{}class ot extends be{}class lt extends be{}class ut extends be{}class dt extends be{}class ct extends be{constructor(e,t){super(e,t),this.decoder=new ue({})}}class pt extends be{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:ze,DistilBertTokenizer:Ee,CamembertTokenizer:Fe,DebertaTokenizer:ke,DebertaV2Tokenizer:$e,BertTokenizer:ve,HerbertTokenizer:Se,ConvBertTokenizer:Ce,RoFormerTokenizer:Pe,XLMTokenizer:Ae,ElectraTokenizer:Ie,MobileBertTokenizer:Me,SqueezeBertTokenizer:Te,AlbertTokenizer:xe,GPT2Tokenizer:Oe,BartTokenizer:Be,MBartTokenizer:Le,MBart50Tokenizer:De,RobertaTokenizer:Re,WhisperTokenizer:tt,CodeGenTokenizer:nt,CLIPTokenizer:rt,SiglipTokenizer:it,MarianTokenizer:at,BloomTokenizer:Ne,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:je,CodeLlamaTokenizer:Ge,XLMRobertaTokenizer:qe,MPNetTokenizer:Ue,FalconTokenizer:We,GPTNeoXTokenizer:He,EsmTokenizer:Xe,Wav2Vec2CTCTokenizer:st,BlenderbotTokenizer:ot,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ut,NougatTokenizer:dt,VitsTokenizer:ct,Qwen2Tokenizer:Ke,GemmaTokenizer:Qe,Grok1Tokenizer:Ye,CohereTokenizer:pt,PreTrainedTokenizer:be};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:i=!1,revision:a="main",legacy:s=null}={}){const[o,l]=await p(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:i,revision:a,legacy:s}),u=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=be),new d(o,l)}}},"./src/utils/audio.js":
194
194
  /*!****************************!*\
195
195
  !*** ./src/utils/audio.js ***!
196
196
  \****************************/(e,t,n)=>{n.r(t),n.d(t,{hamming:()=>d,hanning:()=>u,mel_filter_bank:()=>f,read_audio:()=>o,spectrogram:()=>_,window_function:()=>w});var r=n(/*! ./hub.js */"./src/utils/hub.js"),i=n(/*! ./maths.js */"./src/utils/maths.js"),a=n(/*! ./core.js */"./src/utils/core.js"),s=n(/*! ./tensor.js */"./src/utils/tensor.js");async function o(e,t){if("undefined"==typeof AudioContext)throw Error("Unable to load audio from path/URL since `AudioContext` is not available in your environment. Instead, audio data should be passed directly to the pipeline/processor. For more information and some example code, see https://huggingface.co/docs/transformers.js/guides/node-audio-processing.");const n=await(await(0,r.getFile)(e)).arrayBuffer(),i=new AudioContext({sampleRate:t});void 0===t&&console.warn(`No sampling rate provided, using default of ${i.sampleRate}Hz.`);const a=await i.decodeAudioData(n);let s;if(2===a.numberOfChannels){const e=Math.sqrt(2),t=a.getChannelData(0),n=a.getChannelData(1);s=new Float32Array(t.length);for(let r=0;r<a.length;++r)s[r]=e*(t[r]+n[r])/2}else s=a.getChannelData(0);return s}function l(e,t){if(e<1)return new Float64Array;if(1===e)return new Float64Array([1]);const n=1-t,r=2*Math.PI/(e-1),i=new Float64Array(e);for(let a=0;a<e;++a)i[a]=t-n*Math.cos(a*r);return i}function u(e){return l(e,.5)}function d(e){return l(e,.54)}const c={htk:e=>2595*Math.log10(1+e/700),kaldi:e=>1127*Math.log(1+e/700),slaney:(e,t=1e3,n=15,r=27/Math.log(6.4))=>e>=t?n+Math.log(e/t)*r:3*e/200};function p(e,t="htk"){const n=c[t];if(!n)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?n(e):e.map((e=>n(e)))}const h={htk:e=>700*(10**(e/2595)-1),kaldi:e=>700*(Math.exp(e/1127)-1),slaney:(e,t=1e3,n=15,r=Math.log(6.4)/27)=>e>=n?t*Math.exp(r*(e-n)):200*e/3};function m(e,t,n){const r=(t-e)/(n-1);return Float64Array.from({length:n},((t,n)=>e+r*n))}function f(e,t,n,r,i,a=null,s="htk",o=!1){if(null!==a&&"slaney"!==a)throw 
new Error('norm must be one of null or "slaney"');const l=m(p(n,s),p(r,s),t+2);let u,d=function(e,t="htk"){const n=h[t];if(!n)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?n(e):e.map((e=>n(e)))}(l,s);if(o){const t=i/(2*e);u=p(Float64Array.from({length:e},((e,n)=>n*t)),s),d=l}else u=m(0,Math.floor(i/2),e);const c=function(e,t){const n=Float64Array.from({length:t.length-1},((e,n)=>t[n+1]-t[n])),r=Array.from({length:e.length},(()=>new Array(t.length)));for(let n=0;n<e.length;++n){const i=r[n];for(let r=0;r<t.length;++r)i[r]=t[r]-e[n]}const i=t.length-2,a=Array.from({length:i},(()=>new Array(e.length)));for(let t=0;t<e.length;++t){const e=r[t];for(let r=0;r<i;++r){const i=-e[r]/n[r],s=e[r+2]/n[r+1];a[r][t]=Math.max(0,Math.min(i,s))}}return a}(u,d);if(null!==a&&"slaney"===a)for(let n=0;n<t;++n){const t=c[n],r=2/(d[n+2]-d[n]);for(let n=0;n<e;++n)t[n]*=r}return c}function g(e,t,n,r,a){if(n<=0)throw new Error("reference must be greater than zero");if(r<=0)throw new Error("min_value must be greater than zero");n=Math.max(r,n);const s=Math.log10(n);for(let n=0;n<e.length;++n)e[n]=t*Math.log10(Math.max(r,e[n])-s);if(null!==a){if(a<=0)throw new Error("db_range must be greater than zero");const t=(0,i.max)(e)[0]-a;for(let n=0;n<e.length;++n)e[n]=Math.max(e[n],t)}return e}async function _(e,t,n,r,{fft_length:o=null,power:l=1,center:u=!0,pad_mode:d="reflect",onesided:c=!0,preemphasis:p=null,mel_filters:h=null,mel_floor:m=1e-10,log_mel:f=null,reference:_=1,min_value:w=1e-10,db_range:y=null,remove_dc_offset:b=null,min_num_frames:v=null,max_num_frames:x=null,do_pad:M=!0,transpose:T=!1}={}){const k=t.length;if(null===o&&(o=n),n>o)throw Error(`frame_length (${n}) may not be larger than fft_length (${o})`);if(k!==n)throw new Error(`Length of the window (${k}) must equal frame_length (${n})`);if(r<=0)throw new Error("hop_length must be greater than zero");if(null===l&&null!==h)throw new Error("You have provided `mel_filters` but 
`power` is `None`. Mel spectrogram computation is not yet supported for complex-valued spectrogram. Specify `power` to fix this issue.");if(u){if("reflect"!==d)throw new Error(`pad_mode="${d}" not implemented yet.`);const t=Math.floor((o-1)/2)+1;e=function(e,t,n){const r=new e.constructor(e.length+t+n),i=e.length-1;for(let n=0;n<e.length;++n)r[t+n]=e[n];for(let n=1;n<=t;++n)r[t-n]=e[(0,a.calculateReflectOffset)(n,i)];for(let s=1;s<=n;++s)r[i+t+s]=e[(0,a.calculateReflectOffset)(i-s,i)];return r}(e,t,t)}let $=Math.floor(1+Math.floor((e.length-n)/r));null!==v&&$<v&&($=v);const S=c?Math.floor(o/2)+1:o;let C=$,P=$;null!==x&&(x>$?M&&(P=x):P=C=x);const E=new i.FFT(o),F=new Float64Array(o),A=new Float64Array(E.outputBufferSize),I=new Float32Array(S*P);for(let i=0;i<C;++i){const a=i*r,s=Math.min(e.length-a,n);s!==n&&F.fill(0,0,n);for(let t=0;t<s;++t)F[t]=e[a+t];if(b){let e=0;for(let t=0;t<s;++t)e+=F[t];const t=e/s;for(let e=0;e<s;++e)F[e]-=t}if(null!==p){for(let e=s-1;e>=1;--e)F[e]-=p*F[e-1];F[0]*=1-p}for(let e=0;e<t.length;++e)F[e]*=t[e];E.realTransform(A,F);for(let e=0;e<S;++e){const t=e<<1;I[e*P+i]=A[t]**2+A[t+1]**2}}if(null!==l&&2!==l){const e=2/l;for(let t=0;t<I.length;++t)I[t]**=e}const z=h.length;let O=await(0,s.matmul)(new s.Tensor("float32",h.flat(),[z,S]),new s.Tensor("float32",I,[S,P]));T&&(O=O.transpose(1,0));const B=O.data;for(let e=0;e<B.length;++e)B[e]=Math.max(m,B[e]);if(null!==l&&null!==f){const e=Math.min(B.length,C*z);switch(f){case"log":for(let t=0;t<e;++t)B[t]=Math.log(B[t]);break;case"log10":for(let t=0;t<e;++t)B[t]=Math.log10(B[t]);break;case"dB":if(1===l)!function(e,t=1,n=1e-5,r=null){g(e,20,t,n,r)}(B,_,w,y);else{if(2!==l)throw new Error(`Cannot use log_mel option '${f}' with power ${l}`);!function(e,t=1,n=1e-10,r=null){g(e,10,t,n,r)}(B,_,w,y)}break;default:throw new Error(`log_mel must be one of null, 'log', 'log10' or 'dB'. 
Got '${f}'`)}}return O}function w(e,t,{periodic:n=!0,frame_length:r=null,center:i=!0}={}){const a=n?e+1:e;let s;switch(t){case"boxcar":s=new Float64Array(a).fill(1);break;case"hann":case"hann_window":s=u(a);break;case"hamming":s=d(a);break;case"povey":s=u(a).map((e=>Math.pow(e,.85)));break;default:throw new Error(`Unknown window type ${t}.`)}if(n&&(s=s.subarray(0,e)),null===r)return s;if(e>r)throw new Error(`Length of the window (${e}) may not be larger than frame_length (${r})`);return s}},"./src/utils/constants.js":
@@ -199,10 +199,10 @@ var r,i,a,s,o,l,u,d,c,p,h,m,f,g,_,w,y,b,v,x,M,T,k,$,S,C,P,E,F,A,I,z,O,B=Object.d
199
199
  \********************************/(e,t,n)=>{n.r(t),n.d(t,{GITHUB_ISSUE_URL:()=>r});const r="https://github.com/xenova/transformers.js/issues/new/choose"},"./src/utils/core.js":
200
200
  /*!***************************!*\
201
201
  !*** ./src/utils/core.js ***!
202
- \***************************/(e,t,n)=>{function r(e,t){e&&e(t)}function i(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function a(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function s(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function o(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){const t=[];let n=e;for(;Array.isArray(n);)t.push(n.length),n=n[0];return t}function u(e,t,n=void 0){const r=e[t];if(void 0!==r)return delete e[t],r;if(void 0===n)throw Error(`Key ${t} does not exist in object.`);return n}function d(...e){return Array.prototype.concat.apply([],e)}function c(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function p(e,t){return Math.abs((e+t)%(2*t)-t)}function h(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}n.r(t),n.d(t,{calculateDimensions:()=>l,calculateReflectOffset:()=>p,dispatchCallback:()=>r,escapeRegExp:()=>a,isIntegralNumber:()=>o,isTypedArray:()=>s,mergeArrays:()=>d,pick:()=>h,pop:()=>u,product:()=>c,reverseDictionary:()=>i})},"./src/utils/data-structures.js":
202
+ \***************************/(e,t,n)=>{function r(e,t){e&&e(t)}function i(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function a(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function s(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function o(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){const t=[];let n=e;for(;Array.isArray(n);)t.push(n.length),n=n[0];return t}function u(e,t,n=void 0){const r=e[t];if(void 0!==r)return delete e[t],r;if(void 0===n)throw Error(`Key ${t} does not exist in object.`);return n}function d(...e){return Array.prototype.concat.apply([],e)}function c(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function p(e,t){return Math.abs((e+t)%(2*t)-t)}function h(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}function m(e){let t=0;for(const n of e)++t;return t}n.r(t),n.d(t,{calculateDimensions:()=>l,calculateReflectOffset:()=>p,dispatchCallback:()=>r,escapeRegExp:()=>a,isIntegralNumber:()=>o,isTypedArray:()=>s,len:()=>m,mergeArrays:()=>d,pick:()=>h,pop:()=>u,product:()=>c,reverseDictionary:()=>i})},"./src/utils/data-structures.js":
203
203
  /*!**************************************!*\
204
204
  !*** ./src/utils/data-structures.js ***!
205
- \**************************************/(e,t,n)=>{n.r(t),n.d(t,{CharTrie:()=>i,PriorityQueue:()=>r,TokenLattice:()=>s});class r{constructor(e=(e,t)=>e>t,t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const n=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=n}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class i{constructor(){this.root=a.default()}extend(e){for(let t of e)this.push(t)}push(e){let t=this.root;for(let n of e){let e=t.children.get(n);void 0===e&&(e=a.default(),t.children.set(n,e)),t=e}t.isLeaf=!0}*commonPrefixSearch(e){let t=this.root,n="";for(let r=0;r<e.length&&void 0!==t;++r){const i=e[r];n+=i,t=t.children.get(i),void 0!==t&&t.isLeaf&&(yield n)}}}class a{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new a(!1,new Map)}}class 
s{constructor(e,t,n){this.sentence=e,this.len=e.length,this.bosTokenId=t,this.eosTokenId=n,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const r=new o(this.bosTokenId,0,0,0,0),i=new o(this.eosTokenId,1,this.len,0,0);this.nodes.push(r.clone()),this.nodes.push(i.clone()),this.beginNodes[this.len].push(i),this.endNodes[0].push(r)}insert(e,t,n,r){const i=this.nodes.length,a=new o(r,i,e,t,n);this.beginNodes[e].push(a),this.endNodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let n=0,r=null;for(let i of this.endNodes[t]){const t=i.backtraceScore+e.score;(null===r||t>n)&&(r=i.clone(),n=t)}if(null===r)return[];e.prev=r,e.backtraceScore=n}++t}const n=[],r=this.beginNodes[e][0].prev;if(null===r)return[];let i=r.clone();for(;null!==i.prev;){n.push(i.clone());const e=i.clone();i=e.prev.clone()}return n.reverse(),n}piece(e){return this.sentence.slice(e.pos,e.pos+e.length)}tokens(){return this.viterbi().map((e=>this.piece(e)))}tokenIds(){return this.viterbi().map((e=>e.tokenId))}}class o{constructor(e,t,n,r,i){this.tokenId=e,this.nodeId=t,this.pos=n,this.length=r,this.score=i,this.prev=null,this.backtraceScore=0}clone(){const e=new o(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
205
+ \**************************************/(e,t,n)=>{n.r(t),n.d(t,{CharTrie:()=>i,PriorityQueue:()=>r,TokenLattice:()=>s});class r{constructor(e=(e,t)=>e>t,t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const n=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=n}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class i{constructor(){this.root=a.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const n of e){let e=t.children.get(n);void 0===e&&(e=a.default(),t.children.set(n,e)),t=e}t.isLeaf=!0}*commonPrefixSearch(e){let t=this.root;if(void 0===t)return;let n="";for(const r of e){if(n+=r,t=t.children.get(r),void 0===t)return;t.isLeaf&&(yield n)}}}class a{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new a(!1,new Map)}}class 
s{constructor(e,t,n){this.chars=Array.from(e),this.len=this.chars.length,this.bosTokenId=t,this.eosTokenId=n,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const r=new o(this.bosTokenId,0,0,0,0),i=new o(this.eosTokenId,1,this.len,0,0);this.nodes.push(r.clone()),this.nodes.push(i.clone()),this.beginNodes[this.len].push(i),this.endNodes[0].push(r)}insert(e,t,n,r){const i=this.nodes.length,a=new o(r,i,e,t,n);this.beginNodes[e].push(a),this.endNodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let n=0,r=null;for(let i of this.endNodes[t]){const t=i.backtraceScore+e.score;(null===r||t>n)&&(r=i.clone(),n=t)}if(null===r)return[];e.prev=r,e.backtraceScore=n}++t}const n=[],r=this.beginNodes[e][0].prev;if(null===r)return[];let i=r.clone();for(;null!==i.prev;){n.push(i.clone());const e=i.clone();i=e.prev.clone()}return n.reverse(),n}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map((e=>this.piece(e)))}tokenIds(){return this.viterbi().map((e=>e.tokenId))}}class o{constructor(e,t,n,r,i){this.tokenId=e,this.nodeId=t,this.pos=n,this.length=r,this.score=i,this.prev=null,this.backtraceScore=0}clone(){const e=new o(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
206
206
  /*!******************************!*\
207
207
  !*** ./src/utils/devices.js ***!
208
208
  \******************************/(e,t,n)=>{n.r(t),n.d(t,{DEVICE_TYPES:()=>r});const r=Object.freeze({auto:"auto",gpu:"gpu",cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:"webnn","webnn-npu":"webnn-npu","webnn-gpu":"webnn-gpu","webnn-cpu":"webnn-cpu"})},"./src/utils/dtypes.js":