@huggingface/transformers 3.0.0-alpha.15 → 3.0.0-alpha.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/transformers.cjs +108 -91
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +108 -91
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +6 -6
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +6 -6
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +6 -6
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +108 -91
- package/dist/transformers.mjs.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +16 -4
- package/src/env.js +1 -1
- package/src/models.js +43 -55
- package/src/tokenizers.js +22 -19
- package/src/utils/core.js +12 -0
- package/src/utils/data-structures.js +13 -11
- package/src/utils/hub.js +1 -1
- package/types/configs.d.ts +25 -3
- package/types/configs.d.ts.map +1 -1
- package/types/models.d.ts +1 -2
- package/types/models.d.ts.map +1 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/utils/core.d.ts +7 -0
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/data-structures.d.ts +6 -6
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
|
@@ -88,10 +88,10 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as r from"path";i
|
|
|
88
88
|
\******************************/(e,t,r)=>{var n;r.r(t),r.d(t,{Tensor:()=>i.Tensor,createInferenceSession:()=>_,deviceToExecutionProviders:()=>h,isONNXProxy:()=>M,isONNXTensor:()=>f});var o=r(/*! ../env.js */"./src/env.js"),s=r(/*! onnxruntime-node */"onnxruntime-node"),a=r(/*! #onnxruntime-webgpu */"?cb4d"),i=r(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const l=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),c=[];let d,u;if(o.apis.IS_NODE_ENV){switch(u=s.default??s,process.platform){case"win32":c.push("dml");break;case"linux":"x64"===process.arch&&c.push("cuda")}c.push("cpu"),d=["cpu"]}else u=n||(n=r.t(a,2)),o.apis.IS_WEBNN_AVAILABLE&&c.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),o.apis.IS_WEBGPU_AVAILABLE&&c.push("webgpu"),c.push("wasm"),d=["wasm"];const p=u.InferenceSession;function h(e=null){if(!e)return d;switch(e){case"auto":return c;case"gpu":return c.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(c.includes(e))return[l[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${c.join(", ")}.`)}let m=null;async function _(e,t){m&&await m;const r=p.create(e,t);return m??=r,await r}function f(e){return e instanceof u.Tensor}const g=u?.env;function M(){return g?.wasm?.proxy}g?.wasm&&(g.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,g.wasm.proxy=!1,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(g.wasm.numThreads=1)),g?.webgpu&&(g.webgpu.powerPreference="high-performance"),o.env.backends.onnx=g},"./src/configs.js":
|
|
89
89
|
/*!************************!*\
|
|
90
90
|
!*** ./src/configs.js ***!
|
|
91
|
-
\************************/(e,t,r)=>{r.r(t),r.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>i,getKeyValueShapes:()=>a});var n=r(/*! ./utils/core.js */"./src/utils/core.js"),o=r(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let r={};switch(e.model_type){case"llava":case"paligemma":case"florence2":r=s(e.text_config);break;case"moondream1":r=s(e.phi_config);break;case"musicgen":r=s(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_atten
tion_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const o=s(e.decoder),a="num_decoder_layers"in o,i=(0,n.pick)(e,["model_type","is_encoder_decoder"]);return a?(i.num_decoder_layers=o.num_decoder_layers,i.num_decoder_heads=o.num_decoder_heads,i.decoder_hidden_size=o.decoder_hidden_size,i.num_encoder_layers=o.num_encoder_layers,i.num_encoder_heads=o.num_encoder_heads,i.encoder_hidden_size=o.encoder_hidden_size):(i.num_layers=o.num_layers,i.num_heads=o.num_heads,i.hidden_size=o.hidden_size),i}const o={...r,...(0,n.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const r in t)o[r]=e[t[r]];return o}function a(e,{prefix:t="past_key_values"}={}){const r={},n=e.normalized_config;if(n.is_encoder_decoder&&"num_encoder_heads"in n&&"num_decoder_heads"in n){const e=n.encoder_dim_kv??n.encoder_hidden_size/n.num_encoder_heads,o=n.decoder_dim_kv??n.decoder_hidden_size/n.num_decoder_heads,s=[1,n.num_encoder_heads,0,e],a=[1,n.num_decoder_heads,0,o];for(let e=0;e<n.num_decoder_layers;++e)r[`${t}.${e}.encoder.key`]=s,r[`${t}.${e}.encoder.value`]=s,r[`${t}.${e}.decoder.key`]=a,r[`${t}.${e}.decoder.value`]=a}else{const e=n.num_heads,o=n.num_layers,s=n.dim_kv??n.hidden_size/(n.num_attention_heads??e);if("falcon"===n.model_type){const n=[1*e,0,s];for(let 
e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=n}else if(n.multi_query){const n=[1*e,0,2*s];for(let e=0;e<o;++e)r[`${t}.${e}.key_value`]=n}else if("bloom"===n.model_type){const n=[1*e,s,0],a=[1*e,0,s];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=a}else if("openelm"===n.model_type)for(let n=0;n<o;++n){const o=[1,e[n],0,s];r[`${t}.${n}.key`]=o,r[`${t}.${n}.value`]=o}else{const n=[1,e,0,s];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=n}}return r}class i{max_position_embeddings;constructor(e){
|
|
91
|
+
\************************/(e,t,r)=>{r.r(t),r.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>i,getKeyValueShapes:()=>a});var n=r(/*! ./utils/core.js */"./src/utils/core.js"),o=r(/*! ./utils/hub.js */"./src/utils/hub.js");function s(e){const t={};let r={};switch(e.model_type){case"llava":case"paligemma":case"florence2":r=s(e.text_config);break;case"moondream1":r=s(e.phi_config);break;case"musicgen":r=s(e.decoder);break;case"gpt2":case"gptj":case"jais":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_atten
tion_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const o=s(e.decoder),a="num_decoder_layers"in o,i=(0,n.pick)(e,["model_type","is_encoder_decoder"]);return a?(i.num_decoder_layers=o.num_decoder_layers,i.num_decoder_heads=o.num_decoder_heads,i.decoder_hidden_size=o.decoder_hidden_size,i.num_encoder_layers=o.num_encoder_layers,i.num_encoder_heads=o.num_encoder_heads,i.encoder_hidden_size=o.encoder_hidden_size):(i.num_layers=o.num_layers,i.num_heads=o.num_heads,i.hidden_size=o.hidden_size),i}const o={...r,...(0,n.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const r in t)o[r]=e[t[r]];return o}function a(e,{prefix:t="past_key_values"}={}){const r={},n=e.normalized_config;if(n.is_encoder_decoder&&"num_encoder_heads"in n&&"num_decoder_heads"in n){const e=n.encoder_dim_kv??n.encoder_hidden_size/n.num_encoder_heads,o=n.decoder_dim_kv??n.decoder_hidden_size/n.num_decoder_heads,s=[1,n.num_encoder_heads,0,e],a=[1,n.num_decoder_heads,0,o];for(let e=0;e<n.num_decoder_layers;++e)r[`${t}.${e}.encoder.key`]=s,r[`${t}.${e}.encoder.value`]=s,r[`${t}.${e}.decoder.key`]=a,r[`${t}.${e}.decoder.value`]=a}else{const e=n.num_heads,o=n.num_layers,s=n.dim_kv??n.hidden_size/(n.num_attention_heads??e);if("falcon"===n.model_type){const n=[1*e,0,s];for(let 
e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=n}else if(n.multi_query){const n=[1*e,0,2*s];for(let e=0;e<o;++e)r[`${t}.${e}.key_value`]=n}else if("bloom"===n.model_type){const n=[1*e,s,0],a=[1*e,0,s];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=a}else if("openelm"===n.model_type)for(let n=0;n<o;++n){const o=[1,e[n],0,s];r[`${t}.${n}.key`]=o,r[`${t}.${n}.value`]=o}else{const n=[1,e,0,s];for(let e=0;e<o;++e)r[`${t}.${e}.key`]=n,r[`${t}.${e}.value`]=n}}return r}class i{model_type=null;is_encoder_decoder=!1;max_position_embeddings;"transformers.js_config";constructor(e){Object.assign(this,e),this.normalized_config=s(this)}static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:s=!1,revision:a="main"}={}){!r||r instanceof i||(r=new i(r));const l=r??await async function(e,t){return await(0,o.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:r,cache_dir:n,local_files_only:s,revision:a});return new this(l)}}class l{static async from_pretrained(...e){return i.from_pretrained(...e)}}},"./src/env.js":
|
|
92
92
|
/*!********************!*\
|
|
93
93
|
!*** ./src/env.js ***!
|
|
94
|
-
\********************/(e,t,r)=>{r.r(t),r.d(t,{apis:()=>_,env:()=>T});var n=r(/*! fs */"fs"),o=r(/*! path */"path"),s=r(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!b(n.default),m=!b(o.default),_=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),f=h&&m,g=f?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",M=f?o.default.join(g,"/.cache/"):null,w="/models/",T={version:"3.0.0-alpha.
|
|
94
|
+
\********************/(e,t,r)=>{r.r(t),r.d(t,{apis:()=>_,env:()=>T});var n=r(/*! fs */"fs"),o=r(/*! path */"path"),s=r(/*! url */"url");const a="undefined"!=typeof self,i=a&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=a&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,p=u&&"node"===process?.release?.name,h=!b(n.default),m=!b(o.default),_=Object.freeze({IS_BROWSER_ENV:a,IS_WEBWORKER_ENV:i,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:p,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:m}),f=h&&m,g=f?o.default.dirname(o.default.dirname(s.default.fileURLToPath(import.meta.url))):"./",M=f?o.default.join(g,"/.cache/"):null,w="/models/",T={version:"3.0.0-alpha.16",backends:{onnx:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!a,localModelPath:f?o.default.join(g,w):w,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:M,useCustomCache:!1,customCache:null};function b(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
|
|
95
95
|
/*!***********************************************!*\
|
|
96
96
|
!*** ./src/generation/configuration_utils.js ***!
|
|
97
97
|
\***********************************************/(e,t,r)=>{r.r(t),r.d(t,{GenerationConfig:()=>o});var n=r(/*! ../utils/core.js */"./src/utils/core.js");class o{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,n.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
|
|
@@ -109,7 +109,7 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as r from"path";i
|
|
|
109
109
|
\*************************************/(e,t,r)=>{r.r(t),r.d(t,{BaseStreamer:()=>a,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var n=r(/*! ../utils/core.js */"./src/utils/core.js"),o=r(/*! ../tokenizers.js */"./src/tokenizers.js"),s=r(/*! ../env.js */"./src/env.js");class a{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const i=s.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends a{constructor(e,{skip_prompt:t=!1,callback_function:r=null,token_callback_function:n=null,decode_kwargs:o={},...s}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=r??i,this.token_callback_function=n,this.decode_kwargs={...o,...s},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,n.mergeArrays)(this.token_cache,t);const r=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let s;r.endsWith("\n")?(s=r.slice(this.print_len),this.token_cache=[],this.print_len=0):r.length>0&&(0,o.is_chinese_char)(r.charCodeAt(r.length-1))?(s=r.slice(this.print_len),this.print_len+=s.length):(s=r.slice(this.print_len,r.lastIndexOf(" ")+1),this.print_len+=s.length),this.on_finalized_text(s,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===i&&s.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:r=null,token_callback_function:n=null,on_chunk_start:o=null,on_chunk_end:s=null,on_finalize:a=null,time_precision:i=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:r,token_callback_function:n,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=o,this.on_chunk_end=s,this.on_finalize=a,this.time_precision=i,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const r=Number(t[0])-this.timestamp_begin;if(r>=0){const t=r*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
|
|
110
110
|
/*!***********************!*\
|
|
111
111
|
!*** ./src/models.js ***!
|
|
112
|
-
\***********************/(e,t,r)=>{r.r(t),r.d(t,{ASTForAudioClassification:()=>rr,ASTModel:()=>tr,ASTPreTrainedModel:()=>er,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>Ei,AutoModelForAudioClassification:()=>Yi,AutoModelForAudioFrameClassification:()=>Ki,AutoModelForCTC:()=>Hi,AutoModelForCausalLM:()=>Di,AutoModelForDepthEstimation:()=>rl,AutoModelForDocumentQuestionAnswering:()=>Zi,AutoModelForImageClassification:()=>Gi,AutoModelForImageFeatureExtraction:()=>ol,AutoModelForImageMatting:()=>el,AutoModelForImageSegmentation:()=>qi,AutoModelForImageToImage:()=>tl,AutoModelForMaskGeneration:()=>Qi,AutoModelForMaskedLM:()=>Vi,AutoModelForNormalEstimation:()=>nl,AutoModelForObjectDetection:()=>Ui,AutoModelForQuestionAnswering:()=>ji,AutoModelForSemanticSegmentation:()=>$i,AutoModelForSeq2SeqLM:()=>Ii,AutoModelForSequenceClassification:()=>Li,AutoModelForSpeechSeq2Seq:()=>Bi,AutoModelForTextToSpectrogram:()=>Ni,AutoModelForTextToWaveform:()=>Oi,AutoModelForTokenClassification:()=>zi,AutoModelForUniversalSegmentation:()=>Wi,AutoModelForVision2Seq:()=>Ri,AutoModelForXVector:()=>Ji,AutoModelForZeroShotObjectDetection:()=>Xi,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>yt,BartModel:()=>xt,BartPretrainedModel:()=>bt,BaseModelOutput:()=>q,BeitForImageClassification:()=>ao,BeitModel:()=>so,BeitPreTrainedModel:()=>oo,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>bn,BloomModel:()=>Tn,BloomPreTrainedModel:()=>wn,CLIPModel:()=>hr,CLIPPreTrainedModel:()=>pr,CLIPSegForImageSegmentation:()=>Pr,CLIPSegMode
l:()=>Fr,CLIPSegPreTrainedModel:()=>yr,CLIPTextModel:()=>mr,CLIPTextModelWithProjection:()=>_r,CLIPVisionModel:()=>fr,CLIPVisionModelWithProjection:()=>gr,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>be,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>Te,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>ul,CausalLMOutputWithPast:()=>pl,ChineseCLIPModel:()=>kr,ChineseCLIPPreTrainedModel:()=>xr,ClapAudioModelWithProjection:()=>Ta,ClapModel:()=>Ma,ClapPreTrainedModel:()=>ga,ClapTextModelWithProjection:()=>wa,CodeGenForCausalLM:()=>Xr,CodeGenModel:()=>Ur,CodeGenPreTrainedModel:()=>Wr,CohereForCausalLM:()=>Zr,CohereModel:()=>Kr,CoherePreTrainedModel:()=>Jr,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>rs,ConvNextModel:()=>ts,ConvNextPreTrainedModel:()=>es,ConvNextV2ForImageClassification:()=>ss,ConvNextV2Model:()=>os,ConvNextV2PreTrainedModel:()=>ns,DPTForDepthEstimation:()=>Vo,DPTModel:()=>Do,DPTPreTrainedModel:()=>Oo,DebertaForMaskedLM:()=>ye,DebertaForQuestionAnswering:()=>Ce,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Pe,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DecisionTransformerModel:()=>Ya,DecisionTransformerPreTrainedModel:()=>Ha,DeiTForImageClassification:()=>yo,DeiTModel:()=>ko,DeiTPreTrainedModel:()=>xo,DepthAnythingForDepthEstimation:()=>Ro,DepthAnythingPreTrainedModel:()=>jo,DetrForObjectDetection:()=>co,DetrForSegmentation:()=>uo,DetrModel:()=>lo,DetrObjectDetectionOutput:()=>po,DetrPreTrainedModel:()=>io,DetrSegmentationOutput:()=>ho,Dinov2ForImageClassification:()=>ls,Dinov
2Model:()=>is,Dinov2PreTrainedModel:()=>as,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Zo,DonutSwinPreTrainedModel:()=>Ko,EfficientNetForImageClassification:()=>La,EfficientNetModel:()=>Ea,EfficientNetPreTrainedModel:()=>Aa,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>fa,FalconModel:()=>_a,FalconPreTrainedModel:()=>ma,FastViTForImageClassification:()=>qn,FastViTModel:()=>Gn,FastViTPreTrainedModel:()=>Rn,Florence2ForConditionalGeneration:()=>ur,Florence2PreTrainedModel:()=>dr,GLPNForDepthEstimation:()=>Jo,GLPNModel:()=>Yo,GLPNPreTrainedModel:()=>Ho,GPT2LMHeadModel:()=>Sr,GPT2Model:()=>vr,GPT2PreTrainedModel:()=>Cr,GPTBigCodeForCausalLM:()=>$r,GPTBigCodeModel:()=>qr,GPTBigCodePreTrainedModel:()=>Gr,GPTJForCausalLM:()=>Rr,GPTJModel:()=>jr,GPTJPreTrainedModel:()=>Vr,GPTNeoForCausalLM:()=>Br,GPTNeoModel:()=>Ir,GPTNeoPreTrainedModel:()=>zr,GPTNeoXForCausalLM:()=>Dr,GPTNeoXModel:()=>Or,GPTNeoXPreTrainedModel:()=>Nr,Gemma2ForCausalLM:()=>sn,Gemma2Model:()=>on,Gemma2PreTrainedModel:()=>nn,GemmaForCausalLM:()=>rn,GemmaModel:()=>tn,GemmaPreTrainedModel:()=>en,GroupViTModel:()=>jn,GroupViTPreTrainedModel:()=>Vn,HieraForImageClassification:()=>Co,HieraModel:()=>Po,HieraPreTrainedModel:()=>Fo,HubertForCTC:()=>Xs,HubertForSequenceClassification:()=>Qs,HubertModel:()=>Us,HubertPreTrainedModel:()=>Ws,ImageMattingOutput:()=>hl,JAISLMHeadModel:()=>Lr,JAISModel:()=>Er,JAISPreTrainedModel:()=>Ar,LlamaForCausalLM:()=>Yr,LlamaModel:()=>Hr,LlamaPreTrainedModel:()=>Qr,LlavaForConditionalGeneration:()=>lr,LlavaPreTrainedM
odel:()=>ir,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>bs,M2M100Model:()=>Ts,M2M100PreTrainedModel:()=>ws,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Ct,MBartForSequenceClassification:()=>vt,MBartModel:()=>Pt,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ke,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>Tt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>Ms,MarianModel:()=>gs,MarianPreTrainedModel:()=>fs,MaskFormerForInstanceSegmentation:()=>Qo,MaskFormerModel:()=>Xo,MaskFormerPreTrainedModel:()=>Uo,MaskedLMOutput:()=>cl,MistralForCausalLM:()=>da,MistralModel:()=>ca,MistralPreTrainedModel:()=>la,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>Va,MobileNetV1Model:()=>Da,MobileNetV1PreTrainedModel:()=>Oa,MobileNetV2ForImageClassification:()=>Ga,MobileNetV2Model:()=>Ra,MobileNetV2PreTrainedModel:()=>ja,MobileNetV3ForImageClassification:()=>Wa,MobileNetV3Model:()=>$a,MobileNetV3PreTrainedModel:()=>qa,MobileNetV4ForImageClassification:()=>Qa,MobileNetV4Model:()=>Xa,MobileNetV4PreTrainedModel:()=>Ua,MobileViTForImageClassification:()=>Qn,MobileViTModel:()=>Xn,MobileViTPreTrainedModel:()=>Un,MobileViTV2ForImageClassification:()=>Jn,MobileViTV2Model:()=>Yn,MobileViTV2PreTrainedModel:()=>Hn,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>cr,MptForCausalLM:()=>yn,MptModel:()=>kn,MptPreTrainedModel:()=>xn,MusicgenForCausalLM:()=>Ba,MusicgenForConditionalGeneration:()=>Na,MusicgenModel:()=>Ia,MusicgenPreTrainedModel:()=>za,NomicBertModel:()=>J,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>Cn,OPTModel:()=>Pn,OPTPreTrainedModel:()=>Fn,OpenELMForCausalLM:()=>cn,OpenELMModel:()
=>ln,OpenELMPreTrainedModel:()=>an,OwlViTForObjectDetection:()=>eo,OwlViTModel:()=>Zn,OwlViTPreTrainedModel:()=>Kn,Owlv2ForObjectDetection:()=>no,Owlv2Model:()=>ro,Owlv2PreTrainedModel:()=>to,Phi3ForCausalLM:()=>Mn,Phi3Model:()=>gn,Phi3PreTrainedModel:()=>fn,PhiForCausalLM:()=>_n,PhiModel:()=>mn,PhiPreTrainedModel:()=>hn,PreTrainedModel:()=>R,PretrainedMixin:()=>Ja,PvtForImageClassification:()=>zn,PvtModel:()=>Ln,PvtPreTrainedModel:()=>En,PyAnnoteForAudioFrameClassification:()=>Ss,PyAnnoteModel:()=>vs,PyAnnotePreTrainedModel:()=>Cs,QuestionAnsweringModelOutput:()=>dl,Qwen2ForCausalLM:()=>pn,Qwen2Model:()=>un,Qwen2PreTrainedModel:()=>dn,RTDetrForObjectDetection:()=>fo,RTDetrModel:()=>_o,RTDetrObjectDetectionOutput:()=>go,RTDetrPreTrainedModel:()=>mo,ResNetForImageClassification:()=>Ao,ResNetModel:()=>So,ResNetPreTrainedModel:()=>vo,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>ne,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>re,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>K,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>_s,SamModel:()=>ms,SamPreTrainedModel:()=>hs,SapiensForDepthEstimation:()=>$o,SapiensForNormalEstimation:()=>Wo,SapiensForSemanticSegmentation:()=>qo,SapiensPreTrainedModel:()=>Go,SegformerForImageClassification:()=>Fa,SegformerForSemanticSegmentation:()=>Pa,SegformerModel:()=>ya,SegformerPreTrainedModel:()=>ka,Seq2SeqLMOutput:()=>sl,SequenceClassifierOutput:()=>al,SiglipModel:()=>wr,SiglipPreTrainedModel:()=>Mr,SiglipTextModel:()=>Tr,SiglipVisionModel:()=>br,SpeechT5ForSpeechToText:()=>na,SpeechT5ForTextToSpeech:()=>oa,SpeechT5HifiGan:()=>sa,SpeechT5Model:()=>ra,SpeechT5PreTrainedModel:()=>ta,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>nt,SqueezeB
ertPreTrainedModel:()=>rt,StableLmForCausalLM:()=>Sa,StableLmModel:()=>va,StableLmPreTrainedModel:()=>Ca,Starcoder2ForCausalLM:()=>ha,Starcoder2Model:()=>pa,Starcoder2PreTrainedModel:()=>ua,Swin2SRForImageSuperResolution:()=>No,Swin2SRModel:()=>Bo,Swin2SRPreTrainedModel:()=>Io,SwinForImageClassification:()=>zo,SwinModel:()=>Lo,SwinPreTrainedModel:()=>Eo,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>To,TableTransformerModel:()=>wo,TableTransformerObjectDetectionOutput:()=>bo,TableTransformerPreTrainedModel:()=>Mo,TokenClassifierOutput:()=>ll,TrOCRForCausalLM:()=>ia,TrOCRPreTrainedModel:()=>aa,UniSpeechForCTC:()=>Is,UniSpeechForSequenceClassification:()=>Bs,UniSpeechModel:()=>zs,UniSpeechPreTrainedModel:()=>Ls,UniSpeechSatForAudioFrameClassification:()=>js,UniSpeechSatForCTC:()=>Ds,UniSpeechSatForSequenceClassification:()=>Vs,UniSpeechSatModel:()=>Os,UniSpeechSatPreTrainedModel:()=>Ns,ViTForImageClassification:()=>An,ViTMAEModel:()=>Bn,ViTMAEPreTrainedModel:()=>In,ViTMSNForImageClassification:()=>Dn,ViTMSNModel:()=>On,ViTMSNPreTrainedModel:()=>Nn,ViTModel:()=>Sn,ViTPreTrainedModel:()=>vn,VisionEncoderDecoderModel:()=>ar,VitMatteForImageMatting:()=>Wn,VitMattePreTrainedModel:()=>$n,VitsModel:()=>xa,VitsModelOutput:()=>ml,VitsPreTrainedModel:()=>ba,Wav2Vec2BertForCTC:()=>qs,Wav2Vec2BertForSequenceClassification:()=>$s,Wav2Vec2BertModel:()=>Gs,Wav2Vec2BertPreTrainedModel:()=>Rs,Wav2Vec2ForAudioFrameClassification:()=>Ps,Wav2Vec2ForCTC:()=>ys,Wav2Vec2ForSequenceClassification:()=>Fs,Wav2Vec2Model:()=>ks,Wav2Vec2PreTrainedModel:()=>xs,WavLMForAudioFrameClassification:()=>ea,WavLMForCTC:()=>Js,WavLMForSequenceClassification:()=>Ks,WavLMForXVector:()=>Zs,WavLMModel:()=>Ys,WavLMPreTrainedModel:()=>Hs,WeSpeakerResNetModel:()=>Es,WeSpeakerResNetPreTrainedModel:()=>As,WhisperForConditionalGeneration:()=>sr,WhisperModel:()=>or,WhisperPreTrainedModel:()=>nr,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassificatio
n:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>Kt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>il,YolosForObjectDetection:()=>us,YolosModel:()=>ds,YolosObjectDetectionOutput:()=>ps,YolosPreTrainedModel:()=>cs});var n=r(/*! ./configs.js */"./src/configs.js"),o=r(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=r(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=r(/*! ./utils/generic.js */"./src/utils/generic.js"),i=r(/*! ./utils/core.js */"./src/utils/core.js"),l=r(/*! ./utils/hub.js */"./src/utils/hub.js"),c=r(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=r(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=r(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=r(/*! ./utils/maths.js */"./src/utils/maths.js"),h=r(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=r(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=r(/*! ./env.js */"./src/env.js"),f=r(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=r(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,T=2,b=3,x=4,k=5,y=6,F=7,P=new Map,C=new Map,v=new Map;async function S(e,t,r){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,r){let a=r.device;a&&"string"!=typeof a&&(a.hasOwnProperty(t)?a=a[t]:(console.warn(`device not specified for "${t}". 
Using the default device.`),a=null));const i=a??(_.apis.IS_NODE_ENV?"cpu":"wasm"),c=(0,o.deviceToExecutionProviders)(i);let d=r.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=s.DEFAULT_DEVICE_DTYPE_MAPPING[i]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${i}).`)));const u=d;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(u))throw new Error(`Invalid dtype: ${u}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(u===s.DATA_TYPES.fp16&&"webgpu"===i&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${i}) does not support fp16.`);const p=s.DEFAULT_DTYPE_SUFFIX_MAPPING[u],h=`${r.subfolder??""}/${t}${p}.onnx`,m={...r.session_options}??{};m.executionProviders??=c;const f=(0,l.getModelFile)(e,h,!0,r);let g=[];if(r.use_external_data_format&&(!0===r.use_external_data_format||"object"==typeof r.use_external_data_format&&r.use_external_data_format.hasOwnProperty(t)&&!0===r.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const n=`${t}${p}.onnx_data`,o=`${r.subfolder??""}/${n}`;g.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,r);t({path:n,data:a})})))}else void 0!==m.externalData&&(g=m.externalData.map((async t=>{if("string"==typeof t.data){const n=await(0,l.getModelFile)(e,t.data,!0,r);return{...t,data:n}}return t})));if(g.length>0&&(m.externalData=await Promise.all(g)),"webgpu"===i){const e=(0,n.getKeyValueShapes)(r.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const r in e)t[r]="gpu-buffer";m.preferredOutputLocation=t}}return{buffer:await f,session_options:m}}(e,t[a],r);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const r=function(e,t){const r=Object.create(null),n=[];for(const s of e.inputNames){const e=t[s];e instanceof 
u.Tensor?r[s]=(0,o.isONNXProxy)()?e.clone():e:n.push(s)}if(n.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${n.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let r=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). The following inputs will be ignored: "${r.join(", ")}".`)}return r}(e,t);try{const t=Object.fromEntries(Object.entries(r).map((([e,t])=>[e,t.ort_tensor])));let n=await e.run(t);return n=E(n),n}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",r),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:r,input_ids:n,decoder_input_ids:o,...s}=t;if(!r){const n=(0,i.pick)(t,e.sessions.model.inputNames);r=(await B(e,n)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=r,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const r=e.sessions.model,n=(0,i.pick)(t,r.inputNames);if(r.inputNames.includes("inputs_embeds")&&!n.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model 
inputs.");n.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return r.inputNames.includes("token_type_ids")&&!n.token_type_ids&&(n.token_type_ids=new u.Tensor("int64",new BigInt64Array(n.input_ids.data.length),n.input_ids.dims)),await A(r,n)}async function N(e,t,r=!1){const n=e.sessions[r?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;n.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),n.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:r,inputs_embeds:n,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let r=BigInt(0);for(let e=0;e<a;++e){const n=t+e;0n===o.data[n]?i[n]=BigInt(1):(i[n]=r,r+=o.data[n])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(r??n).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,n.inputNames);return await A(n,a)}async function O(e,{input_ids:t=null,attention_mask:r=null,pixel_values:n=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),n&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:n});({inputs_embeds:s,attention_mask:r}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:r}))}else if(a&&n&&1===t.dims[1]){const e=t.dims[1],n=Object.values(a)[0].dims.at(-2);r=(0,u.cat)([(0,u.ones)([t.dims[0],n]),r.slice(null,[r.dims[1]-e,r.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:r,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,r,n){if(r.past_key_values){const t=Object.values(r.past_key_values)[0].dims.at(-2),{input_ids:n,attention_mask:o}=r;if(o&&o.dims[1]>n.dims[1]);else if(t<n.dims[1])r.input_ids=n.slice(null,[t,null]);else 
if(null!=e.config.image_token_index&&n.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=n.dims[1]-(t-o);r.input_ids=n.slice(null,[-s,null]),r.attention_mask=(0,u.ones)([1,t+s])}}return r}function V(e,t,r,n){return r.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...r,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const r=v.get(this.constructor),n=P.get(r);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,n){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case T:case b:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case y:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:r,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=P.get(_);let g;if(r=m.config=await n.AutoConfig.from_pretrained(e,m),f===x)g=await 
Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===T||f===b)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===k)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===y){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};r.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??r?.model_type}' not found, assuming encoder-only architecture. 
Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(r,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,r=null){const n=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&n.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&n.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&n.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&n.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&n.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&n.push(new c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&n.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const r=t>1||null===e.forced_bos_token_id?t:t+1;n.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,r))}return null!==e.guidance_scale&&e.guidance_scale>1&&n.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==r&&n.extend(r),n}_prepare_generation_config(e,t,r=d.GenerationConfig){const n={...this.config};for(const e of["decoder","generator","text_config"])e in n&&Object.assign(n,n[e]);const o=new r(n);return"generation_config"in 
this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const r=new h.StoppingCriteriaList;return null!==e.max_length&&r.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&r.push(new h.EosTokenCriteria(e.eos_token_id)),t&&r.extend(t),r}_validate_model_class(){if(!this.can_generate){const e=[ii,di,ai,ti],t=v.get(this.constructor),r=new Set,n=this.config.model_type;for(const t of e){const e=t.get(n);e&&r.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw r.size>0&&(o+=` Please use the following class instead: ${[...r].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:r,is_encoder_decoder:n}){return r.past_key_values=this.getPastKeyValues(t,r.past_key_values),r.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),n||(r.attention_mask=(0,u.cat)([r.attention_mask,(0,u.ones)([r.attention_mask.dims[0],1])],1)),r.position_ids=null,r}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:r}){const n=(0,i.pick)(r,this.forward_params),o=this.main_input_name;if(o in n){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. 
Make sure to either pass {inputs} or {input_name}=...")}else n[o]=e;return{inputs_tensor:n[o],model_inputs:n,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:r,generation_config:n}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:r,attention_mask:n,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==n.guidance_scale&&n.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:r,decoder_start_token_id:n,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=r;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(n??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[n]));else if(Array.isArray(n)){if(n.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${n.length}`);a=n}else a=Array.from({length:e},(()=>[n]));return a=L(a),r.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async generate({inputs:e=null,generation_config:t=null,logits_processor:r=null,stopping_criteria:n=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const 
c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,r),_=this._get_stopping_criteria(t,n),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();o&&o.put(w);let T=null,b={};for(;;){i=this.prepare_inputs_for_generation(w,i,t);const e=await this.forward(i);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in b||(b[e]=[]),b[e].push(t[e])}const r=h(w,e.logits.slice(null,-1,null)),n=[];for(let e=0;e<r.dims.at(0);++e){const t=r[e],o=await g(t);for(const[t,r]of o){const o=BigInt(t);M[e]+=r,w[e].push(o),n.push([o]);break}}o&&o.put(n);if(_(w).every((e=>e))){t.return_dict_in_generate&&(T=this.getPastKeyValues(e,i.past_key_values,!1));break}i=this._update_model_kwargs_for_generation({generated_input_ids:n,outputs:e,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:T,...b}:x}getPastKeyValues(e,t,r=!0){const n=Object.create(null);for(const o in e)if(o.startsWith("present")){const s=o.replace("present","past_key_values");if(t&&o.includes("encoder"))n[s]=t[s];else{if(r&&t){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}n[s]=e[o]}}return n}getAttentions(e){const t={};for(const r of["cross_attentions","encoder_attentions","decoder_attentions"])for(const n in e)n.startsWith(r)&&(r in t||(t[r]=[]),t[r].push(e[n]));return 
t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",r="float16"===t?new Uint16Array:[],o=(0,n.getKeyValueShapes)(this.config);for(const n in o)e[n]=new u.Tensor(t,r,o[n])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:r=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=r}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new cl(await super._call(e))}}class X extends ${async _call(e){return new al(await super._call(e))}}class Q extends ${async _call(e){return new ll(await super._call(e))}}class H extends ${async _call(e){return new dl(await super._call(e))}}class Y extends R{}class J extends Y{}class K extends R{}class Z extends K{}class ee extends K{async _call(e){return new cl(await super._call(e))}}class te extends K{async _call(e){return new al(await super._call(e))}}class re extends K{async _call(e){return new ll(await super._call(e))}}class ne extends K{async _call(e){return new dl(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new cl(await super._call(e))}}class ie extends oe{async _call(e){return new al(await super._call(e))}}class le extends oe{async _call(e){return new ll(await super._call(e))}}class ce extends oe{async _call(e){return new dl(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new cl(await super._call(e))}}class he 
extends de{async _call(e){return new al(await super._call(e))}}class me extends de{async _call(e){return new ll(await super._call(e))}}class _e extends de{async _call(e){return new dl(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new cl(await super._call(e))}}class we extends fe{async _call(e){return new al(await super._call(e))}}class Te extends fe{async _call(e){return new ll(await super._call(e))}}class be extends fe{async _call(e){return new dl(await super._call(e))}}class xe extends R{}class ke extends xe{}class ye extends xe{async _call(e){return new cl(await super._call(e))}}class Fe extends xe{async _call(e){return new al(await super._call(e))}}class Pe extends xe{async _call(e){return new ll(await super._call(e))}}class Ce extends xe{async _call(e){return new dl(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new cl(await super._call(e))}}class Ee extends ve{async _call(e){return new al(await super._call(e))}}class Le extends ve{async _call(e){return new ll(await super._call(e))}}class ze extends ve{async _call(e){return new dl(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new al(await super._call(e))}}class Oe extends Ie{async _call(e){return new ll(await super._call(e))}}class De extends Ie{async _call(e){return new dl(await super._call(e))}}class Ve extends Ie{async _call(e){return new cl(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new cl(await super._call(e))}}class qe extends je{async _call(e){return new al(await super._call(e))}}class $e extends je{async _call(e){return new ll(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new cl(await super._call(e))}}class Qe extends We{async _call(e){return new al(await super._call(e))}}class He extends We{async 
_call(e){return new dl(await super._call(e))}}class Ye extends R{}class Je extends Ye{}class Ke extends Ye{async _call(e){return new cl(await super._call(e))}}class Ze extends Ye{async _call(e){return new al(await super._call(e))}}class et extends Ye{async _call(e){return new ll(await super._call(e))}}class tt extends Ye{async _call(e){return new dl(await super._call(e))}}class rt extends R{}class nt extends rt{}class ot extends rt{async _call(e){return new cl(await super._call(e))}}class st extends rt{async _call(e){return new al(await super._call(e))}}class at extends rt{async _call(e){return new dl(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new al(await super._call(e))}}class dt extends it{async _call(e){return new dl(await super._call(e))}}class ut extends it{async _call(e){return new cl(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class wt extends Mt{}class Tt extends Mt{}class bt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class xt extends bt{}class kt extends bt{}class yt extends bt{async _call(e){return new al(await super._call(e))}}class Ft extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Pt extends Ft{}class Ct extends Ft{}class vt extends Ft{async _call(e){return new al(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class It extends zt{}class Bt extends 
zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new cl(await super._call(e))}}class Vt extends Nt{async _call(e){return new al(await super._call(e))}}class jt extends Nt{async _call(e){return new ll(await super._call(e))}}class Rt extends Nt{async _call(e){return new dl(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new cl(await super._call(e))}}class Wt extends Gt{async _call(e){return new al(await super._call(e))}}class Ut extends Gt{async _call(e){return new ll(await super._call(e))}}class Xt extends Gt{async _call(e){return new dl(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new cl(await super._call(e))}}class Jt extends Qt{async _call(e){return new al(await super._call(e))}}class Kt extends Qt{async _call(e){return new ll(await super._call(e))}}class Zt extends Qt{async _call(e){return new dl(await super._call(e))}}class er extends R{}class tr extends er{}class rr extends er{}class nr extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class or extends nr{}class sr extends nr{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let r=e.language;const n=e.task;if(e.is_multilingual){r||(console.warn("No language specified - defaulting to English (en)."),r="en");const o=`<|${(0,g.whisper_language_to_code)(r)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[n??"transcribe"])}else if(r||n)throw new Error("Cannot specify `task` or `language` for an English-only model. 
If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:r=null,stopping_criteria:n=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(r??=new c.LogitsProcessorList,r.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(r??=new c.LogitsProcessorList,r.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:r,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,r=null,n=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");null==r&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. 
This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return r?a[e].slice(null,t,null,[0,r]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let r=0;r<t.dims[0];++r){const n=t[r],s=c[e][r][0].data,a=d[e][r][0].data;for(let e=0;e<n.dims[0];++e){let t=n[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[r,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:r.length-1},((e,t)=>r[t+1]-r[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*n);f[e].data.set(l,1)}return f}}class ar extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class ir extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class lr extends ir{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:r,attention_mask:n}){const o=this.config.image_token_index,s=r.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:n};const 
l=[],c=[];for(let r=0;r<s.length;++r){const o=s[r],a=e[r],i=t[r],d=n[r];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class cr extends lr{}class dr extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,r){super(e,t),this.generation_config=r}}class ur extends dr{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:r,attention_mask:n}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),n],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:r,attention_mask:n}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:r,attention_mask:n}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:n})):r=o||s,{inputs_embeds:r,attention_mask:n}}async forward({input_ids:e,pixel_values:t,attention_mask:r,decoder_input_ids:n,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:r}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:r})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:r});s=e}if(!l){if(!n)throw new Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:n})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:r,encoder_hidden_states:s,past_key_values:a};return await 
N(this,c,!0)}}
/* NOTE(review): minified interior of src/models.js in a webpack bundle; the original
   line wraps may fall mid-token (one even splits a string literal), so code tokens below
   are preserved byte-identical and comments are only inserted at statement boundaries.
   Single-letter names presumably map to the public names registered by this module's
   export table (e.g. R=PreTrainedModel, G=ModelOutput, pr=CLIPPreTrainedModel) — confirm
   against the r.d() export list at the end of the module.
   First block: CLIP/Siglip/ChineseCLIP/CLIPSeg wrappers — the *TextModel*/*VisionModel*
   variants default options.model_file_name to "text_model"/"vision_model"; then the long
   run of decoder-only LM pairs (Model / ForCausalLM) whose constructors stash a
   generation_config; then vision backbones whose *ForImageClassification subclasses wrap
   _call results in a SequenceClassifierOutput (al). */
class pr extends R{}class hr extends pr{}class mr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class _r extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mr extends R{}class wr extends Mr{}class Tr extends Mr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class br extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class xr extends R{}class kr extends xr{}class yr extends R{}class Fr extends yr{}class Pr extends yr{}class Cr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class vr extends Cr{}class Sr extends Cr{}class Ar extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Er extends Ar{}class Lr extends Ar{}class zr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ir extends zr{}class Br extends zr{}class Nr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Or extends Nr{}class Dr extends Nr{}class Vr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class jr extends Vr{}class Rr extends Vr{}class Gr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class qr extends Gr{}class $r extends Gr{}class Wr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ur extends Wr{}class Xr extends Wr{}class Qr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Hr extends Qr{}class Yr extends Qr{}class Jr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Kr extends Jr{}class Zr 
extends Jr{}class en extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class tn extends en{}class rn extends en{}class nn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class on extends nn{}class sn extends nn{}class an extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ln extends an{}class cn extends an{}class dn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class un extends dn{}class pn extends dn{}class hn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class mn extends hn{}class _n extends hn{}class fn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class gn extends fn{}class Mn extends fn{}class wn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Tn extends wn{}class bn extends wn{}class xn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class kn extends xn{}class yn extends xn{}class Fn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Pn extends Fn{}class Cn extends Fn{}class vn extends R{}class Sn extends vn{}class An extends vn{async _call(e){return new al(await super._call(e))}}class En extends R{}class Ln extends En{}class zn extends En{async _call(e){return new al(await super._call(e))}}class In extends R{}class Bn extends In{}class Nn extends R{}class On extends Nn{}class Dn extends Nn{async _call(e){return new al(await super._call(e))}}class Vn extends R{}class jn extends Vn{}class Rn extends R{}class Gn extends Rn{}class qn extends Rn{async _call(e){return new al(await super._call(e))}}class $n extends R{}class Wn extends $n{async _call(e){return new hl(await super._call(e))}}class Un extends R{}class Xn extends Un{}class Qn extends Un{async _call(e){return new al(await super._call(e))}}class Hn extends R{}class Yn extends Hn{}class Jn extends Hn{async _call(e){return new al(await super._call(e))}}class Kn extends R{}class Zn extends Kn{}class eo extends Kn{}class to extends 
R{}class ro extends to{}class no extends to{}class oo extends R{}class so extends oo{}class ao extends oo{async _call(e){return new al(await super._call(e))}}class io extends R{}class lo extends io{}class co extends io{async _call(e){return new po(await super._call(e))}}class uo extends io{async _call(e){return new ho(await super._call(e))}}class po extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ho extends G{constructor({logits:e,pred_boxes:t,pred_masks:r}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=r}}class mo extends R{}class _o extends mo{}class fo extends mo{async _call(e){return new go(await super._call(e))}}class go extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Mo extends R{}class wo extends Mo{}class To extends Mo{async _call(e){return new bo(await super._call(e))}}class bo extends po{}class xo extends R{}class ko extends xo{}class yo extends xo{async _call(e){return new al(await super._call(e))}}class Fo extends R{}class Po extends Fo{}class Co extends Fo{async _call(e){return new al(await super._call(e))}}class vo extends R{}class So extends vo{}class Ao extends vo{async _call(e){return new al(await super._call(e))}}class Eo extends R{}class Lo extends Eo{}class zo extends Eo{async _call(e){return new al(await super._call(e))}}class Io extends R{}class Bo extends Io{}class No extends Io{}class Oo extends R{}class Do extends Oo{}class Vo extends Oo{}class jo extends R{}class Ro extends jo{}class Go extends R{}class qo extends Go{}class $o extends Go{}class Wo extends Go{}class Uo extends R{}class Xo extends Uo{}class Qo extends Uo{}class Ho extends R{}class Yo extends Ho{}class Jo extends Ho{}class Ko extends R{}class Zo extends Ko{}class es extends R{}class ts extends es{}class rs extends es{async _call(e){return new al(await super._call(e))}}class ns extends R{}class os extends ns{}class ss extends ns{async _call(e){return new al(await 
super._call(e))}}class as extends R{}class is extends as{}class ls extends as{async _call(e){return new al(await super._call(e))}}class cs extends R{}class ds extends cs{}class us extends cs{async _call(e){return new ps(await super._call(e))}}class ps extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}
/* SAM (segment-anything, ms): forward() caches image embeddings if missing, auto-fills
   input_labels with int64 ones matching input_points' leading dims when only points are
   given, then runs the prompt_encoder_mask_decoder session with whichever of
   input_points/input_labels/input_boxes are present. */
class hs extends R{}class ms extends hs{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),r=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(r).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new _s(await super._call(e))}}class _s extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class fs extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class gs extends fs{}class Ms extends fs{}class ws extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ts extends ws{}class bs extends ws{}class xs extends R{}class ks extends xs{}class ys extends xs{async _call(e){return new ul(await super._call(e))}}class Fs extends xs{async _call(e){return new al(await super._call(e))}}class Ps extends xs{async _call(e){return new ll(await super._call(e))}}class Cs extends R{}class vs extends Cs{}class Ss extends Cs{async _call(e){return new ll(await super._call(e))}}class As extends R{}class Es extends As{}class Ls extends R{}class zs extends Ls{}class Is extends Ls{async _call(e){return new ul(await super._call(e))}}class Bs extends Ls{async 
_call(e){return new al(await super._call(e))}}class Ns extends R{}class Os extends Ns{}class Ds extends Ns{async _call(e){return new ul(await super._call(e))}}class Vs extends Ns{async _call(e){return new al(await super._call(e))}}class js extends Ns{async _call(e){return new ll(await super._call(e))}}class Rs extends R{}class Gs extends Rs{}class qs extends Rs{async _call(e){return new ul(await super._call(e))}}class $s extends Rs{async _call(e){return new al(await super._call(e))}}class Ws extends R{}class Us extends xs{}class Xs extends xs{async _call(e){return new ul(await super._call(e))}}class Qs extends xs{async _call(e){return new al(await super._call(e))}}class Hs extends R{}class Ys extends Hs{}class Js extends Hs{async _call(e){return new ul(await super._call(e))}}class Ks extends Hs{async _call(e){return new al(await super._call(e))}}class Zs extends Hs{async _call(e){return new il(await super._call(e))}}class ea extends Hs{async _call(e){return new ll(await super._call(e))}}
/* SpeechT5 TTS (oa.generate_speech): encodes input_ids once, then runs an autoregressive
   mel-spectrogram loop against the merged decoder session (KV-cache threaded through
   addPastKeyValues/getPastKeyValues). The loop breaks once step count is past
   minlenratio-derived minimum AND either any stop probability >= threshold or the
   maxlenratio-derived cap is reached; finally the concatenated spectrogram is vocoded
   into a waveform by the supplied vocoder's session. */
class ta extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ra extends ta{}class na extends ta{}class oa extends ta{async generate_speech(e,t,{threshold:r=.5,minlenratio:n=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*n),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let n;n=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:n,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=r)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class 
sa extends R{main_input_name="spectrogram"}class aa extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ia extends aa{}class la extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ca extends la{}class da extends la{}class ua extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class pa extends ua{}class ha extends ua{}class ma extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class _a extends ma{}class fa extends ma{}class ga extends R{}class Ma extends ga{}class wa extends ga{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ta extends ga{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class ba extends R{}class xa extends ba{async _call(e){return new ml(await super._call(e))}}class ka extends R{}class ya extends ka{}class Fa extends ka{}class Pa extends ka{}class Ca extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class va extends Ca{}class Sa extends Ca{}class Aa extends R{}class Ea extends Aa{}class La extends Aa{async _call(e){return new al(await super._call(e))}}class za extends R{}class Ia extends za{}class Ba extends za{}
/* MusicGen (Na): _apply_and_filter_by_delay_pattern_mask compacts generated tokens by
   dropping decoder pad tokens and out-of-window codebook positions, then reshapes to
   [batch/num_codebooks, num_codebooks, len]; prepare_inputs_for_generation pads the
   "delayed" codebook positions and doubles the batch when r.guidance_scale > 1
   (classifier-free guidance); generate() decodes the resulting audio codes through the
   encodec_decode session and returns audio_values. */
class Na extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}_apply_and_filter_by_delay_pattern_mask(e){const[t,r]=e.dims,n=this.config.decoder.num_codebooks,o=r-n;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const a=t%r-Math.floor(t/r)%n;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/n),i=s/(a*n);return new u.Tensor(e.type,e.data.slice(0,s),[a,n,i])}prepare_inputs_for_generation(e,t,r){let n=structuredClone(e);for(let e=0;e<n.length;++e)for(let 
t=0;t<n[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(n[e][t]=BigInt(this.config.decoder.pad_token_id));null!==r.guidance_scale&&r.guidance_scale>1&&(n=n.concat(n));return super.prepare_inputs_for_generation(n,t,r)}async generate(e){const t=await super.generate(e),r=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:n}=await A(this.sessions.encodec_decode,{audio_codes:r});return n}}class Oa extends R{}class Da extends Oa{}class Va extends Oa{async _call(e){return new al(await super._call(e))}}class ja extends R{}class Ra extends ja{}class Ga extends ja{async _call(e){return new al(await super._call(e))}}class qa extends R{}class $a extends qa{}class Wa extends qa{async _call(e){return new al(await super._call(e))}}class Ua extends R{}class Xa extends Ua{}class Qa extends Ua{async _call(e){return new al(await super._call(e))}}class Ha extends R{}class Ya extends Ha{}
/* Ja (exported as PretrainedMixin): base for the AutoModel* facades. from_pretrained
   resolves the config via AutoConfig, scans each Map in static MODEL_CLASS_MAPPINGS for
   config.model_type, and delegates to the matched [name, class] pair's class. If nothing
   matches: warn and fall back to base class R when BASE_IF_FAIL is set, else throw. */
class Ja{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){const h={progress_callback:t,config:r,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await n.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const r=t.get(h.config.model_type);if(r)return await r[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}
/* Task registries: each Map is model_type -> [exported class name, minified class].
   Ka=base models, Za=encoder-decoder, ei=decoder-only, ti=speech seq2seq,
   ri=text-to-spectrogram, ni=text-to-waveform, oi/si=sequence/token classification,
   ai=seq2seq LM, ii=causal LM, li=masked LM, ci=question answering, di/pi=vision2seq /
   document QA, ui=image-text-to-text, hi…vi=vision & audio task heads. (Original wraps
   split one entry's string across lines — kept verbatim.) */
const Ka=new 
Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Ma]],["clip",["CLIPModel",hr]],["clipseg",["CLIPSegModel",Fr]],["chinese_clip",["ChineseCLIPModel",kr]],["siglip",["SiglipModel",wr]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",nt]],["wav2vec2",["Wav2Vec2Model",ks]],["wav2vec2-bert",["Wav2Vec2BertModel",Gs]],["unispeech",["UniSpeechModel",zs]],["unispeech-sat",["UniSpeechSatModel",Os]],["hubert",["HubertModel",Us]],["wavlm",["WavLMModel",Ys]],["audio-spectrogram-transformer",["ASTModel",tr]],["vits",["VitsModel",xa]],["pyannote",["PyAnnoteModel",vs]],["wespeaker-resnet",["WeSpeakerResNetModel",Es]],["detr",["DetrModel",lo]],["rt_detr",["RTDetrModel",_o]],["table-transformer",["TableTransformerModel",wo]],["vit",["ViTModel",Sn]],["pvt",["PvtModel",Ln]],["vit_msn",["ViTMSNModel",On]],["vit_mae",["ViTMAEModel",Bn]],["groupvit",["GroupViTModel",jn]],["fastvit",["FastViTModel",Gn]],["mobilevit",["MobileViTModel",Xn]],["mobilevitv2",["MobileViTV2Model",Yn]],["owlvit",["OwlViTModel",Zn]],["owlv2",["Owlv2Model",ro]],["beit",["BeitModel",so]],["deit",["DeiTModel",ko]],["hiera",["HieraModel",Po]],["convnext",["ConvNextModel",ts]],["convnextv2",["ConvNextV2Model",os]],["dinov2",["Dinov2Model",is]],["resnet",["ResNetModel",So]],["swin",["SwinModel",Lo]],["swin2sr",["Swin2SRModel",Bo]],["donut-swin",["DonutSwinModel",Zo]],["yolos",["YolosModel",ds]],["dpt",["DPTModel",Do]],["glpn",["GLPNModel",Yo]],["hifigan",["SpeechT5HifiGan",sa]],["efficientnet",["EfficientNetModel",Ea]],["decision_transformer",["Dec
isionTransformerModel",Ya]],["mobilenet_v1",["MobileNetV1Model",Da]],["mobilenet_v2",["MobileNetV2Model",Ra]],["mobilenet_v3",["MobileNetV3Model",$a]],["mobilenet_v4",["MobileNetV4Model",Xa]],["maskformer",["MaskFormerModel",Xo]]]),Za=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Pt]],["marian",["MarianModel",gs]],["whisper",["WhisperModel",or]],["m2m_100",["M2M100Model",Ts]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),ei=new Map([["bloom",["BloomModel",Tn]],["jais",["JAISModel",Er]],["gpt2",["GPT2Model",vr]],["gptj",["GPTJModel",jr]],["gpt_bigcode",["GPTBigCodeModel",qr]],["gpt_neo",["GPTNeoModel",Ir]],["gpt_neox",["GPTNeoXModel",Or]],["codegen",["CodeGenModel",Ur]],["llama",["LlamaModel",Hr]],["cohere",["CohereModel",Kr]],["gemma",["GemmaModel",tn]],["gemma2",["Gemma2Model",on]],["openelm",["OpenELMModel",ln]],["qwen2",["Qwen2Model",un]],["phi",["PhiModel",mn]],["phi3",["Phi3Model",gn]],["mpt",["MptModel",kn]],["opt",["OPTModel",Pn]],["mistral",["MistralModel",ca]],["starcoder2",["Starcoder2Model",pa]],["falcon",["FalconModel",_a]],["stablelm",["StableLmModel",va]]]),ti=new Map([["speecht5",["SpeechT5ForSpeechToText",na]],["whisper",["WhisperForConditionalGeneration",sr]]]),ri=new Map([["speecht5",["SpeechT5ForTextToSpeech",oa]]]),ni=new Map([["vits",["VitsModel",xa]],["musicgen",["MusicgenForConditionalGeneration",Na]]]),oi=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",yt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),si=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",re]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",Te]],["deberta",["DebertaForTokenClassification",Pe]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Kt]]]),ai=new 
Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",Tt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Ct]],["marian",["MarianMTModel",Ms]],["m2m_100",["M2M100ForConditionalGeneration",bs]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),ii=new Map([["bloom",["BloomForCausalLM",bn]],["gpt2",["GPT2LMHeadModel",Sr]],["jais",["JAISLMHeadModel",Lr]],["gptj",["GPTJForCausalLM",Rr]],["gpt_bigcode",["GPTBigCodeForCausalLM",$r]],["gpt_neo",["GPTNeoForCausalLM",Br]],["gpt_neox",["GPTNeoXForCausalLM",Dr]],["codegen",["CodeGenForCausalLM",Xr]],["llama",["LlamaForCausalLM",Yr]],["cohere",["CohereForCausalLM",Zr]],["gemma",["GemmaForCausalLM",rn]],["gemma2",["Gemma2ForCausalLM",sn]],["openelm",["OpenELMForCausalLM",cn]],["qwen2",["Qwen2ForCausalLM",pn]],["phi",["PhiForCausalLM",_n]],["phi3",["Phi3ForCausalLM",Mn]],["mpt",["MptForCausalLM",yn]],["opt",["OPTForCausalLM",Cn]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",da]],["starcoder2",["Starcoder2ForCausalLM",ha]],["falcon",["FalconForCausalLM",fa]],["trocr",["TrOCRForCausalLM",ia]],["stablelm",["StableLmForCausalLM",Sa]]]),li=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ye]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Ke]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),ci=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",ne]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",be]],["deberta",["DebertaForQuestionAnswering",Ce]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),di=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ar]]]),ui=new Map([["llava",["LlavaForConditionalGeneration",lr]],["moondream1",["Moondream1ForConditionalGeneration",cr]],["florence2",["Florence2ForConditionalGeneration",ur]]]),pi=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ar]]]),hi=new 
Map([["vit",["ViTForImageClassification",An]],["pvt",["PvtForImageClassification",zn]],["vit_msn",["ViTMSNForImageClassification",Dn]],["fastvit",["FastViTForImageClassification",qn]],["mobilevit",["MobileViTForImageClassification",Qn]],["mobilevitv2",["MobileViTV2ForImageClassification",Jn]],["beit",["BeitForImageClassification",ao]],["deit",["DeiTForImageClassification",yo]],["hiera",["HieraForImageClassification",Co]],["convnext",["ConvNextForImageClassification",rs]],["convnextv2",["ConvNextV2ForImageClassification",ss]],["dinov2",["Dinov2ForImageClassification",ls]],["resnet",["ResNetForImageClassification",Ao]],["swin",["SwinForImageClassification",zo]],["segformer",["SegformerForImageClassification",Fa]],["efficientnet",["EfficientNetForImageClassification",La]],["mobilenet_v1",["MobileNetV1ForImageClassification",Va]],["mobilenet_v2",["MobileNetV2ForImageClassification",Ga]],["mobilenet_v3",["MobileNetV3ForImageClassification",Wa]],["mobilenet_v4",["MobileNetV4ForImageClassification",Qa]]]),mi=new Map([["detr",["DetrForObjectDetection",co]],["rt_detr",["RTDetrForObjectDetection",fo]],["table-transformer",["TableTransformerForObjectDetection",To]],["yolos",["YolosForObjectDetection",us]]]),_i=new Map([["owlvit",["OwlViTForObjectDetection",eo]],["owlv2",["Owlv2ForObjectDetection",no]]]),fi=new Map([["detr",["DetrForSegmentation",uo]],["clipseg",["CLIPSegForImageSegmentation",Pr]]]),gi=new Map([["segformer",["SegformerForSemanticSegmentation",Pa]],["sapiens",["SapiensForSemanticSegmentation",qo]]]),Mi=new Map([["detr",["DetrForSegmentation",uo]],["maskformer",["MaskFormerForInstanceSegmentation",Qo]]]),wi=new Map([["sam",["SamModel",ms]]]),Ti=new Map([["wav2vec2",["Wav2Vec2ForCTC",ys]],["wav2vec2-bert",["Wav2Vec2BertForCTC",qs]],["unispeech",["UniSpeechForCTC",Is]],["unispeech-sat",["UniSpeechSatForCTC",Ds]],["wavlm",["WavLMForCTC",Js]],["hubert",["HubertForCTC",Xs]]]),bi=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Fs]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",$s]],["unispeech",["UniSpeechForSequenceClassification",Bs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Vs]],["wavlm",["WavLMForSequenceClassification",Ks]],["hubert",["HubertForSequenceClassification",Qs]],["audio-spectrogram-transformer",["ASTForAudioClassification",rr]]]),xi=new Map([["wavlm",["WavLMForXVector",Zs]]]),ki=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",js]],["wavlm",["WavLMForAudioFrameClassification",ea]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Ps]],["pyannote",["PyAnnoteForAudioFrameClassification",Ss]]]),yi=new Map([["vitmatte",["VitMatteForImageMatting",Wn]]]),Fi=new Map([["swin2sr",["Swin2SRForImageSuperResolution",No]]]),Pi=new Map([["dpt",["DPTForDepthEstimation",Vo]],["depth_anything",["DepthAnythingForDepthEstimation",Ro]],["glpn",["GLPNForDepthEstimation",Jo]],["sapiens",["SapiensForDepthEstimation",$o]]]),Ci=new Map([["sapiens",["SapiensForNormalEstimation",Wo]]]),vi=new Map([["clip",["CLIPVisionModelWithProjection",gr]],["siglip",["SiglipVisionModel",br]]]),Si=[[Ka,M],[Za,w],[ei,x],[oi,M],[si,M],[ai,T],[ti,T],[ii,x],[li,M],[ci,M],[di,b],[ui,y],[hi,M],[fi,M],[Mi,M],[gi,M],[yi,M],[Fi,M],[Pi,M],[Ci,M],[mi,M],[_i,M],[wi,k],[Ti,M],[bi,M],[ri,T],[ni,M],[xi,M],[ki,M],[vi,M]];for(const[e,t]of Si)for(const[r,n]of e.values())P.set(r,t),v.set(n,r),C.set(r,n);
/* Ai: singleton registrations for classes not keyed by model_type (entries are
   [name, class, tag]); both loops populate the module-level lookup maps P/v/C —
   presumably name->tag, class->name, name->class; defined earlier, outside this view. */
const Ai=[["MusicgenForConditionalGeneration",Na,F],["CLIPTextModelWithProjection",_r,M],["SiglipTextModel",Tr,M],["ClapTextModelWithProjection",wa,M],["ClapAudioModelWithProjection",Ta,M]];for(const[e,t,r]of Ai)P.set(e,r),v.set(t,e),C.set(e,t);
/* AutoModel facades: Ei (AutoModel) searches every registry and may fall back to the
   base class; each other subclass restricts dispatch to a single task registry. */
class Ei extends Ja{static MODEL_CLASS_MAPPINGS=Si.map((e=>e[0]));static BASE_IF_FAIL=!0}class Li extends Ja{static MODEL_CLASS_MAPPINGS=[oi]}class zi extends Ja{static MODEL_CLASS_MAPPINGS=[si]}class Ii extends Ja{static MODEL_CLASS_MAPPINGS=[ai]}class Bi extends Ja{static 
MODEL_CLASS_MAPPINGS=[ti]}class Ni extends Ja{static MODEL_CLASS_MAPPINGS=[ri]}class Oi extends Ja{static MODEL_CLASS_MAPPINGS=[ni]}class Di extends Ja{static MODEL_CLASS_MAPPINGS=[ii]}class Vi extends Ja{static MODEL_CLASS_MAPPINGS=[li]}class ji extends Ja{static MODEL_CLASS_MAPPINGS=[ci]}class Ri extends Ja{static MODEL_CLASS_MAPPINGS=[di]}class Gi extends Ja{static MODEL_CLASS_MAPPINGS=[hi]}class qi extends Ja{static MODEL_CLASS_MAPPINGS=[fi]}class $i extends Ja{static MODEL_CLASS_MAPPINGS=[gi]}class Wi extends Ja{static MODEL_CLASS_MAPPINGS=[Mi]}class Ui extends Ja{static MODEL_CLASS_MAPPINGS=[mi]}class Xi extends Ja{static MODEL_CLASS_MAPPINGS=[_i]}class Qi extends Ja{static MODEL_CLASS_MAPPINGS=[wi]}class Hi extends Ja{static MODEL_CLASS_MAPPINGS=[Ti]}class Yi extends Ja{static MODEL_CLASS_MAPPINGS=[bi]}class Ji extends Ja{static MODEL_CLASS_MAPPINGS=[xi]}class Ki extends Ja{static MODEL_CLASS_MAPPINGS=[ki]}class Zi extends Ja{static MODEL_CLASS_MAPPINGS=[pi]}class el extends Ja{static MODEL_CLASS_MAPPINGS=[yi]}class tl extends Ja{static MODEL_CLASS_MAPPINGS=[Fi]}class rl extends Ja{static MODEL_CLASS_MAPPINGS=[Pi]}class nl extends Ja{static MODEL_CLASS_MAPPINGS=[Ci]}class ol extends Ja{static MODEL_CLASS_MAPPINGS=[vi]}
/* ModelOutput dataclasses (extend G): each constructor just copies its named fields,
   e.g. sl=Seq2SeqLMOutput, al=SequenceClassifierOutput, ul=CausalLMOutput,
   dl=QuestionAnsweringModelOutput, hl=ImageMattingOutput. */
class sl extends G{constructor({logits:e,past_key_values:t,encoder_outputs:r,decoder_attentions:n=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=r,this.decoder_attentions=n,this.cross_attentions=o}}class al extends G{constructor({logits:e}){super(),this.logits=e}}class il extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class ll extends G{constructor({logits:e}){super(),this.logits=e}}class cl extends G{constructor({logits:e}){super(),this.logits=e}}class dl extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class ul extends G{constructor({logits:e}){super(),this.logits=e}}class pl extends 
G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class hl extends G{constructor({alphas:e}){super(),this.alphas=e}}class ml extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
|
|
112
|
+
\***********************/(e,t,r)=>{r.r(t),r.d(t,{ASTForAudioClassification:()=>rr,ASTModel:()=>tr,ASTPreTrainedModel:()=>er,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>it,AutoModel:()=>Ei,AutoModelForAudioClassification:()=>Yi,AutoModelForAudioFrameClassification:()=>Ki,AutoModelForCTC:()=>Hi,AutoModelForCausalLM:()=>Di,AutoModelForDepthEstimation:()=>rl,AutoModelForDocumentQuestionAnswering:()=>Zi,AutoModelForImageClassification:()=>Gi,AutoModelForImageFeatureExtraction:()=>ol,AutoModelForImageMatting:()=>el,AutoModelForImageSegmentation:()=>qi,AutoModelForImageToImage:()=>tl,AutoModelForMaskGeneration:()=>Qi,AutoModelForMaskedLM:()=>Vi,AutoModelForNormalEstimation:()=>nl,AutoModelForObjectDetection:()=>Ui,AutoModelForQuestionAnswering:()=>ji,AutoModelForSemanticSegmentation:()=>$i,AutoModelForSeq2SeqLM:()=>Ii,AutoModelForSequenceClassification:()=>Li,AutoModelForSpeechSeq2Seq:()=>Bi,AutoModelForTextToSpectrogram:()=>Ni,AutoModelForTextToWaveform:()=>Oi,AutoModelForTokenClassification:()=>zi,AutoModelForUniversalSegmentation:()=>Wi,AutoModelForVision2Seq:()=>Ri,AutoModelForXVector:()=>Ji,AutoModelForZeroShotObjectDetection:()=>Xi,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>yt,BartModel:()=>xt,BartPretrainedModel:()=>bt,BaseModelOutput:()=>q,BeitForImageClassification:()=>ao,BeitModel:()=>so,BeitPreTrainedModel:()=>oo,BertForMaskedLM:()=>U,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>W,BertPreTrainedModel:()=>$,BlenderbotForConditionalGeneration:()=>Lt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>zt,BloomForCausalLM:()=>bn,BloomModel:()=>Tn,BloomPreTrainedModel:()=>wn,CLIPModel:()=>hr,CLIPPreTrainedModel:()=>pr,CLIPSegForImageSegmentation:()=>Pr,CLIPSegMode
l:()=>Fr,CLIPSegPreTrainedModel:()=>yr,CLIPTextModel:()=>mr,CLIPTextModelWithProjection:()=>_r,CLIPVisionModel:()=>fr,CLIPVisionModelWithProjection:()=>gr,CamembertForMaskedLM:()=>Me,CamembertForQuestionAnswering:()=>be,CamembertForSequenceClassification:()=>we,CamembertForTokenClassification:()=>Te,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>ul,CausalLMOutputWithPast:()=>pl,ChineseCLIPModel:()=>kr,ChineseCLIPPreTrainedModel:()=>xr,ClapAudioModelWithProjection:()=>Ta,ClapModel:()=>Ma,ClapPreTrainedModel:()=>ga,ClapTextModelWithProjection:()=>wa,CodeGenForCausalLM:()=>Xr,CodeGenModel:()=>Ur,CodeGenPreTrainedModel:()=>Wr,CohereForCausalLM:()=>Zr,CohereModel:()=>Kr,CoherePreTrainedModel:()=>Jr,ConvBertForMaskedLM:()=>ae,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ie,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>se,ConvBertPreTrainedModel:()=>oe,ConvNextForImageClassification:()=>rs,ConvNextModel:()=>ts,ConvNextPreTrainedModel:()=>es,ConvNextV2ForImageClassification:()=>ss,ConvNextV2Model:()=>os,ConvNextV2PreTrainedModel:()=>ns,DPTForDepthEstimation:()=>Vo,DPTModel:()=>Do,DPTPreTrainedModel:()=>Oo,DebertaForMaskedLM:()=>ye,DebertaForQuestionAnswering:()=>Ce,DebertaForSequenceClassification:()=>Fe,DebertaForTokenClassification:()=>Pe,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>ze,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>Le,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>ve,DecisionTransformerModel:()=>Ya,DecisionTransformerPreTrainedModel:()=>Ha,DeiTForImageClassification:()=>yo,DeiTModel:()=>ko,DeiTPreTrainedModel:()=>xo,DepthAnythingForDepthEstimation:()=>Ro,DepthAnythingPreTrainedModel:()=>jo,DetrForObjectDetection:()=>co,DetrForSegmentation:()=>uo,DetrModel:()=>lo,DetrObjectDetectionOutput:()=>po,DetrPreTrainedModel:()=>io,DetrSegmentationOutput:()=>ho,Dinov2ForImageClassification:()=>ls,Dinov
2Model:()=>is,Dinov2PreTrainedModel:()=>as,DistilBertForMaskedLM:()=>Ve,DistilBertForQuestionAnswering:()=>De,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassification:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Zo,DonutSwinPreTrainedModel:()=>Ko,EfficientNetForImageClassification:()=>La,EfficientNetModel:()=>Ea,EfficientNetPreTrainedModel:()=>Aa,ElectraForMaskedLM:()=>pe,ElectraForQuestionAnswering:()=>_e,ElectraForSequenceClassification:()=>he,ElectraForTokenClassification:()=>me,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>qe,EsmForTokenClassification:()=>$e,EsmModel:()=>Re,EsmPreTrainedModel:()=>je,FalconForCausalLM:()=>fa,FalconModel:()=>_a,FalconPreTrainedModel:()=>ma,FastViTForImageClassification:()=>qn,FastViTModel:()=>Gn,FastViTPreTrainedModel:()=>Rn,Florence2ForConditionalGeneration:()=>ur,Florence2PreTrainedModel:()=>dr,GLPNForDepthEstimation:()=>Jo,GLPNModel:()=>Yo,GLPNPreTrainedModel:()=>Ho,GPT2LMHeadModel:()=>Sr,GPT2Model:()=>vr,GPT2PreTrainedModel:()=>Cr,GPTBigCodeForCausalLM:()=>$r,GPTBigCodeModel:()=>qr,GPTBigCodePreTrainedModel:()=>Gr,GPTJForCausalLM:()=>Rr,GPTJModel:()=>jr,GPTJPreTrainedModel:()=>Vr,GPTNeoForCausalLM:()=>Br,GPTNeoModel:()=>Ir,GPTNeoPreTrainedModel:()=>zr,GPTNeoXForCausalLM:()=>Dr,GPTNeoXModel:()=>Or,GPTNeoXPreTrainedModel:()=>Nr,Gemma2ForCausalLM:()=>sn,Gemma2Model:()=>on,Gemma2PreTrainedModel:()=>nn,GemmaForCausalLM:()=>rn,GemmaModel:()=>tn,GemmaPreTrainedModel:()=>en,GroupViTModel:()=>jn,GroupViTPreTrainedModel:()=>Vn,HieraForImageClassification:()=>Co,HieraModel:()=>Po,HieraPreTrainedModel:()=>Fo,HubertForCTC:()=>Xs,HubertForSequenceClassification:()=>Qs,HubertModel:()=>Us,HubertPreTrainedModel:()=>Ws,ImageMattingOutput:()=>hl,JAISLMHeadModel:()=>Lr,JAISModel:()=>Er,JAISPreTrainedModel:()=>Ar,LlamaForCausalLM:()=>Yr,LlamaModel:()=>Hr,LlamaPreTrainedModel:()=>Qr,LlavaForConditionalGeneration:()=>lr,LlavaPreTrainedM
odel:()=>ir,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>_t,M2M100ForConditionalGeneration:()=>bs,M2M100Model:()=>Ts,M2M100PreTrainedModel:()=>ws,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Ct,MBartForSequenceClassification:()=>vt,MBartModel:()=>Pt,MBartPreTrainedModel:()=>Ft,MPNetForMaskedLM:()=>Ke,MPNetForQuestionAnswering:()=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Je,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>Tt,MT5Model:()=>wt,MT5PreTrainedModel:()=>Mt,MarianMTModel:()=>Ms,MarianModel:()=>gs,MarianPreTrainedModel:()=>fs,MaskFormerForInstanceSegmentation:()=>Qo,MaskFormerModel:()=>Xo,MaskFormerPreTrainedModel:()=>Uo,MaskedLMOutput:()=>cl,MistralForCausalLM:()=>da,MistralModel:()=>ca,MistralPreTrainedModel:()=>la,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>Ue,MobileBertPreTrainedModel:()=>We,MobileNetV1ForImageClassification:()=>Va,MobileNetV1Model:()=>Da,MobileNetV1PreTrainedModel:()=>Oa,MobileNetV2ForImageClassification:()=>Ga,MobileNetV2Model:()=>Ra,MobileNetV2PreTrainedModel:()=>ja,MobileNetV3ForImageClassification:()=>Wa,MobileNetV3Model:()=>$a,MobileNetV3PreTrainedModel:()=>qa,MobileNetV4ForImageClassification:()=>Qa,MobileNetV4Model:()=>Xa,MobileNetV4PreTrainedModel:()=>Ua,MobileViTForImageClassification:()=>Qn,MobileViTModel:()=>Xn,MobileViTPreTrainedModel:()=>Un,MobileViTV2ForImageClassification:()=>Jn,MobileViTV2Model:()=>Yn,MobileViTV2PreTrainedModel:()=>Hn,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>cr,MptForCausalLM:()=>yn,MptModel:()=>kn,MptPreTrainedModel:()=>xn,MusicgenForCausalLM:()=>Ba,MusicgenForConditionalGeneration:()=>Na,MusicgenModel:()=>Ia,MusicgenPreTrainedModel:()=>za,NomicBertModel:()=>J,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>Cn,OPTModel:()=>Pn,OPTPreTrainedModel:()=>Fn,OpenELMForCausalLM:()=>cn,OpenELMModel:()
=>ln,OpenELMPreTrainedModel:()=>an,OwlViTForObjectDetection:()=>eo,OwlViTModel:()=>Zn,OwlViTPreTrainedModel:()=>Kn,Owlv2ForObjectDetection:()=>no,Owlv2Model:()=>ro,Owlv2PreTrainedModel:()=>to,Phi3ForCausalLM:()=>Mn,Phi3Model:()=>gn,Phi3PreTrainedModel:()=>fn,PhiForCausalLM:()=>_n,PhiModel:()=>mn,PhiPreTrainedModel:()=>hn,PreTrainedModel:()=>R,PretrainedMixin:()=>Ja,PvtForImageClassification:()=>zn,PvtModel:()=>Ln,PvtPreTrainedModel:()=>En,PyAnnoteForAudioFrameClassification:()=>Ss,PyAnnoteModel:()=>vs,PyAnnotePreTrainedModel:()=>Cs,QuestionAnsweringModelOutput:()=>dl,Qwen2ForCausalLM:()=>pn,Qwen2Model:()=>un,Qwen2PreTrainedModel:()=>dn,RTDetrForObjectDetection:()=>fo,RTDetrModel:()=>_o,RTDetrObjectDetectionOutput:()=>go,RTDetrPreTrainedModel:()=>mo,ResNetForImageClassification:()=>Ao,ResNetModel:()=>So,ResNetPreTrainedModel:()=>vo,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>ne,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>re,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>K,RobertaForMaskedLM:()=>Dt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Vt,RobertaForTokenClassification:()=>jt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>_s,SamModel:()=>ms,SamPreTrainedModel:()=>hs,SapiensForDepthEstimation:()=>$o,SapiensForNormalEstimation:()=>Wo,SapiensForSemanticSegmentation:()=>qo,SapiensPreTrainedModel:()=>Go,SegformerForImageClassification:()=>Fa,SegformerForSemanticSegmentation:()=>Pa,SegformerModel:()=>ya,SegformerPreTrainedModel:()=>ka,Seq2SeqLMOutput:()=>sl,SequenceClassifierOutput:()=>al,SiglipModel:()=>wr,SiglipPreTrainedModel:()=>Mr,SiglipTextModel:()=>Tr,SiglipVisionModel:()=>br,SpeechT5ForSpeechToText:()=>na,SpeechT5ForTextToSpeech:()=>oa,SpeechT5HifiGan:()=>sa,SpeechT5Model:()=>ra,SpeechT5PreTrainedModel:()=>ta,SqueezeBertForMaskedLM:()=>ot,SqueezeBertForQuestionAnswering:()=>at,SqueezeBertForSequenceClassification:()=>st,SqueezeBertModel:()=>nt,SqueezeB
ertPreTrainedModel:()=>rt,StableLmForCausalLM:()=>Sa,StableLmModel:()=>va,StableLmPreTrainedModel:()=>Ca,Starcoder2ForCausalLM:()=>ha,Starcoder2Model:()=>pa,Starcoder2PreTrainedModel:()=>ua,Swin2SRForImageSuperResolution:()=>No,Swin2SRModel:()=>Bo,Swin2SRPreTrainedModel:()=>Io,SwinForImageClassification:()=>zo,SwinModel:()=>Lo,SwinPreTrainedModel:()=>Eo,T5ForConditionalGeneration:()=>mt,T5Model:()=>ht,T5PreTrainedModel:()=>pt,TableTransformerForObjectDetection:()=>To,TableTransformerModel:()=>wo,TableTransformerObjectDetectionOutput:()=>bo,TableTransformerPreTrainedModel:()=>Mo,TokenClassifierOutput:()=>ll,TrOCRForCausalLM:()=>ia,TrOCRPreTrainedModel:()=>aa,UniSpeechForCTC:()=>Is,UniSpeechForSequenceClassification:()=>Bs,UniSpeechModel:()=>zs,UniSpeechPreTrainedModel:()=>Ls,UniSpeechSatForAudioFrameClassification:()=>js,UniSpeechSatForCTC:()=>Ds,UniSpeechSatForSequenceClassification:()=>Vs,UniSpeechSatModel:()=>Os,UniSpeechSatPreTrainedModel:()=>Ns,ViTForImageClassification:()=>An,ViTMAEModel:()=>Bn,ViTMAEPreTrainedModel:()=>In,ViTMSNForImageClassification:()=>Dn,ViTMSNModel:()=>On,ViTMSNPreTrainedModel:()=>Nn,ViTModel:()=>Sn,ViTPreTrainedModel:()=>vn,VisionEncoderDecoderModel:()=>ar,VitMatteForImageMatting:()=>Wn,VitMattePreTrainedModel:()=>$n,VitsModel:()=>xa,VitsModelOutput:()=>ml,VitsPreTrainedModel:()=>ba,Wav2Vec2BertForCTC:()=>qs,Wav2Vec2BertForSequenceClassification:()=>$s,Wav2Vec2BertModel:()=>Gs,Wav2Vec2BertPreTrainedModel:()=>Rs,Wav2Vec2ForAudioFrameClassification:()=>Ps,Wav2Vec2ForCTC:()=>ys,Wav2Vec2ForSequenceClassification:()=>Fs,Wav2Vec2Model:()=>ks,Wav2Vec2PreTrainedModel:()=>xs,WavLMForAudioFrameClassification:()=>ea,WavLMForCTC:()=>Js,WavLMForSequenceClassification:()=>Ks,WavLMForXVector:()=>Zs,WavLMModel:()=>Ys,WavLMPreTrainedModel:()=>Hs,WeSpeakerResNetModel:()=>Es,WeSpeakerResNetPreTrainedModel:()=>As,WhisperForConditionalGeneration:()=>sr,WhisperModel:()=>or,WhisperPreTrainedModel:()=>nr,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassificatio
n:()=>Wt,XLMForTokenClassification:()=>Ut,XLMModel:()=>qt,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Jt,XLMRobertaForTokenClassification:()=>Kt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>$t,XVectorOutput:()=>il,YolosForObjectDetection:()=>us,YolosModel:()=>ds,YolosObjectDetectionOutput:()=>ps,YolosPreTrainedModel:()=>cs});var n=r(/*! ./configs.js */"./src/configs.js"),o=r(/*! ./backends/onnx.js */"./src/backends/onnx.js"),s=r(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),a=r(/*! ./utils/generic.js */"./src/utils/generic.js"),i=r(/*! ./utils/core.js */"./src/utils/core.js"),l=r(/*! ./utils/hub.js */"./src/utils/hub.js"),c=r(/*! ./generation/logits_process.js */"./src/generation/logits_process.js"),d=r(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=r(/*! ./utils/tensor.js */"./src/utils/tensor.js"),p=r(/*! ./utils/maths.js */"./src/utils/maths.js"),h=r(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),m=r(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),_=r(/*! ./env.js */"./src/env.js"),f=r(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=r(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const M=0,w=1,T=2,b=3,x=4,k=5,y=6,F=7,P=new Map,C=new Map,v=new Map;async function S(e,t,r){return Object.fromEntries(await Promise.all(Object.keys(t).map((async a=>{const{buffer:i,session_options:c}=await async function(e,t,r){const a=r.config?.["transformers.js_config"]??{};let i=r.device??a.device;i&&"string"!=typeof i&&(i.hasOwnProperty(t)?i=i[t]:(console.warn(`device not specified for "${t}". 
Using the default device.`),i=null));const c=i??(_.apis.IS_NODE_ENV?"cpu":"wasm"),d=(0,o.deviceToExecutionProviders)(c);let u=r.dtype??a.dtype;"string"!=typeof u&&(u&&u.hasOwnProperty(t)?u=u[t]:(u=s.DEFAULT_DEVICE_DTYPE_MAPPING[c]??s.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${u}) for this device (${c}).`)));const p=u;if(!s.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(p))throw new Error(`Invalid dtype: ${p}. Should be one of: ${Object.keys(s.DATA_TYPES).join(", ")}`);if(p===s.DATA_TYPES.fp16&&"webgpu"===c&&!await(0,s.isWebGpuFp16Supported)())throw new Error(`The device (${c}) does not support fp16.`);const h=s.DEFAULT_DTYPE_SUFFIX_MAPPING[p],m=`${r.subfolder??""}/${t}${h}.onnx`,f={...r.session_options}??{};f.executionProviders??=d;const g=a.free_dimension_overrides;g?f.freeDimensionOverrides??=g:c.startsWith("webnn")&&!f.freeDimensionOverrides&&console.warn('WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". 
When `free_dimension_overrides` is not set, you may experience significant performance degradation.');const M=(0,l.getModelFile)(e,m,!0,r);let w=[];if(r.use_external_data_format&&(!0===r.use_external_data_format||"object"==typeof r.use_external_data_format&&r.use_external_data_format.hasOwnProperty(t)&&!0===r.use_external_data_format[t])){if(_.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const n=`${t}${h}.onnx_data`,o=`${r.subfolder??""}/${n}`;w.push(new Promise((async(t,s)=>{const a=await(0,l.getModelFile)(e,o,!0,r);t({path:n,data:a})})))}else void 0!==f.externalData&&(w=f.externalData.map((async t=>{if("string"==typeof t.data){const n=await(0,l.getModelFile)(e,t.data,!0,r);return{...t,data:n}}return t})));if(w.length>0&&(f.externalData=await Promise.all(w)),"webgpu"===c){const e=(0,n.getKeyValueShapes)(r.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,o.isONNXProxy)()){const t={};for(const r in e)r.includes("encoder")||(t[r]="gpu-buffer");f.preferredOutputLocation=t}}return{buffer:await M,session_options:f}}(e,t[a],r);return[a,await(0,o.createInferenceSession)(i,c)]}))))}async function A(e,t){const r=function(e,t){const r=Object.create(null),n=[];for(const s of e.inputNames){const e=t[s];e instanceof u.Tensor?r[s]=(0,o.isONNXProxy)()?e.clone():e:n.push(s)}if(n.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${n.join(", ")}.`);const s=Object.keys(t).length,a=e.inputNames.length;if(s>a){let r=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${s} > ${a}). 
The following inputs will be ignored: "${r.join(", ")}".`)}return r}(e,t);try{const t=Object.fromEntries(Object.entries(r).map((([e,t])=>[e,t.ort_tensor])));let n=await e.run(t);return n=E(n),n}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",r),e}}function E(e){for(let t in e)(0,o.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function L(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function z(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:r,input_ids:n,decoder_input_ids:o,...s}=t;if(!r){const n=(0,i.pick)(t,e.sessions.model.inputNames);r=(await B(e,n)).last_hidden_state}s.input_ids=o,s.encoder_hidden_states=r,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=t.attention_mask);return await N(e,s,!0)}async function B(e,t){const r=e.sessions.model,n=(0,i.pick)(t,r.inputNames);if(r.inputNames.includes("inputs_embeds")&&!n.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");n.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return r.inputNames.includes("token_type_ids")&&!n.token_type_ids&&(n.token_type_ids=new u.Tensor("int64",new BigInt64Array(n.input_ids.data.length),n.input_ids.dims)),await A(r,n)}async function N(e,t,r=!1){const 
n=e.sessions[r?"decoder_model_merged":"model"],{past_key_values:o,...s}=t;n.inputNames.includes("use_cache_branch")&&(s.use_cache_branch=z(!!o)),n.inputNames.includes("position_ids")&&s.attention_mask&&!s.position_ids&&(s.position_ids=function(e,t=null){const{input_ids:r,inputs_embeds:n,attention_mask:o}=e,[s,a]=o.dims,i=new BigInt64Array(o.data.length);for(let e=0;e<s;++e){const t=e*a;let r=BigInt(0);for(let e=0;e<a;++e){const n=t+e;0n===o.data[n]?i[n]=BigInt(1):(i[n]=r,r+=o.data[n])}}let l=new u.Tensor("int64",i,o.dims);if(t){const e=-(r??n).dims.at(1);l=l.slice(null,[e,null])}return l}(s,o)),e.addPastKeyValues(s,o);const a=(0,i.pick)(s,n.inputNames);return await A(n,a)}async function O(e,{input_ids:t=null,attention_mask:r=null,pixel_values:n=null,position_ids:o=null,inputs_embeds:s=null,past_key_values:a=null,generation_config:i=null,logits_processor:l=null,...c}){if(!s)if(s=await e.encode_text({input_ids:t}),n&&1!==t.dims[1]){const o=await e.encode_image({pixel_values:n});({inputs_embeds:s,attention_mask:r}=e._merge_input_ids_with_image_features({image_features:o,inputs_embeds:s,input_ids:t,attention_mask:r}))}else if(a&&n&&1===t.dims[1]){const e=t.dims[1],n=Object.values(a)[0].dims.at(-2);r=(0,u.cat)([(0,u.ones)([t.dims[0],n]),r.slice(null,[r.dims[1]-e,r.dims[1]])],1)}return await N(e,{inputs_embeds:s,past_key_values:a,attention_mask:r,position_ids:o,generation_config:i,logits_processor:l},!0)}function D(e,t,r,n){if(r.past_key_values){const t=Object.values(r.past_key_values)[0].dims.at(-2),{input_ids:n,attention_mask:o}=r;if(o&&o.dims[1]>n.dims[1]);else if(t<n.dims[1])r.input_ids=n.slice(null,[t,null]);else if(null!=e.config.image_token_index&&n.data.some((t=>t==e.config.image_token_index))){const o=e.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const s=n.dims[1]-(t-o);r.input_ids=n.slice(null,[-s,null]),r.attention_mask=(0,u.ones)([1,t+s])}}return r}function V(e,t,r,n){return 
r.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...r,decoder_input_ids:L(t)}}function j(e,...t){return e.config.is_encoder_decoder?V(e,...t):D(e,...t)}class R extends a.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const r=v.get(this.constructor),n=P.get(r);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,n){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=D;break;case T:case b:case F:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=V;break;case w:this._forward=I;break;case y:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=j;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:p=null,session_options:h={}}={}){let m={progress_callback:t,config:r,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:c,device:d,dtype:u,use_external_data_format:p,session_options:h};const _=v.get(this),f=P.get(_);let g;if(r=m.config=await n.AutoConfig.from_pretrained(e,m),f===x)g=await Promise.all([S(e,{model:m.model_file_name??"model"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===T||f===b)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]);else if(f===k)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},m)]);else 
if(f===w)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},m)]);else if(f===y){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};r.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)])}else f===F?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},m),(0,l.getModelJSON)(e,"generation_config.json",!1,m)]):(f!==M&&console.warn(`Model type for '${_??r?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:m.model_file_name??"model"},m)]));return new this(r,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,r=null){const n=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&n.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&n.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&n.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&n.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&n.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&n.push(new 
c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&n.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const r=t>1||null===e.forced_bos_token_id?t:t+1;n.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,r))}return null!==e.guidance_scale&&e.guidance_scale>1&&n.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==r&&n.extend(r),n}_prepare_generation_config(e,t,r=d.GenerationConfig){const n={...this.config};for(const e of["decoder","generator","text_config"])e in n&&Object.assign(n,n[e]);const o=new r(n);return"generation_config"in this&&Object.assign(o,this.generation_config),e&&Object.assign(o,e),t&&Object.assign(o,(0,i.pick)(t,Object.getOwnPropertyNames(o))),o}_get_stopping_criteria(e,t=null){const r=new h.StoppingCriteriaList;return null!==e.max_length&&r.push(new h.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&r.push(new h.EosTokenCriteria(e.eos_token_id)),t&&r.extend(t),r}_validate_model_class(){if(!this.can_generate){const e=[ii,di,ai,ti],t=v.get(this.constructor),r=new Set,n=this.config.model_type;for(const t of e){const e=t.get(n);e&&r.add(e[0])}let o=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw r.size>0&&(o+=` Please use the following class instead: ${[...r].join(", ")}`),Error(o)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:r,is_encoder_decoder:n}){return r.past_key_values=this.getPastKeyValues(t,r.past_key_values),r.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),n||(r.attention_mask=(0,u.cat)([r.attention_mask,(0,u.ones)([r.attention_mask.dims[0],1])],1)),r.position_ids=null,r}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:r}){const 
n=(0,i.pick)(r,this.forward_params),o=this.main_input_name;if(o in n){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else n[o]=e;return{inputs_tensor:n[o],model_inputs:n,model_input_name:o}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:r,generation_config:n}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:r,attention_mask:n,...o}=t,s=await this._prepare_inputs_embeds(t);t={...o,...(0,i.pick)(s,["inputs_embeds","attention_mask"])}}let{last_hidden_state:o}=await B(this,t);if(null!==n.guidance_scale&&n.guidance_scale>1)o=(0,u.cat)([o,(0,u.full_like)(o,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=L(t.decoder_input_ids).dims[0];if(e!==o.dims[0]){if(1!==o.dims[0])throw new Error(`The encoder outputs have a different batch size (${o.dims[0]}) than the decoder inputs (${e}).`);o=(0,u.cat)(Array.from({length:e},(()=>o)),0)}}return t.encoder_outputs=o,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:r,decoder_start_token_id:n,bos_token_id:o,generation_config:s}){let{decoder_input_ids:a,...i}=r;if(a)Array.isArray(a[0])||(a=Array.from({length:e},(()=>a)));else if(n??=o,"musicgen"===this.config.model_type)a=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[n]));else if(Array.isArray(n)){if(n.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${n.length}`);a=n}else a=Array.from({length:e},(()=>[n]));return a=L(a),r.decoder_attention_mask=(0,u.ones_like)(a),{input_ids:a,model_inputs:i}}async 
generate({inputs:e=null,generation_config:t=null,logits_processor:r=null,stopping_criteria:n=null,streamer:o=null,...s}){this._validate_model_class(),t=this._prepare_generation_config(t,s);let{inputs_tensor:a,model_inputs:i,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in i||(i=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:a,model_inputs:i,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:i}=this._prepare_decoder_input_ids_for_generation({batch_size:i[l].dims.at(0),model_input_name:l,model_kwargs:i,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=i[l];let p=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=p+t.max_new_tokens);const h=this._get_logits_processor(t,p,r),_=this._get_stopping_criteria(t,n),f=i[l].dims.at(0),g=m.LogitsSampler.getSampler(t),M=new Array(f).fill(0),w=d.tolist();let T;o&&o.put(w);let b={};for(;;){if(i=this.prepare_inputs_for_generation(w,i,t),T=await this.forward(i),t.output_attentions&&t.return_dict_in_generate){const e=this.getAttentions(T);for(const t in e)t in b||(b[t]=[]),b[t].push(e[t])}const e=h(w,T.logits.slice(null,-1,null)),r=[];for(let t=0;t<e.dims.at(0);++t){const n=e[t],o=await g(n);for(const[e,n]of o){const o=BigInt(e);M[t]+=n,w[t].push(o),r.push([o]);break}}o&&o.put(r);if(_(w).every((e=>e)))break;i=this._update_model_kwargs_for_generation({generated_input_ids:r,outputs:T,model_inputs:i,is_encoder_decoder:c})}o&&o.end();const x=this.getPastKeyValues(T,i.past_key_values,!0),k=new u.Tensor("int64",w.flat(),[w.length,w[0].length]);if(t.return_dict_in_generate)return{sequences:k,past_key_values:x,...b};for(const e of Object.values(T))"gpu-buffer"===e.location&&e.dispose();return k}getPastKeyValues(e,t,r=!1){const n=Object.create(null);for(const o in e)if(o.startsWith("present")){const 
s=o.replace("present","past_key_values"),a=o.includes("encoder");if(n[s]=a&&t?t[s]:e[o],t&&(!a||r)){const e=t[s];"gpu-buffer"===e.location&&e.dispose()}}return n}getAttentions(e){const t={};for(const r of["cross_attentions","encoder_attentions","decoder_attentions"])for(const n in e)n.startsWith(r)&&(r in t||(t[r]=[]),t[r].push(e[n]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",r="float16"===t?new Uint16Array:[],o=(0,n.getKeyValueShapes)(this.config);for(const n in o)e[n]=new u.Tensor(t,r,o[n])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class q extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:r=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=r}}class $ extends R{}class W extends ${}class U extends ${async _call(e){return new cl(await super._call(e))}}class X extends ${async _call(e){return new al(await super._call(e))}}class Q extends ${async _call(e){return new ll(await super._call(e))}}class H extends ${async _call(e){return new dl(await super._call(e))}}class Y extends R{}class J extends Y{}class K extends R{}class Z extends K{}class ee extends K{async _call(e){return new cl(await super._call(e))}}class te extends K{async _call(e){return new al(await super._call(e))}}class re extends K{async _call(e){return new ll(await super._call(e))}}class ne extends K{async _call(e){return new dl(await super._call(e))}}class oe extends R{}class se extends oe{}class ae extends oe{async _call(e){return new cl(await 
super._call(e))}}class ie extends oe{async _call(e){return new al(await super._call(e))}}class le extends oe{async _call(e){return new ll(await super._call(e))}}class ce extends oe{async _call(e){return new dl(await super._call(e))}}class de extends R{}class ue extends de{}class pe extends de{async _call(e){return new cl(await super._call(e))}}class he extends de{async _call(e){return new al(await super._call(e))}}class me extends de{async _call(e){return new ll(await super._call(e))}}class _e extends de{async _call(e){return new dl(await super._call(e))}}class fe extends R{}class ge extends fe{}class Me extends fe{async _call(e){return new cl(await super._call(e))}}class we extends fe{async _call(e){return new al(await super._call(e))}}class Te extends fe{async _call(e){return new ll(await super._call(e))}}class be extends fe{async _call(e){return new dl(await super._call(e))}}class xe extends R{}class ke extends xe{}class ye extends xe{async _call(e){return new cl(await super._call(e))}}class Fe extends xe{async _call(e){return new al(await super._call(e))}}class Pe extends xe{async _call(e){return new ll(await super._call(e))}}class Ce extends xe{async _call(e){return new dl(await super._call(e))}}class ve extends R{}class Se extends ve{}class Ae extends ve{async _call(e){return new cl(await super._call(e))}}class Ee extends ve{async _call(e){return new al(await super._call(e))}}class Le extends ve{async _call(e){return new ll(await super._call(e))}}class ze extends ve{async _call(e){return new dl(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new al(await super._call(e))}}class Oe extends Ie{async _call(e){return new ll(await super._call(e))}}class De extends Ie{async _call(e){return new dl(await super._call(e))}}class Ve extends Ie{async _call(e){return new cl(await super._call(e))}}class je extends R{}class Re extends je{}class Ge extends je{async _call(e){return new cl(await super._call(e))}}class qe 
extends je{async _call(e){return new al(await super._call(e))}}class $e extends je{async _call(e){return new ll(await super._call(e))}}class We extends R{}class Ue extends We{}class Xe extends We{async _call(e){return new cl(await super._call(e))}}class Qe extends We{async _call(e){return new al(await super._call(e))}}class He extends We{async _call(e){return new dl(await super._call(e))}}class Ye extends R{}class Je extends Ye{}class Ke extends Ye{async _call(e){return new cl(await super._call(e))}}class Ze extends Ye{async _call(e){return new al(await super._call(e))}}class et extends Ye{async _call(e){return new ll(await super._call(e))}}class tt extends Ye{async _call(e){return new dl(await super._call(e))}}class rt extends R{}class nt extends rt{}class ot extends rt{async _call(e){return new cl(await super._call(e))}}class st extends rt{async _call(e){return new al(await super._call(e))}}class at extends rt{async _call(e){return new dl(await super._call(e))}}class it extends R{}class lt extends it{}class ct extends it{async _call(e){return new al(await super._call(e))}}class dt extends it{async _call(e){return new dl(await super._call(e))}}class ut extends it{async _call(e){return new cl(await super._call(e))}}class pt extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class ht extends pt{}class mt extends pt{}class _t extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ft extends _t{}class gt extends _t{}class Mt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class wt extends Mt{}class Tt extends Mt{}class bt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class xt extends bt{}class kt extends bt{}class yt extends bt{async _call(e){return new al(await super._call(e))}}class Ft extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Pt extends 
Ft{}class Ct extends Ft{}class vt extends Ft{async _call(e){return new al(await super._call(e))}}class St extends Ft{}class At extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Et extends At{}class Lt extends At{}class zt extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class It extends zt{}class Bt extends zt{}class Nt extends R{}class Ot extends Nt{}class Dt extends Nt{async _call(e){return new cl(await super._call(e))}}class Vt extends Nt{async _call(e){return new al(await super._call(e))}}class jt extends Nt{async _call(e){return new ll(await super._call(e))}}class Rt extends Nt{async _call(e){return new dl(await super._call(e))}}class Gt extends R{}class qt extends Gt{}class $t extends Gt{async _call(e){return new cl(await super._call(e))}}class Wt extends Gt{async _call(e){return new al(await super._call(e))}}class Ut extends Gt{async _call(e){return new ll(await super._call(e))}}class Xt extends Gt{async _call(e){return new dl(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new cl(await super._call(e))}}class Jt extends Qt{async _call(e){return new al(await super._call(e))}}class Kt extends Qt{async _call(e){return new ll(await super._call(e))}}class Zt extends Qt{async _call(e){return new dl(await super._call(e))}}class er extends R{}class tr extends er{}class rr extends er{}class nr extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class or extends nr{}class sr extends nr{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let r=e.language;const n=e.task;if(e.is_multilingual){r||(console.warn("No language specified - defaulting to English (en)."),r="en");const 
o=`<|${(0,g.whisper_language_to_code)(r)}|>`;t.push(e.lang_to_id[o]),t.push(e.task_to_id[n??"transcribe"])}else if(r||n)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:r=null,stopping_criteria:n=null,...o}){t=this._prepare_generation_config(t,o);const s=o.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(r??=new c.LogitsProcessorList,r.push(new c.WhisperTimeStampLogitsProcessor(t,s))),t.begin_suppress_tokens&&(r??=new c.LogitsProcessorList,r.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,s.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const a=await super.generate({inputs:e,generation_config:t,logits_processor:r,decoder_input_ids:s,...o});return t.return_token_timestamps&&(a.token_timestamps=this._extract_token_timestamps(a,t.alignment_heads,t.num_frames)),a}_extract_token_timestamps(e,t,r=null,n=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. 
This is most likely because the model was not exported with `output_attentions=True`.");null==r&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let o=this.config.median_filter_width;void 0===o&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),o=7);const s=e.cross_attentions,a=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(s.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=a.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${a.length}).`);return r?a[e].slice(null,t,null,[0,r]):a[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),h=l.clone();for(let e=0;e<h.dims[0];++e){const t=h[e];for(let r=0;r<t.dims[0];++r){const n=t[r],s=c[e][r][0].data,a=d[e][r][0].data;for(let e=0;e<n.dims[0];++e){let t=n[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-a[e])/s[e];t.set((0,p.medianFilter)(t,o))}}}const m=[(0,u.mean)(h,1)],_=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(_[0]*_[1]),_);for(let e=0;e<_[0];++e){const t=m[e].neg().squeeze_(0),[r,o]=(0,p.dynamic_time_warping)(t.tolist()),s=Array.from({length:r.length-1},((e,t)=>r[t+1]-r[t])),a=(0,i.mergeArrays)([1],s).map((e=>!!e)),l=[];for(let e=0;e<a.length;++e)a[e]&&l.push(o[e]*n);f[e].data.set(l,1)}return f}}class ar extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class ir extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}}class lr extends ir{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:r,attention_mask:n}){const 
o=this.config.image_token_index,s=r.tolist().map((e=>e.findIndex((e=>e==o)))),a=s.every((e=>-1===e)),i=s.every((e=>-1!==e));if(!a&&!i)throw new Error("Every input should contain either 0 or 1 image token.");if(a)return{inputs_embeds:e,attention_mask:n};const l=[],c=[];for(let r=0;r<s.length;++r){const o=s[r],a=e[r],i=t[r],d=n[r];l.push((0,u.cat)([a.slice([0,o]),i,a.slice([o+1,a.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,o]),(0,u.ones)([i.dims[0]]),d.slice([o+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class cr extends lr{}class dr extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,r){super(e,t),this.generation_config=r}}class ur extends dr{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:r,attention_mask:n}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),n],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:r,attention_mask:n}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let o,s;return e&&(o=await this.encode_text({input_ids:e})),t&&(s=await this.encode_image({pixel_values:t})),o&&s?({inputs_embeds:r,attention_mask:n}=this._merge_input_ids_with_image_features({inputs_embeds:o,image_features:s,input_ids:e,attention_mask:n})):r=o||s,{inputs_embeds:r,attention_mask:n}}async forward({input_ids:e,pixel_values:t,attention_mask:r,decoder_input_ids:n,decoder_attention_mask:o,encoder_outputs:s,past_key_values:a,inputs_embeds:i,decoder_inputs_embeds:l}){if(i||({inputs_embeds:i,attention_mask:r}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:i,attention_mask:r})),!s){let{last_hidden_state:e}=await B(this,{inputs_embeds:i,attention_mask:r});s=e}if(!l){if(!n)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:n})}const c={inputs_embeds:l,attention_mask:o,encoder_attention_mask:r,encoder_hidden_states:s,past_key_values:a};return await N(this,c,!0)}}class pr extends R{}class hr extends pr{}class mr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class _r extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gr extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class Mr extends R{}class wr extends Mr{}class Tr extends Mr{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class br extends pr{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class xr extends R{}class kr extends xr{}class yr extends R{}class Fr extends yr{}class Pr extends yr{}class Cr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class vr extends Cr{}class Sr extends Cr{}class Ar extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Er extends Ar{}class Lr extends Ar{}class zr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ir extends zr{}class Br extends zr{}class Nr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Or extends Nr{}class Dr extends Nr{}class Vr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class jr extends Vr{}class Rr extends Vr{}class Gr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class qr extends Gr{}class $r extends Gr{}class Wr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ur extends 
Wr{}class Xr extends Wr{}class Qr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Hr extends Qr{}class Yr extends Qr{}class Jr extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Kr extends Jr{}class Zr extends Jr{}class en extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class tn extends en{}class rn extends en{}class nn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class on extends nn{}class sn extends nn{}class an extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ln extends an{}class cn extends an{}class dn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class un extends dn{}class pn extends dn{}class hn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class mn extends hn{}class _n extends hn{}class fn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class gn extends fn{}class Mn extends fn{}class wn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Tn extends wn{}class bn extends wn{}class xn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class kn extends xn{}class yn extends xn{}class Fn extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Pn extends Fn{}class Cn extends Fn{}class vn extends R{}class Sn extends vn{}class An extends vn{async _call(e){return new al(await super._call(e))}}class En extends R{}class Ln extends En{}class zn extends En{async _call(e){return new al(await super._call(e))}}class In extends R{}class Bn extends In{}class Nn extends R{}class On extends Nn{}class Dn extends Nn{async _call(e){return new al(await super._call(e))}}class Vn extends R{}class jn extends Vn{}class Rn extends R{}class Gn extends Rn{}class qn extends Rn{async _call(e){return new al(await super._call(e))}}class $n extends R{}class Wn extends $n{async _call(e){return new hl(await super._call(e))}}class Un extends R{}class Xn extends Un{}class Qn extends 
Un{async _call(e){return new al(await super._call(e))}}class Hn extends R{}class Yn extends Hn{}class Jn extends Hn{async _call(e){return new al(await super._call(e))}}class Kn extends R{}class Zn extends Kn{}class eo extends Kn{}class to extends R{}class ro extends to{}class no extends to{}class oo extends R{}class so extends oo{}class ao extends oo{async _call(e){return new al(await super._call(e))}}class io extends R{}class lo extends io{}class co extends io{async _call(e){return new po(await super._call(e))}}class uo extends io{async _call(e){return new ho(await super._call(e))}}class po extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class ho extends G{constructor({logits:e,pred_boxes:t,pred_masks:r}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=r}}class mo extends R{}class _o extends mo{}class fo extends mo{async _call(e){return new go(await super._call(e))}}class go extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Mo extends R{}class wo extends Mo{}class To extends Mo{async _call(e){return new bo(await super._call(e))}}class bo extends po{}class xo extends R{}class ko extends xo{}class yo extends xo{async _call(e){return new al(await super._call(e))}}class Fo extends R{}class Po extends Fo{}class Co extends Fo{async _call(e){return new al(await super._call(e))}}class vo extends R{}class So extends vo{}class Ao extends vo{async _call(e){return new al(await super._call(e))}}class Eo extends R{}class Lo extends Eo{}class zo extends Eo{async _call(e){return new al(await super._call(e))}}class Io extends R{}class Bo extends Io{}class No extends Io{}class Oo extends R{}class Do extends Oo{}class Vo extends Oo{}class jo extends R{}class Ro extends jo{}class Go extends R{}class qo extends Go{}class $o extends Go{}class Wo extends Go{}class Uo extends R{}class Xo extends Uo{}class Qo extends Uo{}class Ho extends R{}class Yo extends Ho{}class Jo extends Ho{}class Ko extends 
R{}class Zo extends Ko{}class es extends R{}class ts extends es{}class rs extends es{async _call(e){return new al(await super._call(e))}}class ns extends R{}class os extends ns{}class ss extends ns{async _call(e){return new al(await super._call(e))}}class as extends R{}class is extends as{}class ls extends as{async _call(e){return new al(await super._call(e))}}class cs extends R{}class ds extends cs{}class us extends cs{async _call(e){return new ps(await super._call(e))}}class ps extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class hs extends R{}class ms extends hs{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const t=e.input_points.dims.slice(0,-1),r=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(r).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new _s(await super._call(e))}}class _s extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class fs extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class gs extends fs{}class Ms extends fs{}class ws extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class Ts extends ws{}class bs extends ws{}class xs extends R{}class ks extends xs{}class ys extends xs{async _call(e){return new ul(await super._call(e))}}class Fs extends xs{async _call(e){return new al(await super._call(e))}}class Ps extends xs{async _call(e){return new ll(await super._call(e))}}class Cs extends R{}class vs extends Cs{}class Ss extends 
Cs{async _call(e){return new ll(await super._call(e))}}class As extends R{}class Es extends As{}class Ls extends R{}class zs extends Ls{}class Is extends Ls{async _call(e){return new ul(await super._call(e))}}class Bs extends Ls{async _call(e){return new al(await super._call(e))}}class Ns extends R{}class Os extends Ns{}class Ds extends Ns{async _call(e){return new ul(await super._call(e))}}class Vs extends Ns{async _call(e){return new al(await super._call(e))}}class js extends Ns{async _call(e){return new ll(await super._call(e))}}class Rs extends R{}class Gs extends Rs{}class qs extends Rs{async _call(e){return new ul(await super._call(e))}}class $s extends Rs{async _call(e){return new al(await super._call(e))}}class Ws extends R{}class Us extends xs{}class Xs extends xs{async _call(e){return new ul(await super._call(e))}}class Qs extends xs{async _call(e){return new al(await super._call(e))}}class Hs extends R{}class Ys extends Hs{}class Js extends Hs{async _call(e){return new ul(await super._call(e))}}class Ks extends Hs{async _call(e){return new al(await super._call(e))}}class Zs extends Hs{async _call(e){return new il(await super._call(e))}}class ea extends Hs{async _call(e){return new ll(await super._call(e))}}class ta extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ra extends ta{}class na extends ta{}class oa extends ta{async generate_speech(e,t,{threshold:r=.5,minlenratio:n=0,maxlenratio:o=20,vocoder:s=null}={}){const a={input_ids:e},{encoder_outputs:i,encoder_attention_mask:l}=await B(this,a),c=i.dims[1]/this.config.reduction_factor,d=Math.floor(c*o),p=Math.floor(c*n),h=this.config.num_mel_bins;let m=[],_=null,f=null,g=0;for(;;){++g;const e=z(!!f);let n;n=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(h),[1,1,h]);let o={use_cache_branch:e,output_sequence:n,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:i};this.addPastKeyValues(o,_),f=await 
A(this.sessions.decoder_model_merged,o),_=this.getPastKeyValues(f,_);const{prob:s,spectrum:a}=f;if(m.push(a),g>=p&&(Array.from(s.data).filter((e=>e>=r)).length>0||g>=d))break}const M=(0,u.cat)(m),{waveform:w}=await A(s.sessions.model,{spectrogram:M});return{spectrogram:M,waveform:w}}}class sa extends R{main_input_name="spectrogram"}class aa extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ia extends aa{}class la extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class ca extends la{}class da extends la{}class ua extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class pa extends ua{}class ha extends ua{}class ma extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class _a extends ma{}class fa extends ma{}class ga extends R{}class Ma extends ga{}class wa extends ga{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ta extends ga{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class ba extends R{}class xa extends ba{async _call(e){return new ml(await super._call(e))}}class ka extends R{}class ya extends ka{}class Fa extends ka{}class Pa extends ka{}class Ca extends R{constructor(e,t,r){super(e,t),this.generation_config=r}}class va extends Ca{}class Sa extends Ca{}class Aa extends R{}class Ea extends Aa{}class La extends Aa{async _call(e){return new al(await super._call(e))}}class za extends R{}class Ia extends za{}class Ba extends za{}class Na extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,r){super(e,t),this.generation_config=r}_apply_and_filter_by_delay_pattern_mask(e){const[t,r]=e.dims,n=this.config.decoder.num_codebooks,o=r-n;let s=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const 
a=t%r-Math.floor(t/r)%n;a>0&&a<=o&&(e.data[s++]=e.data[t])}const a=Math.floor(t/n),i=s/(a*n);return new u.Tensor(e.type,e.data.slice(0,s),[a,n,i])}prepare_inputs_for_generation(e,t,r){let n=structuredClone(e);for(let e=0;e<n.length;++e)for(let t=0;t<n[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(n[e][t]=BigInt(this.config.decoder.pad_token_id));null!==r.guidance_scale&&r.guidance_scale>1&&(n=n.concat(n));return super.prepare_inputs_for_generation(n,t,r)}async generate(e){const t=await super.generate(e),r=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:n}=await A(this.sessions.encodec_decode,{audio_codes:r});return n}}class Oa extends R{}class Da extends Oa{}class Va extends Oa{async _call(e){return new al(await super._call(e))}}class ja extends R{}class Ra extends ja{}class Ga extends ja{async _call(e){return new al(await super._call(e))}}class qa extends R{}class $a extends qa{}class Wa extends qa{async _call(e){return new al(await super._call(e))}}class Ua extends R{}class Xa extends Ua{}class Qa extends Ua{async _call(e){return new al(await super._call(e))}}class Ha extends R{}class Ya extends Ha{}class Ja{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:o=null,local_files_only:s=!1,revision:a="main",model_file_name:i=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:p={}}={}){const h={progress_callback:t,config:r,cache_dir:o,local_files_only:s,revision:a,model_file_name:i,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:p};if(h.config=await n.AutoConfig.from_pretrained(e,h),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(const t of this.MODEL_CLASS_MAPPINGS){const r=t.get(h.config.model_type);if(r)return await r[1].from_pretrained(e,h)}if(this.BASE_IF_FAIL)return console.warn(`Unknown 
model class "${h.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,h);throw Error(`Unsupported model type: ${h.config.model_type}`)}}const Ka=new Map([["bert",["BertModel",W]],["nomic_bert",["NomicBertModel",J]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",se]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Je]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",qt]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Ma]],["clip",["CLIPModel",hr]],["clipseg",["CLIPSegModel",Fr]],["chinese_clip",["ChineseCLIPModel",kr]],["siglip",["SiglipModel",wr]],["mobilebert",["MobileBertModel",Ue]],["squeezebert",["SqueezeBertModel",nt]],["wav2vec2",["Wav2Vec2Model",ks]],["wav2vec2-bert",["Wav2Vec2BertModel",Gs]],["unispeech",["UniSpeechModel",zs]],["unispeech-sat",["UniSpeechSatModel",Os]],["hubert",["HubertModel",Us]],["wavlm",["WavLMModel",Ys]],["audio-spectrogram-transformer",["ASTModel",tr]],["vits",["VitsModel",xa]],["pyannote",["PyAnnoteModel",vs]],["wespeaker-resnet",["WeSpeakerResNetModel",Es]],["detr",["DetrModel",lo]],["rt_detr",["RTDetrModel",_o]],["table-transformer",["TableTransformerModel",wo]],["vit",["ViTModel",Sn]],["pvt",["PvtModel",Ln]],["vit_msn",["ViTMSNModel",On]],["vit_mae",["ViTMAEModel",Bn]],["groupvit",["GroupViTModel",jn]],["fastvit",["FastViTModel",Gn]],["mobilevit",["MobileViTModel",Xn]],["mobilevitv2",["MobileViTV2Model",Yn]],["owlvit",["OwlViTModel",Zn]],["owlv2",["Owlv2Model",ro]],["beit",["BeitModel",so]],["deit",["DeiTModel",ko]],["hiera",["HieraModel",Po]],["convnext",["ConvNextModel",ts]],["convnextv2",["ConvNextV2Model",os]],["dinov2",["Dinov2Model",is]],["resnet",["ResNetModel",So]],["swin",["SwinModel",Lo]],["swin2sr",["Swin2SRModel",Bo]],["donut-swin",["DonutSwinModel",Zo]
],["yolos",["YolosModel",ds]],["dpt",["DPTModel",Do]],["glpn",["GLPNModel",Yo]],["hifigan",["SpeechT5HifiGan",sa]],["efficientnet",["EfficientNetModel",Ea]],["decision_transformer",["DecisionTransformerModel",Ya]],["mobilenet_v1",["MobileNetV1Model",Da]],["mobilenet_v2",["MobileNetV2Model",Ra]],["mobilenet_v3",["MobileNetV3Model",$a]],["mobilenet_v4",["MobileNetV4Model",Xa]],["maskformer",["MaskFormerModel",Xo]]]),Za=new Map([["t5",["T5Model",ht]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",wt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Pt]],["marian",["MarianModel",gs]],["whisper",["WhisperModel",or]],["m2m_100",["M2M100Model",Ts]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),ei=new Map([["bloom",["BloomModel",Tn]],["jais",["JAISModel",Er]],["gpt2",["GPT2Model",vr]],["gptj",["GPTJModel",jr]],["gpt_bigcode",["GPTBigCodeModel",qr]],["gpt_neo",["GPTNeoModel",Ir]],["gpt_neox",["GPTNeoXModel",Or]],["codegen",["CodeGenModel",Ur]],["llama",["LlamaModel",Hr]],["cohere",["CohereModel",Kr]],["gemma",["GemmaModel",tn]],["gemma2",["Gemma2Model",on]],["openelm",["OpenELMModel",ln]],["qwen2",["Qwen2Model",un]],["phi",["PhiModel",mn]],["phi3",["Phi3Model",gn]],["mpt",["MptModel",kn]],["opt",["OPTModel",Pn]],["mistral",["MistralModel",ca]],["starcoder2",["Starcoder2Model",pa]],["falcon",["FalconModel",_a]],["stablelm",["StableLmModel",va]]]),ti=new Map([["speecht5",["SpeechT5ForSpeechToText",na]],["whisper",["WhisperForConditionalGeneration",sr]]]),ri=new Map([["speecht5",["SpeechT5ForTextToSpeech",oa]]]),ni=new Map([["vits",["VitsModel",xa]],["musicgen",["MusicgenForConditionalGeneration",Na]]]),oi=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",he]],["esm",["EsmForSequenceClassification",qe]],["convbert",["ConvBertForSequenceClassification",ie]],["camembert",["CamembertForSequenceClassification",we]],["deberta",["DebertaForSequenceClassification",Fe]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Vt]],["xlm",["XLMForSequenceClassification",Wt]],["xlm-roberta",["XLMRobertaForSequenceClassification",Jt]],["bart",["BartForSequenceClassification",yt]],["mbart",["MBartForSequenceClassification",vt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",st]]]),si=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",re]],["electra",["ElectraForTokenClassification",me]],["esm",["EsmForTokenClassification",$e]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",Te]],["deberta",["DebertaForTokenClassification",Pe]],["deberta-v2",["DebertaV2ForTokenClassification",Le]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",jt]],["xlm",["XLMForTokenClassification",Ut]],["xlm-roberta",["XLMRobertaForTokenClassification",Kt]]]),ai=new 
Map([["t5",["T5ForConditionalGeneration",mt]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",Tt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Ct]],["marian",["MarianMTModel",Ms]],["m2m_100",["M2M100ForConditionalGeneration",bs]],["blenderbot",["BlenderbotForConditionalGeneration",Lt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),ii=new Map([["bloom",["BloomForCausalLM",bn]],["gpt2",["GPT2LMHeadModel",Sr]],["jais",["JAISLMHeadModel",Lr]],["gptj",["GPTJForCausalLM",Rr]],["gpt_bigcode",["GPTBigCodeForCausalLM",$r]],["gpt_neo",["GPTNeoForCausalLM",Br]],["gpt_neox",["GPTNeoXForCausalLM",Dr]],["codegen",["CodeGenForCausalLM",Xr]],["llama",["LlamaForCausalLM",Yr]],["cohere",["CohereForCausalLM",Zr]],["gemma",["GemmaForCausalLM",rn]],["gemma2",["Gemma2ForCausalLM",sn]],["openelm",["OpenELMForCausalLM",cn]],["qwen2",["Qwen2ForCausalLM",pn]],["phi",["PhiForCausalLM",_n]],["phi3",["Phi3ForCausalLM",Mn]],["mpt",["MptForCausalLM",yn]],["opt",["OPTForCausalLM",Cn]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",da]],["starcoder2",["Starcoder2ForCausalLM",ha]],["falcon",["FalconForCausalLM",fa]],["trocr",["TrOCRForCausalLM",ia]],["stablelm",["StableLmForCausalLM",Sa]]]),li=new Map([["bert",["BertForMaskedLM",U]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",pe]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ae]],["camembert",["CamembertForMaskedLM",Me]],["deberta",["DebertaForMaskedLM",ye]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Ke]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",Ve]],["roberta",["RobertaForMaskedLM",Dt]],["xlm",["XLMWithLMHeadModel",$t]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",ot]]]),ci=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",ne]],["electra",["ElectraForQuestionAnswering",_e]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",be]],["deberta",["DebertaForQuestionAnswering",Ce]],["deberta-v2",["DebertaV2ForQuestionAnswering",ze]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",De]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",at]]]),di=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ar]]]),ui=new Map([["llava",["LlavaForConditionalGeneration",lr]],["moondream1",["Moondream1ForConditionalGeneration",cr]],["florence2",["Florence2ForConditionalGeneration",ur]]]),pi=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",ar]]]),hi=new 
Map([["vit",["ViTForImageClassification",An]],["pvt",["PvtForImageClassification",zn]],["vit_msn",["ViTMSNForImageClassification",Dn]],["fastvit",["FastViTForImageClassification",qn]],["mobilevit",["MobileViTForImageClassification",Qn]],["mobilevitv2",["MobileViTV2ForImageClassification",Jn]],["beit",["BeitForImageClassification",ao]],["deit",["DeiTForImageClassification",yo]],["hiera",["HieraForImageClassification",Co]],["convnext",["ConvNextForImageClassification",rs]],["convnextv2",["ConvNextV2ForImageClassification",ss]],["dinov2",["Dinov2ForImageClassification",ls]],["resnet",["ResNetForImageClassification",Ao]],["swin",["SwinForImageClassification",zo]],["segformer",["SegformerForImageClassification",Fa]],["efficientnet",["EfficientNetForImageClassification",La]],["mobilenet_v1",["MobileNetV1ForImageClassification",Va]],["mobilenet_v2",["MobileNetV2ForImageClassification",Ga]],["mobilenet_v3",["MobileNetV3ForImageClassification",Wa]],["mobilenet_v4",["MobileNetV4ForImageClassification",Qa]]]),mi=new Map([["detr",["DetrForObjectDetection",co]],["rt_detr",["RTDetrForObjectDetection",fo]],["table-transformer",["TableTransformerForObjectDetection",To]],["yolos",["YolosForObjectDetection",us]]]),_i=new Map([["owlvit",["OwlViTForObjectDetection",eo]],["owlv2",["Owlv2ForObjectDetection",no]]]),fi=new Map([["detr",["DetrForSegmentation",uo]],["clipseg",["CLIPSegForImageSegmentation",Pr]]]),gi=new Map([["segformer",["SegformerForSemanticSegmentation",Pa]],["sapiens",["SapiensForSemanticSegmentation",qo]]]),Mi=new Map([["detr",["DetrForSegmentation",uo]],["maskformer",["MaskFormerForInstanceSegmentation",Qo]]]),wi=new Map([["sam",["SamModel",ms]]]),Ti=new Map([["wav2vec2",["Wav2Vec2ForCTC",ys]],["wav2vec2-bert",["Wav2Vec2BertForCTC",qs]],["unispeech",["UniSpeechForCTC",Is]],["unispeech-sat",["UniSpeechSatForCTC",Ds]],["wavlm",["WavLMForCTC",Js]],["hubert",["HubertForCTC",Xs]]]),bi=new 
Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Fs]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",$s]],["unispeech",["UniSpeechForSequenceClassification",Bs]],["unispeech-sat",["UniSpeechSatForSequenceClassification",Vs]],["wavlm",["WavLMForSequenceClassification",Ks]],["hubert",["HubertForSequenceClassification",Qs]],["audio-spectrogram-transformer",["ASTForAudioClassification",rr]]]),xi=new Map([["wavlm",["WavLMForXVector",Zs]]]),ki=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",js]],["wavlm",["WavLMForAudioFrameClassification",ea]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",Ps]],["pyannote",["PyAnnoteForAudioFrameClassification",Ss]]]),yi=new Map([["vitmatte",["VitMatteForImageMatting",Wn]]]),Fi=new Map([["swin2sr",["Swin2SRForImageSuperResolution",No]]]),Pi=new Map([["dpt",["DPTForDepthEstimation",Vo]],["depth_anything",["DepthAnythingForDepthEstimation",Ro]],["glpn",["GLPNForDepthEstimation",Jo]],["sapiens",["SapiensForDepthEstimation",$o]]]),Ci=new Map([["sapiens",["SapiensForNormalEstimation",Wo]]]),vi=new Map([["clip",["CLIPVisionModelWithProjection",gr]],["siglip",["SiglipVisionModel",br]]]),Si=[[Ka,M],[Za,w],[ei,x],[oi,M],[si,M],[ai,T],[ti,T],[ii,x],[li,M],[ci,M],[di,b],[ui,y],[hi,M],[fi,M],[Mi,M],[gi,M],[yi,M],[Fi,M],[Pi,M],[Ci,M],[mi,M],[_i,M],[wi,k],[Ti,M],[bi,M],[ri,T],[ni,M],[xi,M],[ki,M],[vi,M]];for(const[e,t]of Si)for(const[r,n]of e.values())P.set(r,t),v.set(n,r),C.set(r,n);const Ai=[["MusicgenForConditionalGeneration",Na,F],["CLIPTextModelWithProjection",_r,M],["SiglipTextModel",Tr,M],["ClapTextModelWithProjection",wa,M],["ClapAudioModelWithProjection",Ta,M]];for(const[e,t,r]of Ai)P.set(e,r),v.set(t,e),C.set(e,t);class Ei extends Ja{static MODEL_CLASS_MAPPINGS=Si.map((e=>e[0]));static BASE_IF_FAIL=!0}class Li extends Ja{static MODEL_CLASS_MAPPINGS=[oi]}class zi extends Ja{static MODEL_CLASS_MAPPINGS=[si]}class Ii extends Ja{static MODEL_CLASS_MAPPINGS=[ai]}class Bi extends Ja{static 
MODEL_CLASS_MAPPINGS=[ti]}class Ni extends Ja{static MODEL_CLASS_MAPPINGS=[ri]}class Oi extends Ja{static MODEL_CLASS_MAPPINGS=[ni]}class Di extends Ja{static MODEL_CLASS_MAPPINGS=[ii]}class Vi extends Ja{static MODEL_CLASS_MAPPINGS=[li]}class ji extends Ja{static MODEL_CLASS_MAPPINGS=[ci]}class Ri extends Ja{static MODEL_CLASS_MAPPINGS=[di]}class Gi extends Ja{static MODEL_CLASS_MAPPINGS=[hi]}class qi extends Ja{static MODEL_CLASS_MAPPINGS=[fi]}class $i extends Ja{static MODEL_CLASS_MAPPINGS=[gi]}class Wi extends Ja{static MODEL_CLASS_MAPPINGS=[Mi]}class Ui extends Ja{static MODEL_CLASS_MAPPINGS=[mi]}class Xi extends Ja{static MODEL_CLASS_MAPPINGS=[_i]}class Qi extends Ja{static MODEL_CLASS_MAPPINGS=[wi]}class Hi extends Ja{static MODEL_CLASS_MAPPINGS=[Ti]}class Yi extends Ja{static MODEL_CLASS_MAPPINGS=[bi]}class Ji extends Ja{static MODEL_CLASS_MAPPINGS=[xi]}class Ki extends Ja{static MODEL_CLASS_MAPPINGS=[ki]}class Zi extends Ja{static MODEL_CLASS_MAPPINGS=[pi]}class el extends Ja{static MODEL_CLASS_MAPPINGS=[yi]}class tl extends Ja{static MODEL_CLASS_MAPPINGS=[Fi]}class rl extends Ja{static MODEL_CLASS_MAPPINGS=[Pi]}class nl extends Ja{static MODEL_CLASS_MAPPINGS=[Ci]}class ol extends Ja{static MODEL_CLASS_MAPPINGS=[vi]}class sl extends G{constructor({logits:e,past_key_values:t,encoder_outputs:r,decoder_attentions:n=null,cross_attentions:o=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=r,this.decoder_attentions=n,this.cross_attentions=o}}class al extends G{constructor({logits:e}){super(),this.logits=e}}class il extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class ll extends G{constructor({logits:e}){super(),this.logits=e}}class cl extends G{constructor({logits:e}){super(),this.logits=e}}class dl extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class ul extends G{constructor({logits:e}){super(),this.logits=e}}class pl extends 
G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class hl extends G{constructor({alphas:e}){super(),this.alphas=e}}class ml extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
|
|
113
113
|
/*!**********************************************!*\
|
|
114
114
|
!*** ./src/models/whisper/common_whisper.js ***!
|
|
115
115
|
\**********************************************/(e,t,r)=>{r.r(t),r.d(t,{WHISPER_LANGUAGE_MAPPING:()=>o,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>s,whisper_language_to_code:()=>a});const n=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],o=new Map(n),s=new 
Map([...n.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function a(e){e=e.toLowerCase();let t=s.get(e);if(void 0===t){if(!o.has(e)){const t=2===e.length?o.keys():o.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
|
|
@@ -127,7 +127,7 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as r from"path";i
|
|
|
127
127
|
\***************************/(e,t,r)=>{r.r(t),r.d(t,{ASTFeatureExtractor:()=>se,AutoProcessor:()=>Me,BeitFeatureExtractor:()=>X,BitImageProcessor:()=>P,CLIPFeatureExtractor:()=>v,CLIPImageProcessor:()=>S,ChineseCLIPFeatureExtractor:()=>A,ClapFeatureExtractor:()=>ae,ConvNextFeatureExtractor:()=>L,ConvNextImageProcessor:()=>z,DPTFeatureExtractor:()=>y,DPTImageProcessor:()=>F,DeiTFeatureExtractor:()=>U,DetrFeatureExtractor:()=>Y,DonutFeatureExtractor:()=>Q,EfficientNetImageProcessor:()=>N,FeatureExtractor:()=>w,Florence2Processor:()=>ge,GLPNFeatureExtractor:()=>C,ImageFeatureExtractor:()=>T,MaskFormerFeatureExtractor:()=>J,MobileNetV1FeatureExtractor:()=>O,MobileNetV2FeatureExtractor:()=>D,MobileNetV3FeatureExtractor:()=>V,MobileNetV4FeatureExtractor:()=>j,MobileViTFeatureExtractor:()=>R,MobileViTImageProcessor:()=>G,NougatImageProcessor:()=>H,OwlViTFeatureExtractor:()=>q,OwlViTProcessor:()=>fe,Owlv2ImageProcessor:()=>$,Processor:()=>de,PvtImageProcessor:()=>k,PyAnnoteFeatureExtractor:()=>ie,PyAnnoteProcessor:()=>me,RTDetrImageProcessor:()=>W,SamImageProcessor:()=>Z,SamProcessor:()=>ue,SapiensFeatureExtractor:()=>b,SeamlessM4TFeatureExtractor:()=>oe,SegformerFeatureExtractor:()=>x,SiglipImageProcessor:()=>E,SpeechT5FeatureExtractor:()=>ce,SpeechT5Processor:()=>_e,Swin2SRImageProcessor:()=>ee,ViTFeatureExtractor:()=>I,ViTImageProcessor:()=>B,VitMatteImageProcessor:()=>te,Wav2Vec2FeatureExtractor:()=>ne,Wav2Vec2ProcessorWithLM:()=>he,WeSpeakerFeatureExtractor:()=>le,WhisperFeatureExtractor:()=>re,WhisperProcessor:()=>pe,YolosFeatureExtractor:()=>K});var n=r(/*! ./utils/generic.js */"./src/utils/generic.js"),o=r(/*! ./utils/core.js */"./src/utils/core.js"),s=r(/*! ./utils/hub.js */"./src/utils/hub.js"),a=r(/*! ./utils/maths.js */"./src/utils/maths.js"),i=r(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=(r(/*! ./utils/image.js */"./src/utils/image.js"),r(/*! 
./utils/audio.js */"./src/utils/audio.js"));function c([e,t,r,n]){return[e-r/2,t-n/2,e+r/2,t+n/2]}function d(e,t=.5,r=null,n=!1){const o=e.logits,s=e.pred_boxes,[i,l,d]=o.dims;if(null!==r&&r.length!==i)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let u=[];for(let e=0;e<i;++e){let i=null!==r?r[e]:null,p={boxes:[],classes:[],scores:[]},h=o[e],m=s[e];for(let e=0;e<l;++e){let r,o=h[e],s=[];if(n){r=o.sigmoid().data;for(let e=0;e<r.length;++e)r[e]>t&&s.push(e)}else{let e=(0,a.max)(o.data)[1];if(e===d-1)continue;if(r=(0,a.softmax)(o.data),r[e]<t)continue;s.push(e)}for(const t of s){let n=m[e].data;n=c(n),null!==i&&(n=n.map(((e,t)=>e*i[(t+1)%2]))),p.boxes.push(n),p.classes.push(t),p.scores.push(r[t])}}u.push(p)}return u}function u(e,t=null){const r=e.logits,n=r.dims[0];if(null!==t&&t.length!==n)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const o=[];for(let e=0;e<n;++e){const n=null!==t?t[e]:null;let s=r[e];null!==n&&(s=(0,i.interpolate)(s,n,"bilinear",!1));const[a,l]=n??s.dims.slice(-2),c=new i.Tensor("int32",new Int32Array(a*l),[a,l]),d=s[0].data,u=c.data;for(let e=1;e<s.dims[0];++e){const t=s[e].data;for(let r=0;r<t.length;++r)t[r]>d[r]&&(d[r]=t[r],u[r]=e)}const p=new Array(s.dims[0]);for(let e=0;e<u.length;++e){const t=u[e];p[t]=t}const h=p.filter((e=>void 0!==e));o.push({segmentation:c,labels:h})}return o}function p(e,t,r,n){const o=[],s=[],i=[];for(let l=0;l<e.dims[0];++l){const c=e[l],d=t[l],u=(0,a.max)(c.data)[1];if(u===n)continue;const p=(0,a.softmax)(c.data)[u];p>r&&(o.push(d),s.push(p),i.push(u))}return[o,s,i]}function h(e,t,r,n=.5,o=.8){const s=[];let a=0,i=0;const l=t[r].data;for(let t=0;t<e.length;++t)e[t]===r&&(s.push(t),++a),l[t]>=n&&++i;let c=a>0&&i>0;if(c){c=a/i>o}return[c,s]}function m(e,t,r,n,o,s=null,a=null){const[l,c]=a??e[0].dims,d=new i.Tensor("int32",new Int32Array(l*c),[l,c]),u=[];if(null!==a)for(let 
t=0;t<e.length;++t)e[t]=(0,i.interpolate)(e[t],a,"bilinear",!1);const p=new Int32Array(e[0].data.length),m=new Float32Array(e[0].data.length);for(let r=0;r<e.length;++r){let n=t[r];const o=e[r].data;for(let e=0;e<o.length;++e)o[e]*=n,o[e]>m[e]&&(p[e]=r,m[e]=o[e])}let _=0;const f=d.data;for(let s=0;s<r.length;++s){const a=r[s],[i,l]=h(p,e,s,n,o);if(i){++_;for(const e of l)f[e]=_;u.push({id:_,label_id:a,score:t[s]})}}return[d,u]}function _(e,t=.5,r=.5,n=.8,o=null,s=null){null===o&&(console.warn("`label_ids_to_fuse` unset. No instance will be fused."),o=new Set);const a=e.class_queries_logits??e.logits,l=(e.masks_queries_logits??e.pred_masks).sigmoid();let[c,d,u]=a.dims;if(u-=1,null!==s&&s.length!==c)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let h=[];for(let e=0;e<c;++e){let c=null!==s?s[e]:null,d=a[e],_=l[e],[f,g,M]=p(d,_,t,u);if(0===M.length){let[e,t]=c??_.dims.slice(-2),r=new i.Tensor("int32",new Int32Array(e*t).fill(-1),[e,t]);h.push({segmentation:r,segments_info:[]});continue}let[w,T]=m(f,g,M,r,n,o,c);h.push({segmentation:w,segments_info:T})}return h}function f(e,t){if(!(e instanceof Float32Array||e instanceof Float64Array))throw new Error(`${t} expects input to be a Float32Array or a Float64Array, but got ${e?.constructor?.name??typeof e} instead. 
If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function g(e,t,r=0,n=null){const o=e/t;let s=(0,a.bankers_round)(o)*t;return null!==n&&s>n&&(s=Math.floor(o)*t),s<r&&(s=Math.ceil(o)*t),s}function M([e,t],r){return[Math.max(Math.floor(e/r),1)*r,Math.max(Math.floor(t/r),1)*r]}class w extends n.Callable{constructor(e){super(),this.config=e}}class T extends w{constructor(e){super(e),this.image_mean=this.config.image_mean??this.config.mean,this.image_std=this.config.image_std??this.config.std,this.resample=this.config.resample??2,this.do_rescale=this.config.do_rescale??!0,this.rescale_factor=this.config.rescale_factor??1/255,this.do_normalize=this.config.do_normalize,this.do_resize=this.config.do_resize,this.do_thumbnail=this.config.do_thumbnail,this.size=this.config.size,this.size_divisibility=this.config.size_divisibility??this.config.size_divisor,this.do_center_crop=this.config.do_center_crop,this.crop_size=this.config.crop_size,this.do_convert_rgb=this.config.do_convert_rgb??!0,this.do_crop_margin=this.config.do_crop_margin,this.pad_size=this.config.pad_size,this.do_pad=this.config.do_pad,this.do_pad&&!this.pad_size&&this.size&&void 0!==this.size.width&&void 0!==this.size.height&&(this.pad_size=this.size),this.do_flip_channel_order=this.config.do_flip_channel_order??!1}async thumbnail(e,t,r=2){const n=e.height,o=e.width,s=t.height,a=t.width;let i=Math.min(n,s),l=Math.min(o,a);return i===n&&l===o?e:(n>o?l=Math.floor(o*i/n):o>n&&(i=Math.floor(n*l/o)),await e.resize(l,i,{resample:r}))}async crop_margin(e,t=200){const r=e.clone().grayscale(),n=(0,a.min)(r.data)[0],o=(0,a.max)(r.data)[0]-n;if(0===o)return e;const s=t/255;let i=r.width,l=r.height,c=0,d=0;const u=r.data;for(let e=0;e<r.height;++e){const t=e*r.width;for(let a=0;a<r.width;++a)(u[t+a]-n)/o<s&&(i=Math.min(i,a),l=Math.min(l,e),c=Math.max(c,a),d=Math.max(d,e))}return e=await 
e.crop([i,l,c,d])}pad_image(e,t,r,{mode:n="constant",center:s=!1,constant_values:a=0}={}){const[i,l,c]=t;let d,u;if("number"==typeof r?(d=r,u=r):(d=r.width,u=r.height),d!==l||u!==i){const r=new Float32Array(d*u*c);if(Array.isArray(a))for(let e=0;e<r.length;++e)r[e]=a[e%c];else 0!==a&&r.fill(a);const[p,h]=s?[Math.floor((d-l)/2),Math.floor((u-i)/2)]:[0,0];for(let t=0;t<i;++t){const n=(t+h)*d,o=t*l;for(let t=0;t<l;++t){const s=(n+t+p)*c,a=(o+t)*c;for(let t=0;t<c;++t)r[s+t]=e[a+t]}}if("symmetric"===n){if(s)throw new Error("`center` padding is not supported when `mode` is set to `symmetric`.");const t=i-1,n=l-1;for(let s=0;s<u;++s){const a=s*d,u=(0,o.calculateReflectOffset)(s,t)*l;for(let t=0;t<d;++t){if(s<i&&t<l)continue;const d=(a+t)*c,p=(u+(0,o.calculateReflectOffset)(t,n))*c;for(let t=0;t<c;++t)r[d+t]=e[p+t]}}}e=r,t=[u,d,c]}return[e,t]}rescale(e){for(let t=0;t<e.length;++t)e[t]=this.rescale_factor*e[t]}get_resize_output_image_size(e,t){const[r,n]=e.size;let o,s;if(this.do_thumbnail){const{height:e,width:r}=t;o=Math.min(e,r)}else Number.isInteger(t)?(o=t,s=this.config.max_size??o):void 0!==t&&(o=t.shortest_edge,s=t.longest_edge);if(void 0!==o||void 0!==s){const e=void 0===o?1:Math.max(o/r,o/n),t=r*e,a=n*e,i=void 0===s?1:Math.min(s/t,s/a);let l=Math.floor(Number((t*i).toFixed(2))),c=Math.floor(Number((a*i).toFixed(2)));return void 0!==this.size_divisibility&&([l,c]=M([l,c],this.size_divisibility)),[l,c]}if(void 0!==t&&void 0!==t.width&&void 0!==t.height){let e=t.width,o=t.height;if(this.config.keep_aspect_ratio&&this.config.ensure_multiple_of){let t=o/n,s=e/r;Math.abs(1-s)<Math.abs(1-t)?t=s:s=t,o=g(t*n,this.config.ensure_multiple_of),e=g(s*r,this.config.ensure_multiple_of)}return[e,o]}if(void 0!==this.size_divisibility)return M([r,n],this.size_divisibility);throw new Error(`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(t)}`)}async resize(e){const[t,r]=this.get_resize_output_image_size(e,this.size);return await 
e.resize(t,r,{resample:this.resample})}async preprocess(e,{do_normalize:t=null,do_pad:r=null,do_convert_rgb:n=null,do_convert_grayscale:o=null,do_flip_channel_order:s=null}={}){this.do_crop_margin&&(e=await this.crop_margin(e));const[a,l]=e.size;if(n??this.do_convert_rgb?e=e.rgb():o&&(e=e.grayscale()),this.do_resize&&(e=await this.resize(e)),this.do_thumbnail&&(e=await this.thumbnail(e,this.size,this.resample)),this.do_center_crop){let t,r;Number.isInteger(this.crop_size)?(t=this.crop_size,r=this.crop_size):(t=this.crop_size.width,r=this.crop_size.height),e=await e.center_crop(t,r)}const c=[e.height,e.width];let d=Float32Array.from(e.data),u=[e.height,e.width,e.channels];if(this.do_rescale&&this.rescale(d),t??this.do_normalize){let t=this.image_mean;Array.isArray(this.image_mean)||(t=new Array(e.channels).fill(t));let r=this.image_std;if(Array.isArray(this.image_std)||(r=new Array(e.channels).fill(t)),t.length!==e.channels||r.length!==e.channels)throw new Error(`When set to arrays, the length of \`image_mean\` (${t.length}) and \`image_std\` (${r.length}) must match the number of channels in the image (${e.channels}).`);for(let n=0;n<d.length;n+=e.channels)for(let o=0;o<e.channels;++o)d[n+o]=(d[n+o]-t[o])/r[o]}if(r??this.do_pad)if(this.pad_size){const t=this.pad_image(d,[e.height,e.width,e.channels],this.pad_size);[d,u]=t}else if(this.size_divisibility){const[e,t]=M([u[1],u[0]],this.size_divisibility);[d,u]=this.pad_image(d,u,{width:e,height:t})}if(s??this.do_flip_channel_order){if(3!==u[2])throw new Error("Flipping channel order is only supported for RGB images.");for(let e=0;e<d.length;e+=3){const t=d[e];d[e]=d[e+2],d[e+2]=t}}return{original_size:[l,a],reshaped_input_size:c,pixel_values:new i.Tensor("float32",d,u).permute(2,0,1)}}async _call(e,...t){Array.isArray(e)||(e=[e]);const r=await 
Promise.all(e.map((e=>this.preprocess(e))));return{pixel_values:(0,i.stack)(r.map((e=>e.pixel_values)),0),original_sizes:r.map((e=>e.original_size)),reshaped_input_sizes:r.map((e=>e.reshaped_input_size))}}}class b extends T{post_process_semantic_segmentation(...e){return u(...e)}}class x extends T{post_process_semantic_segmentation(...e){return u(...e)}}class k extends T{}class y extends T{}class F extends y{}class P extends T{}class C extends T{}class v extends T{}class S extends v{}class A extends T{}class E extends T{}class L extends T{constructor(e){super(e),this.crop_pct=this.config.crop_pct??.875}async resize(e){const t=this.size?.shortest_edge;if(void 0===t)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(t<384){const r=Math.floor(t/this.crop_pct),[n,o]=this.get_resize_output_image_size(e,{shortest_edge:r});e=await e.resize(n,o,{resample:this.resample}),e=await e.center_crop(t,t)}else e=await e.resize(t,t,{resample:this.resample});return e}}class z extends L{}class I extends T{}class B extends T{}class N extends T{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map((e=>e*e)))}}class O extends T{}class D extends T{}class V extends T{}class j extends T{}class R extends T{}class G extends R{}class q extends T{post_process_object_detection(...e){return d(...e)}}class $ extends q{}class W extends T{post_process_object_detection(...e){return d(...e)}}class U extends T{}class X extends T{}class Q extends T{pad_image(e,t,r,n={}){const[o,s,a]=t;let i=this.image_mean;Array.isArray(this.image_mean)||(i=new Array(a).fill(i));let l=this.image_std;Array.isArray(l)||(l=new Array(a).fill(i));const c=i.map(((e,t)=>-e/l[t]));return super.pad_image(e,t,r,{center:!0,constant_values:c,...n})}}class H extends Q{}class Y extends T{async _call(e){const t=await 
super._call(e),r=[t.pixel_values.dims[0],64,64],n=(0,i.full)(r,1n);return{...t,pixel_mask:n}}post_process_object_detection(...e){return d(...e)}post_process_panoptic_segmentation(...e){return _(...e)}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class J extends T{post_process_panoptic_segmentation(...e){return _(...e)}post_process_instance_segmentation(){throw Error("Not implemented yet")}}class K extends T{post_process_object_detection(...e){return d(...e)}}class Z extends T{reshape_input_points(e,t,r,n=!1){e=structuredClone(e);let s=(0,o.calculateDimensions)(e);if(3===s.length)n||(s=[1,...s]),e=[e];else if(4!==s.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");for(let n=0;n<e.length;++n){let o=t[n],s=r[n],a=[s[0]/o[0],s[1]/o[1]];for(let t=0;t<e[n].length;++t)for(let r=0;r<e[n][t].length;++r)for(let o=0;o<e[n][t][r].length;++o)e[n][t][r][o]*=a[o%2]}return new i.Tensor("float32",Float32Array.from(e.flat(1/0)),s)}add_input_labels(e,t){let r=(0,o.calculateDimensions)(e);if(2===r.length)r=[1,...r],e=[e];else if(3!==r.length)throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.");if(r.some(((e,r)=>e!==t.dims[r])))throw Error(`The first ${r.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new i.Tensor("int64",e.flat(1/0).map(BigInt),r)}async _call(e,{input_points:t=null,input_labels:r=null,input_boxes:n=null}={}){const o=await super._call(e);if(t&&(o.input_points=this.reshape_input_points(t,o.original_sizes,o.reshaped_input_sizes)),r){if(!o.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");o.input_labels=this.add_input_labels(r,o.input_points)}return n&&(o.input_boxes=this.reshape_input_points(n,o.original_sizes,o.reshaped_input_sizes,!0)),o}async 
post_process_masks(e,t,r,{mask_threshold:n=0,binarize:o=!0,pad_size:s=null}={}){const a=[],l=[(s=s??this.pad_size).height,s.width];for(let s=0;s<t.length;++s){const c=t[s],d=r[s];let u=await(0,i.interpolate_4d)(e[s],{mode:"bilinear",size:l});if(u=u.slice(null,null,[0,d[0]],[0,d[1]]),u=await(0,i.interpolate_4d)(u,{mode:"bilinear",size:c}),o){const e=u.data,t=new Uint8Array(e.length);for(let r=0;r<e.length;++r)e[r]>n&&(t[r]=1);u=new i.Tensor("bool",t,u.dims)}a.push(u)}return a}generate_crop_boxes(e,t,{crop_n_layers:r=0,overlap_ratio:n=512/1500,points_per_crop:o=32,crop_n_points_downscale_factor:s=1}={}){}}class ee extends T{pad_image(e,t,r,n={}){const[o,s,a]=t;return super.pad_image(e,t,{width:s+(r-s%r)%r,height:o+(r-o%r)%r},{mode:"symmetric",center:!1,constant_values:-1,...n})}}class te extends T{async _call(e,t){Array.isArray(e)||(e=[e]),Array.isArray(t)||(t=[t]);const r=await Promise.all(e.map((e=>this.preprocess(e)))),n=await Promise.all(t.map((e=>this.preprocess(e,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0}))));return{pixel_values:(0,i.stack)(r.map(((e,t)=>(0,i.cat)([e.pixel_values,n[t].pixel_values],0))),0),original_sizes:r.map((e=>e.original_size)),reshaped_input_sizes:r.map((e=>e.reshaped_input_size))}}}class re extends w{constructor(e){super(e),this.config.mel_filters??=(0,l.mel_filter_bank)(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.n_fft,"hann")}async _extract_fbank_features(e){const t=await(0,l.spectrogram)(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),r=t.data,n=(0,a.max)(r)[0];for(let e=0;e<r.length;++e)r[e]=(Math.max(r[e],n-8)+4)/4;return t}async _call(e){let t;f(e,"WhisperFeatureExtractor"),e.length>this.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. 
If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),t=e.slice(0,this.config.n_samples)):(t=new Float32Array(this.config.n_samples),t.set(e));return{input_features:(await this._extract_fbank_features(t)).unsqueeze_(0)}}}class ne extends w{_zero_mean_unit_var_norm(e){const t=e.reduce(((e,t)=>e+t),0)/e.length,r=e.reduce(((e,r)=>e+(r-t)**2),0)/e.length;return e.map((e=>(e-t)/Math.sqrt(r+1e-7)))}async _call(e){f(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let t=e;this.config.do_normalize&&(t=this._zero_mean_unit_var_norm(t));const r=[1,t.length];return{input_values:new i.Tensor("float32",t,r),attention_mask:new i.Tensor("int64",new BigInt64Array(t.length).fill(1n),r)}}}class oe extends w{constructor(e){super(e);const t=this.config.sampling_rate,r=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<r.length;++e)r[e].push(0);this.mel_filters=r,this.window=(0,l.window_function)(400,"povey",{periodic:!1})}async _extract_fbank_features(e,t){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e,{padding:t=!0,pad_to_multiple_of:r=2,do_normalize_per_mel_bins:n=!0,return_attention_mask:o=!0}={}){f(e,"SeamlessM4TFeatureExtractor");let s,a=await this._extract_fbank_features(e,this.config.max_length);if(n){const[e,t]=a.dims,r=a.data;for(let n=0;n<t;++n){let o=0;for(let s=0;s<e;++s)o+=r[s*t+n];const s=o/e;let a=0;for(let o=0;o<e;++o)a+=(r[o*t+n]-s)**2;a/=e-1;const i=Math.sqrt(a+1e-7);for(let o=0;o<e;++o){const e=o*t+n;r[e]=(r[e]-s)/i}}}if(t){const[e,t]=a.dims,n=a.data,l=e%r;if(l>0){const r=new Float32Array(t*(e+l));r.set(n),r.fill(this.config.padding_value,n.length);const c=e+l;a=new i.Tensor(a.type,r,[c,t]),o&&(s=new 
i.Tensor("int64",new BigInt64Array(c),[1,c]),s.data.fill(1n,0,e))}}const[l,c]=a.dims,d=this.config.stride;if(0!==l%d)throw new Error(`The number of frames (${l}) must be a multiple of the stride (${d}).`);const u=a.view(1,Math.floor(l/d),c*d),p={input_features:u};if(o){const e=u.dims[1],t=new BigInt64Array(e);if(s){const e=s.data;for(let r=1,n=0;r<l;r+=d,++n)t[n]=e[r]}else t.fill(1n);p.attention_mask=new i.Tensor("int64",t,[1,e])}return p}}class se extends w{constructor(e){super(e);const t=this.config.sampling_rate,r=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let e=0;e<r.length;++e)r[e].push(0);this.mel_filters=r,this.window=(0,l.window_function)(400,"hann",{periodic:!1}),this.mean=this.config.mean,this.std=this.config.std}async _extract_fbank_features(e,t){return(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,max_num_frames:t,transpose:!0})}async _call(e){f(e,"ASTFeatureExtractor");const t=await this._extract_fbank_features(e,this.config.max_length);if(this.config.do_normalize){const e=2*this.std,r=t.data;for(let t=0;t<r.length;++t)r[t]=(r[t]-this.mean)/e}return{input_values:t.unsqueeze_(0)}}}class ae extends w{constructor(e){super(e),this.mel_filters=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,null,"htk"),this.mel_filters_slaney=(0,l.mel_filter_bank)(this.config.nb_frequency_bins,this.config.feature_size,this.config.frequency_min,this.config.frequency_max,this.config.sampling_rate,"slaney","slaney"),this.window=(0,l.window_function)(this.config.fft_window_size,"hann")}async _get_input_mel(e,t,r,n){let o,s=!1;const a=e.length-t;if(a>0){if("rand_trunc"!==r)throw new Error(`Truncation strategy "${r}" not implemented`);{s=!0;const 
r=Math.floor(Math.random()*(a+1));e=e.subarray(r,r+t),o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}}else{if(a<0){let r=new Float64Array(t);if(r.set(e),"repeat"===n)for(let n=e.length;n<t;n+=e.length)r.set(e.subarray(0,Math.min(e.length,t-n)),n);else if("repeatpad"===n)for(let t=e.length;t<-a;t+=e.length)r.set(e,t);e=r}if("fusion"===r)throw new Error(`Truncation strategy "${r}" not implemented`);o=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}return o.unsqueeze_(0)}async _extract_fbank_features(e,t,r=null){return(0,l.spectrogram)(e,this.window,this.config.fft_window_size,this.config.hop_length,{power:2,mel_filters:t,log_mel:"dB",max_num_frames:r,do_pad:!1,transpose:!0})}async _call(e,{max_length:t=null}={}){f(e,"ClapFeatureExtractor");return{input_features:(await this._get_input_mel(e,t??this.config.nb_max_samples,this.config.truncation,this.config.padding)).unsqueeze_(0)}}}class ie extends w{async _call(e){f(e,"PyAnnoteFeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));const t=[1,1,e.length];return{input_values:new i.Tensor("float32",e,t)}}samples_to_frames(e){return(e-this.config.offset)/this.config.step}post_process_speaker_diarization(e,t){const r=t/this.samples_to_frames(t)/this.config.sampling_rate,n=[];for(const t of e.tolist()){const e=[];let o=-1;for(let r=0;r<t.length;++r){const n=(0,a.softmax)(t[r]),[s,i]=(0,a.max)(n),[l,c]=[r,r+1];i!==o?(o=i,e.push({id:i,start:l,end:c,score:s})):(e.at(-1).end=c,e.at(-1).score+=s)}n.push(e.map((({id:e,start:t,end:n,score:o})=>({id:e,start:t*r,end:n*r,confidence:o/(n-t)}))))}return n}}class le extends w{constructor(e){super(e);const t=this.config.sampling_rate,r=(0,l.mel_filter_bank)(256,this.config.num_mel_bins,20,Math.floor(t/2),t,null,"kaldi",!0);for(let 
e=0;e<r.length;++e)r[e].push(0);this.mel_filters=r,this.window=(0,l.window_function)(400,"hamming",{periodic:!1}),this.min_num_frames=this.config.min_num_frames}async _extract_fbank_features(e){return e=e.map((e=>32768*e)),(0,l.spectrogram)(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1.192092955078125e-7,remove_dc_offset:!0,transpose:!0,min_num_frames:this.min_num_frames})}async _call(e){f(e,"WeSpeakerFeatureExtractor");const t=(await this._extract_fbank_features(e)).unsqueeze_(0);if(null===this.config.fbank_centering_span){const e=t.mean(1).data,r=t.data,[n,o,s]=t.dims;for(let t=0;t<n;++t){const n=t*o*s,a=t*s;for(let t=0;t<o;++t){const o=n+t*s;for(let t=0;t<s;++t)r[o+t]-=e[a+t]}}}return{input_features:t}}}class ce extends w{}class de extends n.Callable{constructor(e){super(),this.feature_extractor=e}async _call(e,...t){return await this.feature_extractor(e,...t)}}class ue extends de{async _call(...e){return await this.feature_extractor(...e)}post_process_masks(...e){return this.feature_extractor.post_process_masks(...e)}reshape_input_points(...e){return this.feature_extractor.reshape_input_points(...e)}}class pe extends de{async _call(e){return await this.feature_extractor(e)}}class he extends de{async _call(e){return await this.feature_extractor(e)}}class me extends de{async _call(e){return await this.feature_extractor(e)}post_process_speaker_diarization(...e){return this.feature_extractor.post_process_speaker_diarization(...e)}}class _e extends de{async _call(e){return await this.feature_extractor(e)}}class fe extends de{}class ge extends de{constructor(e){super(e);const{tasks_answer_post_processing_type:t,task_prompts_without_inputs:r,task_prompts_with_input:n}=e.config;this.tasks_answer_post_processing_type=new Map(Object.entries(t??{})),this.task_prompts_without_inputs=new Map(Object.entries(r??{})),this.task_prompts_with_input=new 
Map(Object.entries(n??{})),this.regexes={quad_boxes:/(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,bboxes:/([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm},this.size_per_bin=1e3}construct_prompts(e){"string"==typeof e&&(e=[e]);const t=[];for(const r of e)if(this.task_prompts_without_inputs.has(r))t.push(this.task_prompts_without_inputs.get(r));else{for(const[e,n]of this.task_prompts_with_input)if(r.includes(e)){t.push(n.replaceAll("{input}",r).replaceAll(e,""));break}t.length!==e.length&&t.push(r)}return t}post_process_generation(e,t,r){const n=this.tasks_answer_post_processing_type.get(t)??"pure_text";let o;switch(e=e.replaceAll("<s>","").replaceAll("</s>",""),n){case"pure_text":o=e;break;case"description_with_bboxes":case"bboxes":case"phrase_grounding":case"ocr":const s="ocr"===n?"quad_boxes":"bboxes",a=e.matchAll(this.regexes[s]),i=[],l=[];for(const[e,t,...n]of a)i.push(t?t.trim():i.at(-1)??""),l.push(n.map(((e,t)=>(Number(e)+.5)/this.size_per_bin*r[t%2])));o={labels:i,[s]:l};break;default:throw new Error(`Task "${t}" (of type "${n}") not yet implemented.`)}return{[t]:o}}}class Me{static 
FEATURE_EXTRACTOR_CLASS_MAPPING={ImageFeatureExtractor:T,WhisperFeatureExtractor:re,ViTFeatureExtractor:I,MobileViTFeatureExtractor:R,MobileViTImageProcessor:G,MobileNetV1FeatureExtractor:O,MobileNetV2FeatureExtractor:D,MobileNetV3FeatureExtractor:V,MobileNetV4FeatureExtractor:j,OwlViTFeatureExtractor:q,Owlv2ImageProcessor:$,CLIPFeatureExtractor:v,CLIPImageProcessor:S,Florence2Processor:ge,ChineseCLIPFeatureExtractor:A,SiglipImageProcessor:E,ConvNextFeatureExtractor:L,ConvNextImageProcessor:z,SegformerFeatureExtractor:x,SapiensFeatureExtractor:b,BitImageProcessor:P,DPTImageProcessor:F,DPTFeatureExtractor:y,PvtImageProcessor:k,GLPNFeatureExtractor:C,BeitFeatureExtractor:X,DeiTFeatureExtractor:U,DetrFeatureExtractor:Y,RTDetrImageProcessor:W,MaskFormerFeatureExtractor:J,YolosFeatureExtractor:K,DonutFeatureExtractor:Q,NougatImageProcessor:H,EfficientNetImageProcessor:N,ViTImageProcessor:B,VitMatteImageProcessor:te,SamImageProcessor:Z,Swin2SRImageProcessor:ee,Wav2Vec2FeatureExtractor:ne,SeamlessM4TFeatureExtractor:oe,SpeechT5FeatureExtractor:ce,ASTFeatureExtractor:se,ClapFeatureExtractor:ae,PyAnnoteFeatureExtractor:ie,WeSpeakerFeatureExtractor:le};static PROCESSOR_CLASS_MAPPING={WhisperProcessor:pe,Wav2Vec2ProcessorWithLM:he,PyAnnoteProcessor:me,SamProcessor:ue,SpeechT5Processor:_e,OwlViTProcessor:fe,Florence2Processor:ge};static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:o=!1,revision:a="main"}={}){let i=r??await(0,s.getModelJSON)(e,"preprocessor_config.json",!0,{progress_callback:t,config:r,cache_dir:n,local_files_only:o,revision:a}),l=i.feature_extractor_type??i.image_processor_type,c=this.FEATURE_EXTRACTOR_CLASS_MAPPING[l];if(!c){if(void 0===i.size)throw new Error(`Unknown Feature Extractor type: ${l}`);console.warn(`Feature extractor type "${l}" not found, assuming ImageFeatureExtractor due to size parameter in config.`),c=T}return new(this.PROCESSOR_CLASS_MAPPING[i.processor_class]??de)(new 
c(i))}}},"./src/tokenizers.js":
|
|
128
128
|
/*!***************************!*\
|
|
129
129
|
!*** ./src/tokenizers.js ***!
|
|
130
|
-
\***************************/(e,t,r)=>{r.r(t),r.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Ne,BertTokenizer:()=>be,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>it,BloomTokenizer:()=>je,CLIPTokenizer:()=>nt,CamembertTokenizer:()=>Ee,CodeGenTokenizer:()=>rt,CodeLlamaTokenizer:()=>qe,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>ve,DebertaTokenizer:()=>Fe,DebertaV2Tokenizer:()=>Pe,DistilBertTokenizer:()=>Ae,ElectraTokenizer:()=>ze,EsmTokenizer:()=>Qe,FalconTokenizer:()=>Ue,GPT2Tokenizer:()=>Be,GPTNeoXTokenizer:()=>Xe,GemmaTokenizer:()=>Ye,Grok1Tokenizer:()=>Je,HerbertTokenizer:()=>Ce,LlamaTokenizer:()=>Ge,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Oe,MPNetTokenizer:()=>We,MarianTokenizer:()=>st,MobileBertTokenizer:()=>ke,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>Te,Qwen2Tokenizer:()=>He,RoFormerTokenizer:()=>Se,RobertaTokenizer:()=>Ve,SiglipTokenizer:()=>ot,SpeechT5Tokenizer:()=>ct,SqueezeBertTokenizer:()=>ye,T5Tokenizer:()=>Ie,TokenizerModel:()=>k,VitsTokenizer:()=>ut,Wav2Vec2CTCTokenizer:()=>at,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>$e,XLMTokenizer:()=>Le,is_chinese_char:()=>M});var n=r(/*! ./utils/generic.js */"./src/utils/generic.js"),o=r(/*! ./utils/core.js */"./src/utils/core.js"),s=r(/*! ./utils/hub.js */"./src/utils/hub.js"),a=r(/*! ./utils/maths.js */"./src/utils/maths.js"),i=r(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=r(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),c=r(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=r(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),u=r(/*! 
./utils/constants.js */"./src/utils/constants.js");async function p(e,t){const r=await Promise.all([(0,s.getModelJSON)(e,"tokenizer.json",!0,t),(0,s.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(r[1].legacy=t.legacy),r}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,r]of b)t=t.replaceAll(e,r);return new RegExp(t,"gu")}if(void 0!==e.String){const r=(0,o.escapeRegExp)(e.String);return new RegExp(t?r:`(${r})`,"gu")}return console.warn("Unknown pattern type:",e),null}function m(e){return new Map(Object.entries(e))}function _(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function f(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function g(e){return e.replace(/[\u0300-\u036f]/g,"")}function M(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const w="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",T=new RegExp(`^[${w}]+$`,"gu"),b=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class k extends n.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 
0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new y(e);case"Unigram":return new F(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new S(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){return e=this.encode(e),this.fuse_unk&&(e=function(e,t,r){const n=[];let o=0;for(;o<e.length;)if(n.push(e[o]),(t.get(e[o])??r)===r)for(;++o<e.length&&(t.get(e[o])??r)===r;)t.get(n.at(-1))!==r&&(n[n.length-1]+=e[o]);else++o;return n}(e,this.tokens_to_ids,this.unk_token_id)),e}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class y extends k{constructor(e){super(e),this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const r of e){const e=[...r];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let n=!1,o=0;const s=[];for(;o<e.length;){let t=e.length,r=null;for(;o<t;){let n=e.slice(o,t).join("");if(o>0&&(n=this.config.continuing_subword_prefix+n),this.tokens_to_ids.has(n)){r=n;break}--t}if(null===r){n=!0;break}s.push(r),o=t}n?t.push(this.unk_token):t.push(...s)}return t}}class F extends k{constructor(e,t){super(e);const r=e.vocab.length;this.vocab=new Array(r),this.scores=new Array(r);for(let t=0;t<r;++t){const r=e.vocab[t];this.vocab[t]=r[0],this.scores[t]=r[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bosToken=" 
",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,a.min)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.sentence,r=t.length;let n=0;for(;n<r;){const r=1;let o=!1;const s=[];for(let a of this.trie.commonPrefixSearch(t.slice(n))){s.push(a);const t=this.tokens_to_ids.get(a),i=this.scores[t],l=a.length;e.insert(n,l,i,t),o||l!==r||(o=!0)}o||e.insert(n,r,this.unkScore,this.unk_token_id),n+=r}}tokenize(e){const t=new l.TokenLattice(e,this.bosTokenId,this.eosTokenId);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const r of e){const e=this.tokenize(r);t.push(...e)}return t}}const P=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let r=0;for(let n=0;n<256;++n)e.includes(n)||(e.push(n),t.push(256+r),r+=1);const n=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,n[t]])))})(),C=(0,o.reverseDictionary)(P);class v extends k{constructor(e){super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;this.bpe_ranks=new Map(e.merges.map(((e,t)=>[e,t]))),this.merges=e.merges.map((e=>e.split(this.BPE_SPLIT_TOKEN))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new 
TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const r=Array.from(e);this.end_of_word_suffix&&(r[r.length-1]+=this.end_of_word_suffix);let n=[];if(r.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:r[0],bias:0,prev:null,next:null},o=t;for(let t=1;t<r.length;++t){const n={bias:t/r.length,token:r[t],prev:o,next:null};o.next=n,this._add_node(e,o),o=n}for(;!e.isEmpty();){const r=e.pop();if(r.deleted||!r.next||r.next.deleted)continue;if(r.deleted=!0,r.next.deleted=!0,r.prev){const e={...r.prev};r.prev.deleted=!0,r.prev=e,e.prev?e.prev.next=e:t=e}const n={token:r.token+r.next.token,bias:r.bias,prev:r.prev,next:r.next.next};n.prev?(n.prev.next=n,this._add_node(e,n.prev)):t=n,n.next&&(n.next.prev=n,this._add_node(e,n))}for(let e=t;null!==e;e=e.next)n.push(e.token)}else n=r;if(this.continuing_subword_suffix)for(let e=0;e<n.length-1;++e)n[e]+=this.continuing_subword_suffix;return this.cache.set(e,n),n}_add_node(e,t){const r=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==r&&(t.score=r+t.bias,e.push(t))}encode(e){const t=[];for(const r of e){if(this.ignore_merges&&this.tokens_to_ids.has(r)){t.push(r);continue}const e=this.bpe(r);for(const r of e)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const e=Array.from(this.text_encoder.encode(r)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`));e.every((e=>this.tokens_to_ids.has(e)))?t.push(...e):t.push(this.unk_token)}else t.push(this.unk_token)}return t}}class S extends 
k{constructor(e,t){super(e),this.tokens_to_ids=m(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class A extends n.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new j(e);case"Precompiled":return new pe(e);case"Sequence":return new V(e);case"Replace":return new E(e);case"NFC":return new L(e);case"NFKC":return new z(e);case"NFKD":return new I(e);case"Strip":return new B(e);case"StripAccents":return new N(e);case"Lowercase":return new O(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class E extends A{normalize(e){const t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class L extends A{normalize(e){return e=e.normalize("NFC")}}class z extends A{normalize(e){return e=e.normalize("NFKC")}}class I extends A{normalize(e){return e=e.normalize("NFKD")}}class B extends A{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class N extends A{normalize(e){return e=g(e)}}class O extends A{normalize(e){return e=e.toLowerCase()}}class D extends A{normalize(e){return e=this.config.prepend+e}}class V extends A{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>A.fromConfig(e)))}normalize(e){return 
this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class j extends A{_tokenize_chinese_chars(e){const t=[];for(let r=0;r<e.length;++r){const n=e[r];M(n.charCodeAt(0))?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const r of e){const e=r.charCodeAt(0);0===e||65533===e||this._is_control(r)||(/^\s$/.test(r)?t.push(" "):t.push(r))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class R extends n.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new G(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new _e(e);case"Metaspace":return new de(e);case"ByteLevel":return new q(e);case"Split":return new $(e);case"Punctuation":return new W(e);case"Digits":return new U(e);case"Replace":return new fe(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class G extends R{constructor(e){super(),this.pattern=new RegExp(`[^\\s${w}]+|[${w}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class q extends R{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| 
?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=P,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class $ extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const r=[];let n=0;for(const o of e.matchAll(t)){const t=o[0];n<o.index&&r.push(e.slice(n,o.index)),t.length>0&&r.push(t),n=o.index+t.length}return n<e.length&&r.push(e.slice(n)),r}(e,this.pattern)}}class W extends R{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${w}]+|[${w}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class U extends R{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class X extends n.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Y(e);case"ByteLevel":return new J(e);case"RobertaProcessing":return new H(e);case"BertProcessing":return new Q(e);case"Sequence":return new K(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class Q extends X{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:r=!0}={}){r&&(e=(0,o.mergeArrays)([this.cls],e,[this.sep]));let n=new Array(e.length).fill(0);if(null!==t){const s=r&&this instanceof H?[this.sep]:[],a=r?[this.sep]:[];e=(0,o.mergeArrays)(e,s,t,a),n=(0,o.mergeArrays)(n,new 
Array(t.length+s.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}}class H extends Q{}class Y extends X{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:r=!0}={}){const n=null===t?this.single:this.pair;let s=[],a=[];for(const i of n)"SpecialToken"in i?r&&(s.push(i.SpecialToken.id),a.push(i.SpecialToken.type_id)):"Sequence"in i&&("A"===i.Sequence.id?(s=(0,o.mergeArrays)(s,e),a=(0,o.mergeArrays)(a,new Array(e.length).fill(i.Sequence.type_id))):"B"===i.Sequence.id&&(s=(0,o.mergeArrays)(s,t),a=(0,o.mergeArrays)(a,new Array(t.length).fill(i.Sequence.type_id))));return{tokens:s,token_type_ids:a}}}class J extends X{post_process(e,t=null){return t&&(e=(0,o.mergeArrays)(e,t)),{tokens:e}}}class K extends X{constructor(e){super(e),this.processors=e.processors.map((e=>X.fromConfig(e)))}post_process(e,t=null,r={}){let n;for(const o of this.processors)if(o instanceof J){if(e=o.post_process(e).tokens,t){t=o.post_process(t).tokens}}else{const s=o.post_process(e,t,r);e=s.tokens,n=s.token_type_ids}return{tokens:e,token_type_ids:n}}}class Z extends n.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new oe(e);case"Metaspace":return new ue(e);case"ByteLevel":return new se(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new re(e);case"Strip":return new ne(e);case"Sequence":return new ie(e);case"CTC":return new ae(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends Z{decode_chain(e){const t=h(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends 
Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let r=[];for(const n of e){let e=null;if(6===n.length&&n.startsWith("<0x")&&n.endsWith(">")){const t=parseInt(n.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)r.push(e);else{if(r.length>0){const e=this.text_decoder.decode(Uint8Array.from(r));t.push(e),r=[]}t.push(n)}}if(r.length>0){const e=this.text_decoder.decode(Uint8Array.from(r));t.push(e),r=[]}return t}}class re extends Z{decode_chain(e){return[e.join("")]}}class ne extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let r=0;r<this.start&&e[r]===this.content;++r)t=r+1;let r=e.length;for(let t=0;t<this.stop;++t){const n=e.length-t-1;if(e[n]!==this.content)break;r=n}return e.slice(t,r)}))}}class oe extends Z{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=f(e)),e)))}}class se extends Z{constructor(e){super(e),this.byte_decoder=C,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),r=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(r)}decode_chain(e){const t=[];let r=[];for(const n of e)void 0!==this.added_tokens.find((e=>e.content===n))?(r.length>0&&(t.push(this.convert_tokens_to_string(r)),r=[]),t.push(n)):r.push(n);return r.length>0&&t.push(this.convert_tokens_to_string(r)),t}}class ae extends Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let r=t.filter((e=>e!==this.pad_token)).join("");return 
this.cleanup&&(r=f(r).replaceAll(this.word_delimiter_token," ").trim()),r}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class ie extends Z{constructor(e){super(e),this.decoders=e.decoders.map((e=>Z.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,r)=>t.replaceAll(this.suffix,r===e.length-1?"":" ")))}}class ce extends Z{decode_chain(e){let t="";for(let r=1;r<e.length;r+=2)t+=e[r];return[t]}}class de extends R{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let r=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!r.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(r=this.strRep+r),[r]}}class ue extends Z{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let r=0;r<e.length;++r){let n=e[r].replaceAll(this.replacement," ");this.addPrefixSpace&&0==r&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}}class pe extends A{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends R{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>R.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,r)=>r.pre_tokenize(e,t)),[e])}}class me extends R{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class _e extends 
R{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class fe extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const ge=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function Me(e,t,r,n){for(const s of Object.keys(e)){const a=t-e[s].length,i=r(s),l=new Array(a).fill(i);e[s]="right"===n?(0,o.mergeArrays)(e[s],l):(0,o.mergeArrays)(l,e[s])}}function we(e,t){for(const r of Object.keys(e))e[r].length=t}class Te extends n.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=A.fromConfig(e.normalizer),this.pre_tokenizer=R.fromConfig(e.pre_tokenizer),this.model=k.fromConfig(e.model,t),this.post_processor=X.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new 
RegExp(this.added_tokens.slice().sort(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,o.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:r}of this.chat_template){if("string"!=typeof t||"string"!=typeof r)throw new Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=r}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){return new this(...await p(e,{progress_callback:t,config:r,cache_dir:n,local_files_only:o,revision:s,legacy:a}))}_call(e,{text_pair:t=null,add_special_tokens:r=!0,padding:n=!1,truncation:o=null,max_length:s=null,return_tensor:l=!0,return_token_type_ids:c=null}={}){const d=Array.isArray(e);let u;if(d){if(0===e.length)throw Error("text array must be 
non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");u=e.map(((e,n)=>this._encode_plus(e,{text_pair:t[n],add_special_tokens:r,return_token_type_ids:c})))}else u=e.map((e=>this._encode_plus(e,{add_special_tokens:r,return_token_type_ids:c})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");u=[this._encode_plus(e,{text_pair:t,add_special_tokens:r,return_token_type_ids:c})]}if(null===s?s="max_length"===n?this.model_max_length:(0,a.max)(u.map((e=>e.input_ids.length)))[0]:o||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),s=Math.min(s,this.model_max_length??1/0),n||o)for(let e=0;e<u.length;++e)u[e].input_ids.length!==s&&(u[e].input_ids.length>s?o&&we(u[e],s):n&&Me(u[e],s,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!n||!o)&&u.some((e=>{for(const t of Object.keys(e))if(e[t].length!==u[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[u.length,u[0].input_ids.length];for(const t of Object.keys(u[0]))p[t]=new i.Tensor("int64",BigInt64Array.from(u.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(u[0]))p[e]=u.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" 
")),this.do_lowercase_and_remove_accent&&(e=function(e){return g(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const r=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(r)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:r=!0,return_token_type_ids:n=null}={}){const{tokens:o,token_type_ids:s}=this._tokenize_helper(e,{pair:t,add_special_tokens:r}),a=this.model.convert_tokens_to_ids(o),i={input_ids:a,attention_mask:new Array(a.length).fill(1)};return(n??this.return_token_type_ids)&&s&&(i.token_type_ids=s),i}_tokenize_helper(e,{pair:t=null,add_special_tokens:r=!1}={}){const n=this._encode_text(e),s=this._encode_text(t);return this.post_processor?this.post_processor(n,s,{add_special_tokens:r}):{tokens:(0,o.mergeArrays)(n??[],s??[])}}tokenize(e,{pair:t=null,add_special_tokens:r=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:r}).tokens}encode(e,{text_pair:t=null,add_special_tokens:r=!0,return_token_type_ids:n=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:r,return_token_type_ids:n}).input_ids}batch_decode(e,t={}){return e instanceof i.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof i.Tensor&&(e=_(e)),!Array.isArray(e)||0===e.length||!(0,o.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:r=null}){let n=this.model.convert_ids_to_tokens(e);t&&(n=n.filter((e=>!this.special_tokens.includes(e))));let o=this.decoder?this.decoder(n):n.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(o=o.replaceAll(this.decoder.end_of_word_suffix," "),t&&(o=o.trim())),(r??this.clean_up_tokenization_spaces)&&(o=f(o)),o}get_chat_template({chat_template:e=null,tools:t=null}={}){if(this.chat_template&&"object"==typeof this.chat_template){const 
r=this.chat_template;if(null!==e&&Object.hasOwn(r,e))e=r[e];else if(null===e)if(null!==t&&"tool_use"in r)e=r.tool_use;else{if(!("default"in r))throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(r).sort()}.`);e=r.default}}else if(null===e){if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");e=this.chat_template}return e}apply_chat_template(e,{tools:t=null,documents:r=null,chat_template:n=null,add_generation_prompt:o=!1,tokenize:s=!0,padding:a=!1,truncation:i=!1,max_length:l=null,return_tensor:d=!0,return_dict:u=!1,tokenizer_kwargs:p={},...h}={}){if("string"!=typeof(n=this.get_chat_template({chat_template:n,tools:t})))throw Error("chat_template must be a string, but got "+typeof n);let m=this._compiled_template_cache.get(n);void 0===m&&(m=new c.Template(n),this._compiled_template_cache.set(n,m));const _=Object.create(null);for(const e of ge){const t=this.getToken(e);t&&(_[e]=t)}const f=m.render({messages:e,add_generation_prompt:o,tools:t,documents:r,..._,...h});if(s){const e=this._call(f,{add_special_tokens:!1,padding:a,truncation:i,max_length:l,return_tensor:d,...p});return u?e:e.input_ids}return f}}class be extends Te{return_token_type_ids=!0}class xe extends Te{return_token_type_ids=!0}class ke extends Te{return_token_type_ids=!0}class ye extends Te{return_token_type_ids=!0}class Fe extends Te{return_token_type_ids=!0}class Pe extends Te{return_token_type_ids=!0}class Ce extends Te{return_token_type_ids=!0}class ve extends Te{return_token_type_ids=!0}class Se extends Te{return_token_type_ids=!0}class Ae 
extends Te{}class Ee extends Te{}class Le extends Te{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class ze extends Te{return_token_type_ids=!0}class Ie extends Te{}class Be extends Te{}class Ne extends Te{}class Oe extends Te{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class De extends Oe{}class Ve extends Te{}class je extends Te{constructor(e,t){const r=".,!?…。,、।۔،",n=e.pre_tokenizer?.pretokenizers[0]?.pattern;n&&n.Regex===` ?[^(\\s|[${r}])]+`&&(n.Regex=` ?[^\\s${r}]+`),super(e,t)}}const Re="▁";class Ge extends Te{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Re,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Re+e.replaceAll(Re," "));return t.length>1&&t[0]===Re&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class qe extends Te{}class $e extends Te{}class We extends Te{}class Ue extends Te{}class Xe extends Te{}class Qe extends Te{}class He extends Te{}class Ye extends Te{}class Je extends Te{}function Ke(e,t,r,n){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it 
should be a function.");const o=n.src_lang,s=n.tgt_lang;if(!e.language_codes.includes(s))throw new Error(`Target language code "${s}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==o){if(!e.language_codes.includes(o))throw new Error(`Source language code "${o}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(o);break}}return n.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(s)])[0],e._call(t,r)}class Ze extends Te{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class et extends Te{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class tt extends Te{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:r=!1,time_precision:n=null,force_full_sequences:o=!0}={}){if(null===n)throw Error("Must specify time_precision");let s=null;const i="word"===t;function l(){return{language:s,timestamp:[null,null],text:""}}const c=[];let u=l(),p=0;const h=this.timestamp_begin;let m=[],_=[],f=!1,g=null;const M=new Set(this.all_special_ids);for(const r of e){const e=r.tokens,o=i?r.token_timestamps:null;let w=null,b=h;if("stride"in r){const[t,o,s]=r.stride;if(p-=o,g=t-s,o&&(b=o/n+h),s)for(let t=e.length-1;t>=0;--t){const r=Number(e[t]);if(r>=h){if(null!==w&&(r-h)*n<g)break;w=r}}}let x=[],k=[];for(let r=0;r<e.length;++r){const g=Number(e[r]);if(M.has(g)){const 
e=this.decode([g]),r=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==r){if(null!==s&&r!==s&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);u.text=t,c.push(u),m=[],x=[],u=l()}s=u.language=r}}else if(g>=h){const e=(g-h)*n+p,t=(0,a.round)(e,2);if(null!==w&&g>=w)f=!0;else if(f||m.length>0&&g<b)f=!1;else if(null===u.timestamp[0])u.timestamp[0]=t;else if(t===u.timestamp[0]);else{u.timestamp[1]=t,m.push(x),i&&_.push(k);const[e,r]=this.findLongestCommonSequence(m,_),n=this.decode(e);u.text=n,i&&(u.words=this.collateWordTimestamps(e,r,s)),c.push(u),m=[],x=[],_=[],k=[],u=l()}}else if(x.push(g),i){let e,t=(0,a.round)(o[r]+p,2);if(r+1<o.length){e=(0,a.round)(o[r+1]+p,2);const s=this.decode([g]);T.test(s)&&(e=(0,a.round)(Math.min(t+n,e),2))}else e=null;k.push([t,e])}}if("stride"in r){const[e,t,n]=r.stride;p+=e-n}x.length>0?(m.push(x),i&&_.push(k)):m.every((e=>0===e.length))&&(u=l(),m=[],x=[],_=[],k=[])}if(m.length>0){if(o&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,r]=this.findLongestCommonSequence(m,_),n=this.decode(e);u.text=n,i&&(u.words=this.collateWordTimestamps(e,r,s)),c.push(u)}let w=Object.create(null);const b=c.map((e=>e.text)).join("");if(t||r){for(let e=0;e<c.length;++e){const n=c[e];t||delete n.timestamp,r||delete n.language}if(i){const e=[];for(const t of c)for(const r of t.words)e.push(r);w={chunks:e}}else w={chunks:c}}return[b,w]}findLongestCommonSequence(e,t=null){let r=e[0],n=r.length,o=[];const s=Array.isArray(t)&&t.length>0;let a=s?[]:null,i=s?t[0]:null;for(let l=1;l<e.length;++l){const c=e[l];let d=0,u=[n,n,0,0];const p=c.length;for(let e=1;e<n+p;++e){const o=Math.max(0,n-e),a=Math.min(n,n+p-e),h=r.slice(o,a),m=Math.max(0,e-n),_=Math.min(p,e),f=c.slice(m,_);if(h.length!==f.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let g;g=s?h.filter(((e,r)=>e===f[r]&&i[o+r]<=t[l][m+r])).length:h.filter(((e,t)=>e===f[t])).length;const M=g/e+e/1e4;g>1&&M>d&&(d=M,u=[o,a,m,_])}const[h,m,_,f]=u,g=Math.floor((m+h)/2),M=Math.floor((f+_)/2);o.push(...r.slice(0,g)),r=c.slice(M),n=r.length,s&&(a.push(...i.slice(0,g)),i=t[l].slice(M))}return o.push(...r),s?(a.push(...i),[o,a]):[o,[]]}collateWordTimestamps(e,t,r){const[n,o,s]=this.combineTokensIntoWords(e,r),a=[];for(let e=0;e<n.length;++e){const r=s[e];a.push({text:n[e],timestamp:[t[r.at(0)][0],t[r.at(-1)][1]]})}return a}combineTokensIntoWords(e,t,r="\"'“¡¿([{-",n="\"'.。,,!!??::”)]}、"){let o,s,a;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[o,s,a]=this.splitTokensOnUnicode(e):[o,s,a]=this.splitTokensOnSpaces(e),this.mergePunctuations(o,s,a,r,n)}decode(e,t){let r;return t?.decode_with_timestamps?(e instanceof i.Tensor&&(e=_(e)),r=this.decodeWithTimestamps(e,t)):r=super.decode(e,t),r}decodeWithTimestamps(e,t){const 
r=t?.time_precision??.02,n=Array.from(this.all_special_ids).at(-1)+1;let o=[[]];for(let t of e)if(t=Number(t),t>=n){const e=((t-n)*r).toFixed(2);o.push(`<|${e}|>`),o.push([])}else o[o.length-1].push(t);return o=o.map((e=>"string"==typeof e?e:super.decode(e,t))),o.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),r=[],n=[],o=[];let s=[],a=[],i=0;for(let l=0;l<e.length;++l){const c=e[l];s.push(c),a.push(l);const d=this.decode(s,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[i+d.indexOf("�")]||(r.push(d),n.push(s),o.push(a),s=[],a=[],i+=d.length)}return[r,n,o]}splitTokensOnSpaces(e){const[t,r,n]=this.splitTokensOnUnicode(e),o=[],s=[],a=[],i=new RegExp(`^[${w}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],c=r[e],d=n[e],u=c[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=i.test(h);if(u||p||m||0===o.length)o.push(l),s.push(c),a.push(d);else{const e=o.length-1;o[e]+=l,s[e].push(...c),a[e].push(...d)}}return[o,s,a]}mergePunctuations(e,t,r,n,s){const a=structuredClone(e),i=structuredClone(t),l=structuredClone(r);let c=a.length-2,d=a.length-1;for(;c>=0;)a[c].startsWith(" ")&&n.includes(a[c].trim())?(a[d]=a[c]+a[d],i[d]=(0,o.mergeArrays)(i[c],i[d]),l[d]=(0,o.mergeArrays)(l[c],l[d]),a[c]="",i[c]=[],l[c]=[]):d=c,--c;for(c=0,d=1;d<a.length;)!a[c].endsWith(" ")&&s.includes(a[d])?(a[c]+=a[d],i[c]=(0,o.mergeArrays)(i[c],i[d]),l[c]=(0,o.mergeArrays)(l[c],l[d]),a[d]="",i[d]=[],l[d]=[]):c=d,++d;return[a.filter((e=>e)),i.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:r=!0}={}){const n=[];if(e){const t=(0,d.whisper_language_to_code)(e),r=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===r)throw new Error(`Unable to find language "${t}" in model vocabulary. 
Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(r)}else n.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(e)}else n.push(null);if(r){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(e)}return n.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class rt extends Te{}class nt extends Te{}class ot extends Te{}class st extends Te{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...r]=e.trim().split(this.languageRegex);if(0===r.length)return super._encode_text(t);if(2===r.length){const[e,t]=r;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,o.mergeArrays)([e],super._encode_text(t))}}}class at extends Te{}class it extends Te{}class lt extends Te{}class ct extends Te{}class dt extends Te{}class ut extends Te{constructor(e,t){super(e,t),this.decoder=new ce({})}}class pt extends Te{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:Ie,DistilBertTokenizer:Ae,CamembertTokenizer:Ee,DebertaTokenizer:Fe,DebertaV2Tokenizer:Pe,BertTokenizer:be,HerbertTokenizer:Ce,ConvBertTokenizer:ve,RoFormerTokenizer:Se,XLMTokenizer:Le,ElectraTokenizer:ze,MobileBertTokenizer:ke,SqueezeBertTokenizer:ye,AlbertTokenizer:xe,GPT2Tokenizer:Be,BartTokenizer:Ne,MBartTokenizer:Oe,MBart50Tokenizer:De,RobertaTokenizer:Ve,WhisperTokenizer:tt,CodeGenTokenizer:rt,CLIPTokenizer:nt,SiglipTokenizer:ot,MarianTokenizer:st,BloomTokenizer:je,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:Ge,CodeLlamaTokenizer:qe,XLMRobertaTokenizer:$e,MPNetTokenizer:We,FalconTokenizer:Ue,GPTNeoXTokenizer:Xe,EsmTokenizer:Qe,Wav2Vec2CTCTokenizer:at,BlenderbotTokenizer:it,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ct,NougatTokenizer:dt,VitsTokenizer:ut,Qwen2Tokenizer:He,GemmaTokenizer:Ye,Grok1Tokenizer:Je,CohereTokenizer:pt,PreTrainedTokenizer:Te};static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){const[i,l]=await p(e,{progress_callback:t,config:r,cache_dir:n,local_files_only:o,revision:s,legacy:a}),c=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[c];return d||(console.warn(`Unknown tokenizer class "${c}", attempting to construct from base class.`),d=Te),new d(i,l)}}},"./src/utils/audio.js":
|
|
130
|
+
\***************************/(e,t,r)=>{r.r(t),r.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Ne,BertTokenizer:()=>be,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>it,BloomTokenizer:()=>je,CLIPTokenizer:()=>nt,CamembertTokenizer:()=>Ee,CodeGenTokenizer:()=>rt,CodeLlamaTokenizer:()=>qe,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>ve,DebertaTokenizer:()=>Fe,DebertaV2Tokenizer:()=>Pe,DistilBertTokenizer:()=>Ae,ElectraTokenizer:()=>ze,EsmTokenizer:()=>Qe,FalconTokenizer:()=>Ue,GPT2Tokenizer:()=>Be,GPTNeoXTokenizer:()=>Xe,GemmaTokenizer:()=>Ye,Grok1Tokenizer:()=>Je,HerbertTokenizer:()=>Ce,LlamaTokenizer:()=>Ge,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Oe,MPNetTokenizer:()=>We,MarianTokenizer:()=>st,MobileBertTokenizer:()=>ke,NllbTokenizer:()=>Ze,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>Te,Qwen2Tokenizer:()=>He,RoFormerTokenizer:()=>Se,RobertaTokenizer:()=>Ve,SiglipTokenizer:()=>ot,SpeechT5Tokenizer:()=>ct,SqueezeBertTokenizer:()=>ye,T5Tokenizer:()=>Ie,TokenizerModel:()=>k,VitsTokenizer:()=>ut,Wav2Vec2CTCTokenizer:()=>at,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>$e,XLMTokenizer:()=>Le,is_chinese_char:()=>M});var n=r(/*! ./utils/generic.js */"./src/utils/generic.js"),o=r(/*! ./utils/core.js */"./src/utils/core.js"),s=r(/*! ./utils/hub.js */"./src/utils/hub.js"),a=r(/*! ./utils/maths.js */"./src/utils/maths.js"),i=r(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=r(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),c=r(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=r(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js"),u=r(/*! 
./utils/constants.js */"./src/utils/constants.js");async function p(e,t){const r=await Promise.all([(0,s.getModelJSON)(e,"tokenizer.json",!0,t),(0,s.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(r[1].legacy=t.legacy),r}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,r]of b)t=t.replaceAll(e,r);return new RegExp(t,"gu")}if(void 0!==e.String){const r=(0,o.escapeRegExp)(e.String);return new RegExp(t?r:`(${r})`,"gu")}return console.warn("Unknown pattern type:",e),null}function m(e){return new Map(Object.entries(e))}function _(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function f(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function g(e){return e.replace(/\p{M}/gu,"")}function M(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const w="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",T=new RegExp(`^[${w}]+$`,"gu"),b=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class k extends n.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 
0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new y(e);case"Unigram":return new F(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new S(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){return e=this.encode(e),this.fuse_unk&&(e=function(e,t,r){const n=[];let o=0;for(;o<e.length;)if(n.push(e[o]),(t.get(e[o])??r)===r)for(;++o<e.length&&(t.get(e[o])??r)===r;)t.get(n.at(-1))!==r&&(n[n.length-1]+=e[o]);else++o;return n}(e,this.tokens_to_ids,this.unk_token_id)),e}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class y extends k{constructor(e){super(e),this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const r of e){const e=[...r];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let n=!1,o=0;const s=[];for(;o<e.length;){let t=e.length,r=null;for(;o<t;){let n=e.slice(o,t).join("");if(o>0&&(n=this.config.continuing_subword_prefix+n),this.tokens_to_ids.has(n)){r=n;break}--t}if(null===r){n=!0;break}s.push(r),o=t}n?t.push(this.unk_token):t.push(...s)}return t}}class F extends k{constructor(e,t){super(e);const r=e.vocab.length;this.vocab=new Array(r),this.scores=new Array(r);for(let t=0;t<r;++t){const r=e.vocab[t];this.vocab[t]=r[0],this.scores[t]=r[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.minScore=(0,a.min)(this.scores)[0],this.unk_score=this.minScore-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.chars;let r=0;for(;r<t.length;){let n=!1;const s=[],a=t.slice(r).join(""),i=this.trie.commonPrefixSearch(a);for(const t of i){s.push(t);const a=this.tokens_to_ids.get(t),i=this.scores[a],l=(0,o.len)(t);e.insert(r,l,i,a),n||1!==l||(n=!0)}n||e.insert(r,1,this.unk_score,this.unk_token_id),r+=1}}tokenize(e){const t=new l.TokenLattice(e,this.bos_token_id,this.eos_token_id);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const r of e){const e=this.tokenize(r);t.push(...e)}return t}}const P=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let r=0;for(let n=0;n<256;++n)e.includes(n)||(e.push(n),t.push(256+r),r+=1);const n=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,n[t]])))})(),C=(0,o.reverseDictionary)(P);class v extends k{constructor(e){super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=m(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;this.bpe_ranks=new 
Map(e.merges.map(((e,t)=>[e,t]))),this.merges=e.merges.map((e=>e.split(this.BPE_SPLIT_TOKEN))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const r=Array.from(e);this.end_of_word_suffix&&(r[r.length-1]+=this.end_of_word_suffix);let n=[];if(r.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:r[0],bias:0,prev:null,next:null},o=t;for(let t=1;t<r.length;++t){const n={bias:t/r.length,token:r[t],prev:o,next:null};o.next=n,this._add_node(e,o),o=n}for(;!e.isEmpty();){const r=e.pop();if(r.deleted||!r.next||r.next.deleted)continue;if(r.deleted=!0,r.next.deleted=!0,r.prev){const e={...r.prev};r.prev.deleted=!0,r.prev=e,e.prev?e.prev.next=e:t=e}const n={token:r.token+r.next.token,bias:r.bias,prev:r.prev,next:r.next.next};n.prev?(n.prev.next=n,this._add_node(e,n.prev)):t=n,n.next&&(n.next.prev=n,this._add_node(e,n))}for(let e=t;null!==e;e=e.next)n.push(e.token)}else n=r;if(this.continuing_subword_suffix)for(let e=0;e<n.length-1;++e)n[e]+=this.continuing_subword_suffix;return this.cache.set(e,n),n}_add_node(e,t){const r=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==r&&(t.score=r+t.bias,e.push(t))}encode(e){const t=[];for(const r of e){if(this.ignore_merges&&this.tokens_to_ids.has(r)){t.push(r);continue}const e=this.bpe(r);for(const r of e)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const e=Array.from(this.text_encoder.encode(r)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`));e.every((e=>this.tokens_to_ids.has(e)))?t.push(...e):t.push(this.unk_token)}else t.push(this.unk_token)}return t}}class S extends 
k{constructor(e,t){super(e),this.tokens_to_ids=m(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class A extends n.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new j(e);case"Precompiled":return new pe(e);case"Sequence":return new V(e);case"Replace":return new E(e);case"NFC":return new L(e);case"NFKC":return new z(e);case"NFKD":return new I(e);case"Strip":return new B(e);case"StripAccents":return new N(e);case"Lowercase":return new O(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class E extends A{normalize(e){const t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class L extends A{normalize(e){return e=e.normalize("NFC")}}class z extends A{normalize(e){return e=e.normalize("NFKC")}}class I extends A{normalize(e){return e=e.normalize("NFKD")}}class B extends A{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class N extends A{normalize(e){return e=g(e)}}class O extends A{normalize(e){return e=e.toLowerCase()}}class D extends A{normalize(e){return e=this.config.prepend+e}}class V extends A{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>A.fromConfig(e)))}normalize(e){return 
this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class j extends A{_tokenize_chinese_chars(e){const t=[];for(let r=0;r<e.length;++r){const n=e[r];M(n.charCodeAt(0))?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/\p{Mn}/gu,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const r of e){const e=r.charCodeAt(0);0===e||65533===e||this._is_control(r)||(/^\s$/.test(r)?t.push(" "):t.push(r))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class R extends n.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new G(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new _e(e);case"Metaspace":return new de(e);case"ByteLevel":return new q(e);case"Split":return new $(e);case"Punctuation":return new W(e);case"Digits":return new U(e);case"Replace":return new fe(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class G extends R{constructor(e){super(),this.pattern=new RegExp(`[^\\s${w}]+|[${w}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class q extends R{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| 
?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=P,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class $ extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const r=[];let n=0;for(const o of e.matchAll(t)){const t=o[0];n<o.index&&r.push(e.slice(n,o.index)),t.length>0&&r.push(t),n=o.index+t.length}return n<e.length&&r.push(e.slice(n)),r}(e,this.pattern)}}class W extends R{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${w}]+|[${w}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class U extends R{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class X extends n.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Y(e);case"ByteLevel":return new J(e);case"RobertaProcessing":return new H(e);case"BertProcessing":return new Q(e);case"Sequence":return new K(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class Q extends X{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:r=!0}={}){r&&(e=(0,o.mergeArrays)([this.cls],e,[this.sep]));let n=new Array(e.length).fill(0);if(null!==t){const s=r&&this instanceof H?[this.sep]:[],a=r?[this.sep]:[];e=(0,o.mergeArrays)(e,s,t,a),n=(0,o.mergeArrays)(n,new 
Array(t.length+s.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}}class H extends Q{}class Y extends X{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:r=!0}={}){const n=null===t?this.single:this.pair;let s=[],a=[];for(const i of n)"SpecialToken"in i?r&&(s.push(i.SpecialToken.id),a.push(i.SpecialToken.type_id)):"Sequence"in i&&("A"===i.Sequence.id?(s=(0,o.mergeArrays)(s,e),a=(0,o.mergeArrays)(a,new Array(e.length).fill(i.Sequence.type_id))):"B"===i.Sequence.id&&(s=(0,o.mergeArrays)(s,t),a=(0,o.mergeArrays)(a,new Array(t.length).fill(i.Sequence.type_id))));return{tokens:s,token_type_ids:a}}}class J extends X{post_process(e,t=null){return t&&(e=(0,o.mergeArrays)(e,t)),{tokens:e}}}class K extends X{constructor(e){super(e),this.processors=e.processors.map((e=>X.fromConfig(e)))}post_process(e,t=null,r={}){let n;for(const o of this.processors)if(o instanceof J){if(e=o.post_process(e).tokens,t){t=o.post_process(t).tokens}}else{const s=o.post_process(e,t,r);e=s.tokens,n=s.token_type_ids}return{tokens:e,token_type_ids:n}}}class Z extends n.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new oe(e);case"Metaspace":return new ue(e);case"ByteLevel":return new se(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new re(e);case"Strip":return new ne(e);case"Sequence":return new ie(e);case"CTC":return new ae(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends Z{decode_chain(e){const t=h(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends 
Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let r=[];for(const n of e){let e=null;if(6===n.length&&n.startsWith("<0x")&&n.endsWith(">")){const t=parseInt(n.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)r.push(e);else{if(r.length>0){const e=this.text_decoder.decode(Uint8Array.from(r));t.push(e),r=[]}t.push(n)}}if(r.length>0){const e=this.text_decoder.decode(Uint8Array.from(r));t.push(e),r=[]}return t}}class re extends Z{decode_chain(e){return[e.join("")]}}class ne extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let r=0;r<this.start&&e[r]===this.content;++r)t=r+1;let r=e.length;for(let t=0;t<this.stop;++t){const n=e.length-t-1;if(e[n]!==this.content)break;r=n}return e.slice(t,r)}))}}class oe extends Z{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=f(e)),e)))}}class se extends Z{constructor(e){super(e),this.byte_decoder=C,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),r=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(r)}decode_chain(e){const t=[];let r=[];for(const n of e)void 0!==this.added_tokens.find((e=>e.content===n))?(r.length>0&&(t.push(this.convert_tokens_to_string(r)),r=[]),t.push(n)):r.push(n);return r.length>0&&t.push(this.convert_tokens_to_string(r)),t}}class ae extends Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let r=t.filter((e=>e!==this.pad_token)).join("");return 
this.cleanup&&(r=f(r).replaceAll(this.word_delimiter_token," ").trim()),r}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class ie extends Z{constructor(e){super(e),this.decoders=e.decoders.map((e=>Z.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,r)=>t.replaceAll(this.suffix,r===e.length-1?"":" ")))}}class ce extends Z{decode_chain(e){let t="";for(let r=1;r<e.length;r+=2)t+=e[r];return[t]}}class de extends R{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let r=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!r.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(r=this.strRep+r),[r]}}class ue extends Z{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let r=0;r<e.length;++r){let n=e[r].replaceAll(this.replacement," ");this.addPrefixSpace&&0==r&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}}class pe extends A{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends R{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>R.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,r)=>r.pre_tokenize(e,t)),[e])}}class me extends R{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class _e extends 
R{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class fe extends R{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const ge=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function Me(e,t,r,n){for(const s of Object.keys(e)){const a=t-e[s].length,i=r(s),l=new Array(a).fill(i);e[s]="right"===n?(0,o.mergeArrays)(e[s],l):(0,o.mergeArrays)(l,e[s])}}function we(e,t){for(const r of Object.keys(e))e[r].length=t}class Te extends n.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=A.fromConfig(e.normalizer),this.pre_tokenizer=R.fromConfig(e.pre_tokenizer),this.model=k.fromConfig(e.model,t),this.post_processor=X.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new 
RegExp(this.added_tokens.slice().sort(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,o.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:r}of this.chat_template){if("string"!=typeof t||"string"!=typeof r)throw new Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=r}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){return new this(...await p(e,{progress_callback:t,config:r,cache_dir:n,local_files_only:o,revision:s,legacy:a}))}_call(e,{text_pair:t=null,add_special_tokens:r=!0,padding:n=!1,truncation:o=null,max_length:s=null,return_tensor:l=!0,return_token_type_ids:c=null}={}){const d=Array.isArray(e);let u;if(d){if(0===e.length)throw Error("text array must be 
non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");u=e.map(((e,n)=>this._encode_plus(e,{text_pair:t[n],add_special_tokens:r,return_token_type_ids:c})))}else u=e.map((e=>this._encode_plus(e,{add_special_tokens:r,return_token_type_ids:c})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");u=[this._encode_plus(e,{text_pair:t,add_special_tokens:r,return_token_type_ids:c})]}if(null===s?s="max_length"===n?this.model_max_length:(0,a.max)(u.map((e=>e.input_ids.length)))[0]:o||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),s=Math.min(s,this.model_max_length??1/0),n||o)for(let e=0;e<u.length;++e)u[e].input_ids.length!==s&&(u[e].input_ids.length>s?o&&we(u[e],s):n&&Me(u[e],s,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!n||!o)&&u.some((e=>{for(const t of Object.keys(e))if(e[t].length!==u[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[u.length,u[0].input_ids.length];for(const t of Object.keys(u[0]))p[t]=new i.Tensor("int64",BigInt64Array.from(u.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(u[0]))p[e]=u.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" 
")),this.do_lowercase_and_remove_accent&&(e=function(e){return g(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const r=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(r)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:r=!0,return_token_type_ids:n=null}={}){const{tokens:o,token_type_ids:s}=this._tokenize_helper(e,{pair:t,add_special_tokens:r}),a=this.model.convert_tokens_to_ids(o),i={input_ids:a,attention_mask:new Array(a.length).fill(1)};return(n??this.return_token_type_ids)&&s&&(i.token_type_ids=s),i}_tokenize_helper(e,{pair:t=null,add_special_tokens:r=!1}={}){const n=this._encode_text(e),s=this._encode_text(t);return this.post_processor?this.post_processor(n,s,{add_special_tokens:r}):{tokens:(0,o.mergeArrays)(n??[],s??[])}}tokenize(e,{pair:t=null,add_special_tokens:r=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:r}).tokens}encode(e,{text_pair:t=null,add_special_tokens:r=!0,return_token_type_ids:n=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:r,return_token_type_ids:n}).input_ids}batch_decode(e,t={}){return e instanceof i.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof i.Tensor&&(e=_(e)),!Array.isArray(e)||0===e.length||!(0,o.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:r=null}){let n=this.model.convert_ids_to_tokens(e);t&&(n=n.filter((e=>!this.special_tokens.includes(e))));let o=this.decoder?this.decoder(n):n.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(o=o.replaceAll(this.decoder.end_of_word_suffix," "),t&&(o=o.trim())),(r??this.clean_up_tokenization_spaces)&&(o=f(o)),o}get_chat_template({chat_template:e=null,tools:t=null}={}){if(this.chat_template&&"object"==typeof this.chat_template){const 
r=this.chat_template;if(null!==e&&Object.hasOwn(r,e))e=r[e];else if(null===e)if(null!==t&&"tool_use"in r)e=r.tool_use;else{if(!("default"in r))throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(r).sort()}.`);e=r.default}}else if(null===e){if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");e=this.chat_template}return e}apply_chat_template(e,{tools:t=null,documents:r=null,chat_template:n=null,add_generation_prompt:o=!1,tokenize:s=!0,padding:a=!1,truncation:i=!1,max_length:l=null,return_tensor:d=!0,return_dict:u=!1,tokenizer_kwargs:p={},...h}={}){if("string"!=typeof(n=this.get_chat_template({chat_template:n,tools:t})))throw Error("chat_template must be a string, but got "+typeof n);let m=this._compiled_template_cache.get(n);void 0===m&&(m=new c.Template(n),this._compiled_template_cache.set(n,m));const _=Object.create(null);for(const e of ge){const t=this.getToken(e);t&&(_[e]=t)}const f=m.render({messages:e,add_generation_prompt:o,tools:t,documents:r,..._,...h});if(s){const e=this._call(f,{add_special_tokens:!1,padding:a,truncation:i,max_length:l,return_tensor:d,...p});return u?e:e.input_ids}return f}}class be extends Te{return_token_type_ids=!0}class xe extends Te{return_token_type_ids=!0}class ke extends Te{return_token_type_ids=!0}class ye extends Te{return_token_type_ids=!0}class Fe extends Te{return_token_type_ids=!0}class Pe extends Te{return_token_type_ids=!0}class Ce extends Te{return_token_type_ids=!0}class ve extends Te{return_token_type_ids=!0}class Se extends Te{return_token_type_ids=!0}class Ae 
extends Te{}class Ee extends Te{}class Le extends Te{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class ze extends Te{return_token_type_ids=!0}class Ie extends Te{}class Be extends Te{}class Ne extends Te{}class Oe extends Te{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class De extends Oe{}class Ve extends Te{}class je extends Te{constructor(e,t){const r=".,!?…。,、।۔،",n=e.pre_tokenizer?.pretokenizers[0]?.pattern;n&&n.Regex===` ?[^(\\s|[${r}])]+`&&(n.Regex=` ?[^\\s${r}]+`),super(e,t)}}const Re="▁";class Ge extends Te{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Re,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Re+e.replaceAll(Re," "));return t.length>1&&t[0]===Re&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class qe extends Te{}class $e extends Te{}class We extends Te{}class Ue extends Te{}class Xe extends Te{}class Qe extends Te{}class He extends Te{}class Ye extends Te{}class Je extends Te{}function Ke(e,t,r,n){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it 
should be a function.");const o=n.src_lang,s=n.tgt_lang;if(!e.language_codes.includes(s))throw new Error(`Target language code "${s}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==o){if(!e.language_codes.includes(o))throw new Error(`Source language code "${o}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(o);break}}return n.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(s)])[0],e._call(t,r)}class Ze extends Te{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class et extends Te{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,r){return Ke(this,e,t,r)}}class tt extends Te{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:r=!1,time_precision:n=null,force_full_sequences:o=!0}={}){if(null===n)throw Error("Must specify time_precision");let s=null;const i="word"===t;function l(){return{language:s,timestamp:[null,null],text:""}}const c=[];let u=l(),p=0;const h=this.timestamp_begin;let m=[],_=[],f=!1,g=null;const M=new Set(this.all_special_ids);for(const r of e){const e=r.tokens,o=i?r.token_timestamps:null;let w=null,b=h;if("stride"in r){const[t,o,s]=r.stride;if(p-=o,g=t-s,o&&(b=o/n+h),s)for(let t=e.length-1;t>=0;--t){const r=Number(e[t]);if(r>=h){if(null!==w&&(r-h)*n<g)break;w=r}}}let x=[],k=[];for(let r=0;r<e.length;++r){const g=Number(e[r]);if(M.has(g)){const 
e=this.decode([g]),r=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==r){if(null!==s&&r!==s&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);u.text=t,c.push(u),m=[],x=[],u=l()}s=u.language=r}}else if(g>=h){const e=(g-h)*n+p,t=(0,a.round)(e,2);if(null!==w&&g>=w)f=!0;else if(f||m.length>0&&g<b)f=!1;else if(null===u.timestamp[0])u.timestamp[0]=t;else if(t===u.timestamp[0]);else{u.timestamp[1]=t,m.push(x),i&&_.push(k);const[e,r]=this.findLongestCommonSequence(m,_),n=this.decode(e);u.text=n,i&&(u.words=this.collateWordTimestamps(e,r,s)),c.push(u),m=[],x=[],_=[],k=[],u=l()}}else if(x.push(g),i){let e,t=(0,a.round)(o[r]+p,2);if(r+1<o.length){e=(0,a.round)(o[r+1]+p,2);const s=this.decode([g]);T.test(s)&&(e=(0,a.round)(Math.min(t+n,e),2))}else e=null;k.push([t,e])}}if("stride"in r){const[e,t,n]=r.stride;p+=e-n}x.length>0?(m.push(x),i&&_.push(k)):m.every((e=>0===e.length))&&(u=l(),m=[],x=[],_=[],k=[])}if(m.length>0){if(o&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,r]=this.findLongestCommonSequence(m,_),n=this.decode(e);u.text=n,i&&(u.words=this.collateWordTimestamps(e,r,s)),c.push(u)}let w=Object.create(null);const b=c.map((e=>e.text)).join("");if(t||r){for(let e=0;e<c.length;++e){const n=c[e];t||delete n.timestamp,r||delete n.language}if(i){const e=[];for(const t of c)for(const r of t.words)e.push(r);w={chunks:e}}else w={chunks:c}}return[b,w]}findLongestCommonSequence(e,t=null){let r=e[0],n=r.length,o=[];const s=Array.isArray(t)&&t.length>0;let a=s?[]:null,i=s?t[0]:null;for(let l=1;l<e.length;++l){const c=e[l];let d=0,u=[n,n,0,0];const p=c.length;for(let e=1;e<n+p;++e){const o=Math.max(0,n-e),a=Math.min(n,n+p-e),h=r.slice(o,a),m=Math.max(0,e-n),_=Math.min(p,e),f=c.slice(m,_);if(h.length!==f.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let g;g=s?h.filter(((e,r)=>e===f[r]&&i[o+r]<=t[l][m+r])).length:h.filter(((e,t)=>e===f[t])).length;const M=g/e+e/1e4;g>1&&M>d&&(d=M,u=[o,a,m,_])}const[h,m,_,f]=u,g=Math.floor((m+h)/2),M=Math.floor((f+_)/2);o.push(...r.slice(0,g)),r=c.slice(M),n=r.length,s&&(a.push(...i.slice(0,g)),i=t[l].slice(M))}return o.push(...r),s?(a.push(...i),[o,a]):[o,[]]}collateWordTimestamps(e,t,r){const[n,o,s]=this.combineTokensIntoWords(e,r),a=[];for(let e=0;e<n.length;++e){const r=s[e];a.push({text:n[e],timestamp:[t[r.at(0)][0],t[r.at(-1)][1]]})}return a}combineTokensIntoWords(e,t,r="\"'“¡¿([{-",n="\"'.。,,!!??::”)]}、"){let o,s,a;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[o,s,a]=this.splitTokensOnUnicode(e):[o,s,a]=this.splitTokensOnSpaces(e),this.mergePunctuations(o,s,a,r,n)}decode(e,t){let r;return t?.decode_with_timestamps?(e instanceof i.Tensor&&(e=_(e)),r=this.decodeWithTimestamps(e,t)):r=super.decode(e,t),r}decodeWithTimestamps(e,t){const 
r=t?.time_precision??.02,n=Array.from(this.all_special_ids).at(-1)+1;let o=[[]];for(let t of e)if(t=Number(t),t>=n){const e=((t-n)*r).toFixed(2);o.push(`<|${e}|>`),o.push([])}else o[o.length-1].push(t);return o=o.map((e=>"string"==typeof e?e:super.decode(e,t))),o.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),r=[],n=[],o=[];let s=[],a=[],i=0;for(let l=0;l<e.length;++l){const c=e[l];s.push(c),a.push(l);const d=this.decode(s,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[i+d.indexOf("�")]||(r.push(d),n.push(s),o.push(a),s=[],a=[],i+=d.length)}return[r,n,o]}splitTokensOnSpaces(e){const[t,r,n]=this.splitTokensOnUnicode(e),o=[],s=[],a=[],i=new RegExp(`^[${w}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],c=r[e],d=n[e],u=c[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=i.test(h);if(u||p||m||0===o.length)o.push(l),s.push(c),a.push(d);else{const e=o.length-1;o[e]+=l,s[e].push(...c),a[e].push(...d)}}return[o,s,a]}mergePunctuations(e,t,r,n,s){const a=structuredClone(e),i=structuredClone(t),l=structuredClone(r);let c=a.length-2,d=a.length-1;for(;c>=0;)a[c].startsWith(" ")&&n.includes(a[c].trim())?(a[d]=a[c]+a[d],i[d]=(0,o.mergeArrays)(i[c],i[d]),l[d]=(0,o.mergeArrays)(l[c],l[d]),a[c]="",i[c]=[],l[c]=[]):d=c,--c;for(c=0,d=1;d<a.length;)!a[c].endsWith(" ")&&s.includes(a[d])?(a[c]+=a[d],i[c]=(0,o.mergeArrays)(i[c],i[d]),l[c]=(0,o.mergeArrays)(l[c],l[d]),a[d]="",i[d]=[],l[d]=[]):c=d,++d;return[a.filter((e=>e)),i.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:r=!0}={}){const n=[];if(e){const t=(0,d.whisper_language_to_code)(e),r=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===r)throw new Error(`Unable to find language "${t}" in model vocabulary. 
Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(r)}else n.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw new Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);const e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw new Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(e)}else n.push(null);if(r){const e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw new Error(`Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at ${u.GITHUB_ISSUE_URL}.`);n.push(e)}return n.map(((e,t)=>[t+1,e])).filter((e=>null!==e[1]))}}class rt extends Te{}class nt extends Te{}class ot extends Te{}class st extends Te{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...r]=e.trim().split(this.languageRegex);if(0===r.length)return super._encode_text(t);if(2===r.length){const[e,t]=r;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,o.mergeArrays)([e],super._encode_text(t))}}}class at extends Te{}class it extends Te{}class lt extends Te{}class ct extends Te{}class dt extends Te{}class ut extends Te{constructor(e,t){super(e,t),this.decoder=new ce({})}}class pt extends Te{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:Ie,DistilBertTokenizer:Ae,CamembertTokenizer:Ee,DebertaTokenizer:Fe,DebertaV2Tokenizer:Pe,BertTokenizer:be,HerbertTokenizer:Ce,ConvBertTokenizer:ve,RoFormerTokenizer:Se,XLMTokenizer:Le,ElectraTokenizer:ze,MobileBertTokenizer:ke,SqueezeBertTokenizer:ye,AlbertTokenizer:xe,GPT2Tokenizer:Be,BartTokenizer:Ne,MBartTokenizer:Oe,MBart50Tokenizer:De,RobertaTokenizer:Ve,WhisperTokenizer:tt,CodeGenTokenizer:rt,CLIPTokenizer:nt,SiglipTokenizer:ot,MarianTokenizer:st,BloomTokenizer:je,NllbTokenizer:Ze,M2M100Tokenizer:et,LlamaTokenizer:Ge,CodeLlamaTokenizer:qe,XLMRobertaTokenizer:$e,MPNetTokenizer:We,FalconTokenizer:Ue,GPTNeoXTokenizer:Xe,EsmTokenizer:Qe,Wav2Vec2CTCTokenizer:at,BlenderbotTokenizer:it,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ct,NougatTokenizer:dt,VitsTokenizer:ut,Qwen2Tokenizer:He,GemmaTokenizer:Ye,Grok1Tokenizer:Je,CohereTokenizer:pt,PreTrainedTokenizer:Te};static async from_pretrained(e,{progress_callback:t=null,config:r=null,cache_dir:n=null,local_files_only:o=!1,revision:s="main",legacy:a=null}={}){const[i,l]=await p(e,{progress_callback:t,config:r,cache_dir:n,local_files_only:o,revision:s,legacy:a}),c=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[c];return d||(console.warn(`Unknown tokenizer class "${c}", attempting to construct from base class.`),d=Te),new d(i,l)}}},"./src/utils/audio.js":
|
|
131
131
|
/*!****************************!*\
|
|
132
132
|
!*** ./src/utils/audio.js ***!
|
|
133
133
|
\****************************/(e,t,r)=>{r.r(t),r.d(t,{hamming:()=>d,hanning:()=>c,mel_filter_bank:()=>_,read_audio:()=>i,spectrogram:()=>g,window_function:()=>M});var n=r(/*! ./hub.js */"./src/utils/hub.js"),o=r(/*! ./maths.js */"./src/utils/maths.js"),s=r(/*! ./core.js */"./src/utils/core.js"),a=r(/*! ./tensor.js */"./src/utils/tensor.js");async function i(e,t){if("undefined"==typeof AudioContext)throw Error("Unable to load audio from path/URL since `AudioContext` is not available in your environment. Instead, audio data should be passed directly to the pipeline/processor. For more information and some example code, see https://huggingface.co/docs/transformers.js/guides/node-audio-processing.");const r=await(await(0,n.getFile)(e)).arrayBuffer(),o=new AudioContext({sampleRate:t});void 0===t&&console.warn(`No sampling rate provided, using default of ${o.sampleRate}Hz.`);const s=await o.decodeAudioData(r);let a;if(2===s.numberOfChannels){const e=Math.sqrt(2),t=s.getChannelData(0),r=s.getChannelData(1);a=new Float32Array(t.length);for(let n=0;n<s.length;++n)a[n]=e*(t[n]+r[n])/2}else a=s.getChannelData(0);return a}function l(e,t){if(e<1)return new Float64Array;if(1===e)return new Float64Array([1]);const r=1-t,n=2*Math.PI/(e-1),o=new Float64Array(e);for(let s=0;s<e;++s)o[s]=t-r*Math.cos(s*n);return o}function c(e){return l(e,.5)}function d(e){return l(e,.54)}const u={htk:e=>2595*Math.log10(1+e/700),kaldi:e=>1127*Math.log(1+e/700),slaney:(e,t=1e3,r=15,n=27/Math.log(6.4))=>e>=t?r+Math.log(e/t)*n:3*e/200};function p(e,t="htk"){const r=u[t];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?r(e):e.map((e=>r(e)))}const h={htk:e=>700*(10**(e/2595)-1),kaldi:e=>700*(Math.exp(e/1127)-1),slaney:(e,t=1e3,r=15,n=Math.log(6.4)/27)=>e>=r?t*Math.exp(n*(e-r)):200*e/3};function m(e,t,r){const n=(t-e)/(r-1);return Float64Array.from({length:r},((t,r)=>e+n*r))}function _(e,t,r,n,o,s=null,a="htk",i=!1){if(null!==s&&"slaney"!==s)throw new 
Error('norm must be one of null or "slaney"');const l=m(p(r,a),p(n,a),t+2);let c,d=function(e,t="htk"){const r=h[t];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?r(e):e.map((e=>r(e)))}(l,a);if(i){const t=o/(2*e);c=p(Float64Array.from({length:e},((e,r)=>r*t)),a),d=l}else c=m(0,Math.floor(o/2),e);const u=function(e,t){const r=Float64Array.from({length:t.length-1},((e,r)=>t[r+1]-t[r])),n=Array.from({length:e.length},(()=>new Array(t.length)));for(let r=0;r<e.length;++r){const o=n[r];for(let n=0;n<t.length;++n)o[n]=t[n]-e[r]}const o=t.length-2,s=Array.from({length:o},(()=>new Array(e.length)));for(let t=0;t<e.length;++t){const e=n[t];for(let n=0;n<o;++n){const o=-e[n]/r[n],a=e[n+2]/r[n+1];s[n][t]=Math.max(0,Math.min(o,a))}}return s}(c,d);if(null!==s&&"slaney"===s)for(let r=0;r<t;++r){const t=u[r],n=2/(d[r+2]-d[r]);for(let r=0;r<e;++r)t[r]*=n}return u}function f(e,t,r,n,s){if(r<=0)throw new Error("reference must be greater than zero");if(n<=0)throw new Error("min_value must be greater than zero");r=Math.max(n,r);const a=Math.log10(r);for(let r=0;r<e.length;++r)e[r]=t*Math.log10(Math.max(n,e[r])-a);if(null!==s){if(s<=0)throw new Error("db_range must be greater than zero");const t=(0,o.max)(e)[0]-s;for(let r=0;r<e.length;++r)e[r]=Math.max(e[r],t)}return e}async function g(e,t,r,n,{fft_length:i=null,power:l=1,center:c=!0,pad_mode:d="reflect",onesided:u=!0,preemphasis:p=null,mel_filters:h=null,mel_floor:m=1e-10,log_mel:_=null,reference:g=1,min_value:M=1e-10,db_range:w=null,remove_dc_offset:T=null,min_num_frames:b=null,max_num_frames:x=null,do_pad:k=!0,transpose:y=!1}={}){const F=t.length;if(null===i&&(i=r),r>i)throw Error(`frame_length (${r}) may not be larger than fft_length (${i})`);if(F!==r)throw new Error(`Length of the window (${F}) must equal frame_length (${r})`);if(n<=0)throw new Error("hop_length must be greater than zero");if(null===l&&null!==h)throw new Error("You have provided `mel_filters` but `power` 
is `None`. Mel spectrogram computation is not yet supported for complex-valued spectrogram. Specify `power` to fix this issue.");if(c){if("reflect"!==d)throw new Error(`pad_mode="${d}" not implemented yet.`);const t=Math.floor((i-1)/2)+1;e=function(e,t,r){const n=new e.constructor(e.length+t+r),o=e.length-1;for(let r=0;r<e.length;++r)n[t+r]=e[r];for(let r=1;r<=t;++r)n[t-r]=e[(0,s.calculateReflectOffset)(r,o)];for(let a=1;a<=r;++a)n[o+t+a]=e[(0,s.calculateReflectOffset)(o-a,o)];return n}(e,t,t)}let P=Math.floor(1+Math.floor((e.length-r)/n));null!==b&&P<b&&(P=b);const C=u?Math.floor(i/2)+1:i;let v=P,S=P;null!==x&&(x>P?k&&(S=x):S=v=x);const A=new o.FFT(i),E=new Float64Array(i),L=new Float64Array(A.outputBufferSize),z=new Float32Array(C*S);for(let o=0;o<v;++o){const s=o*n,a=Math.min(e.length-s,r);a!==r&&E.fill(0,0,r);for(let t=0;t<a;++t)E[t]=e[s+t];if(T){let e=0;for(let t=0;t<a;++t)e+=E[t];const t=e/a;for(let e=0;e<a;++e)E[e]-=t}if(null!==p){for(let e=a-1;e>=1;--e)E[e]-=p*E[e-1];E[0]*=1-p}for(let e=0;e<t.length;++e)E[e]*=t[e];A.realTransform(L,E);for(let e=0;e<C;++e){const t=e<<1;z[e*S+o]=L[t]**2+L[t+1]**2}}if(null!==l&&2!==l){const e=2/l;for(let t=0;t<z.length;++t)z[t]**=e}const I=h.length;let B=await(0,a.matmul)(new a.Tensor("float32",h.flat(),[I,C]),new a.Tensor("float32",z,[C,S]));y&&(B=B.transpose(1,0));const N=B.data;for(let e=0;e<N.length;++e)N[e]=Math.max(m,N[e]);if(null!==l&&null!==_){const e=Math.min(N.length,v*I);switch(_){case"log":for(let t=0;t<e;++t)N[t]=Math.log(N[t]);break;case"log10":for(let t=0;t<e;++t)N[t]=Math.log10(N[t]);break;case"dB":if(1===l)!function(e,t=1,r=1e-5,n=null){f(e,20,t,r,n)}(N,g,M,w);else{if(2!==l)throw new Error(`Cannot use log_mel option '${_}' with power ${l}`);!function(e,t=1,r=1e-10,n=null){f(e,10,t,r,n)}(N,g,M,w)}break;default:throw new Error(`log_mel must be one of null, 'log', 'log10' or 'dB'. 
Got '${_}'`)}}return B}function M(e,t,{periodic:r=!0,frame_length:n=null,center:o=!0}={}){const s=r?e+1:e;let a;switch(t){case"boxcar":a=new Float64Array(s).fill(1);break;case"hann":case"hann_window":a=c(s);break;case"hamming":a=d(s);break;case"povey":a=c(s).map((e=>Math.pow(e,.85)));break;default:throw new Error(`Unknown window type ${t}.`)}if(r&&(a=a.subarray(0,e)),null===n)return a;if(e>n)throw new Error(`Length of the window (${e}) may not be larger than frame_length (${n})`);return a}},"./src/utils/constants.js":
|
|
@@ -136,10 +136,10 @@ import*as e from"fs";import*as t from"onnxruntime-node";import*as r from"path";i
|
|
|
136
136
|
\********************************/(e,t,r)=>{r.r(t),r.d(t,{GITHUB_ISSUE_URL:()=>n});const n="https://github.com/xenova/transformers.js/issues/new/choose"},"./src/utils/core.js":
|
|
137
137
|
/*!***************************!*\
|
|
138
138
|
!*** ./src/utils/core.js ***!
|
|
139
|
-
\***************************/(e,t,r)=>{function n(e,t){e&&e(t)}function o(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function s(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function a(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function i(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){const t=[];let r=e;for(;Array.isArray(r);)t.push(r.length),r=r[0];return t}function c(e,t,r=void 0){const n=e[t];if(void 0!==n)return delete e[t],n;if(void 0===r)throw Error(`Key ${t} does not exist in object.`);return r}function d(...e){return Array.prototype.concat.apply([],e)}function u(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function p(e,t){return Math.abs((e+t)%(2*t)-t)}function h(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}r.r(t),r.d(t,{calculateDimensions:()=>l,calculateReflectOffset:()=>p,dispatchCallback:()=>n,escapeRegExp:()=>s,isIntegralNumber:()=>i,isTypedArray:()=>a,mergeArrays:()=>d,pick:()=>h,pop:()=>c,product:()=>u,reverseDictionary:()=>o})},"./src/utils/data-structures.js":
|
|
139
|
+
\***************************/(e,t,r)=>{function n(e,t){e&&e(t)}function o(e){return Object.fromEntries(Object.entries(e).map((([e,t])=>[t,e])))}function s(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function a(e){return"TypedArray"===e?.prototype?.__proto__?.constructor?.name}function i(e){return Number.isInteger(e)||"bigint"==typeof e}function l(e){const t=[];let r=e;for(;Array.isArray(r);)t.push(r.length),r=r[0];return t}function c(e,t,r=void 0){const n=e[t];if(void 0!==n)return delete e[t],n;if(void 0===r)throw Error(`Key ${t} does not exist in object.`);return r}function d(...e){return Array.prototype.concat.apply([],e)}function u(...e){return e.reduce(((e,t)=>e.flatMap((e=>t.map((t=>[e,t]))))))}function p(e,t){return Math.abs((e+t)%(2*t)-t)}function h(e,t){return Object.assign({},...t.map((t=>{if(void 0!==e[t])return{[t]:e[t]}})))}function m(e){let t=0;for(const r of e)++t;return t}r.r(t),r.d(t,{calculateDimensions:()=>l,calculateReflectOffset:()=>p,dispatchCallback:()=>n,escapeRegExp:()=>s,isIntegralNumber:()=>i,isTypedArray:()=>a,len:()=>m,mergeArrays:()=>d,pick:()=>h,pop:()=>c,product:()=>u,reverseDictionary:()=>o})},"./src/utils/data-structures.js":
|
|
140
140
|
/*!**************************************!*\
|
|
141
141
|
!*** ./src/utils/data-structures.js ***!
|
|
142
|
-
\**************************************/(e,t,r)=>{r.r(t),r.d(t,{CharTrie:()=>o,PriorityQueue:()=>n,TokenLattice:()=>a});class n{constructor(e=(e,t)=>e>t,t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const r=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=r}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class o{constructor(){this.root=s.default()}extend(e){for(
|
|
142
|
+
\**************************************/(e,t,r)=>{r.r(t),r.d(t,{CharTrie:()=>o,PriorityQueue:()=>n,TokenLattice:()=>a});class n{constructor(e=(e,t)=>e>t,t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}get size(){return this._heap.length}isEmpty(){return 0===this.size}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const r=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=r}_siftUp(){this._siftUpFrom(this.size-1)}_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}class o{constructor(){this.root=s.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const r of e){let e=t.children.get(r);void 0===e&&(e=s.default(),t.children.set(r,e)),t=e}t.isLeaf=!0}*commonPrefixSearch(e){let t=this.root;if(void 0===t)return;let r="";for(const n of e){if(r+=n,t=t.children.get(n),void 0===t)return;t.isLeaf&&(yield r)}}}class s{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new s(!1,new Map)}}class 
a{constructor(e,t,r){this.chars=Array.from(e),this.len=this.chars.length,this.bosTokenId=t,this.eosTokenId=r,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const n=new i(this.bosTokenId,0,0,0,0),o=new i(this.eosTokenId,1,this.len,0,0);this.nodes.push(n.clone()),this.nodes.push(o.clone()),this.beginNodes[this.len].push(o),this.endNodes[0].push(n)}insert(e,t,r,n){const o=this.nodes.length,s=new i(n,o,e,t,r);this.beginNodes[e].push(s),this.endNodes[e+t].push(s),this.nodes.push(s)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let r=0,n=null;for(let o of this.endNodes[t]){const t=o.backtraceScore+e.score;(null===n||t>r)&&(n=o.clone(),r=t)}if(null===n)return[];e.prev=n,e.backtraceScore=r}++t}const r=[],n=this.beginNodes[e][0].prev;if(null===n)return[];let o=n.clone();for(;null!==o.prev;){r.push(o.clone());const e=o.clone();o=e.prev.clone()}return r.reverse(),r}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map((e=>this.piece(e)))}tokenIds(){return this.viterbi().map((e=>e.tokenId))}}class i{constructor(e,t,r,n,o){this.tokenId=e,this.nodeId=t,this.pos=r,this.length=n,this.score=o,this.prev=null,this.backtraceScore=0}clone(){const e=new i(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
|
|
143
143
|
/*!******************************!*\
|
|
144
144
|
!*** ./src/utils/devices.js ***!
|
|
145
145
|
\******************************/(e,t,r)=>{r.r(t),r.d(t,{DEVICE_TYPES:()=>n});const n=Object.freeze({auto:"auto",gpu:"gpu",cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:"webnn","webnn-npu":"webnn-npu","webnn-gpu":"webnn-gpu","webnn-cpu":"webnn-cpu"})},"./src/utils/dtypes.js":
|