@huggingface/transformers 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3788,57 +3788,57 @@ __webpack_require__.r(__webpack_exports__);
3788
3788
  /* harmony export */ TRACE: () => (/* binding */ Sr),
3789
3789
  /* harmony export */ TRACE_FUNC_BEGIN: () => (/* binding */ Le),
3790
3790
  /* harmony export */ TRACE_FUNC_END: () => (/* binding */ Ve),
3791
- /* harmony export */ Tensor: () => (/* binding */ De),
3791
+ /* harmony export */ Tensor: () => (/* binding */ Be),
3792
3792
  /* harmony export */ TrainingSession: () => (/* binding */ Ap),
3793
- /* harmony export */ "default": () => (/* binding */ Hx),
3793
+ /* harmony export */ "default": () => (/* binding */ Vx),
3794
3794
  /* harmony export */ env: () => (/* binding */ _e),
3795
3795
  /* harmony export */ registerBackend: () => (/* binding */ St)
3796
3796
  /* harmony export */ });
3797
3797
  /*!
3798
- * ONNX Runtime Web v1.20.0-dev.20241016-2b8fc5529b
3798
+ * ONNX Runtime Web v1.21.0-dev.20241024-d9ca84ef96
3799
3799
  * Copyright (c) Microsoft Corporation. All rights reserved.
3800
3800
  * Licensed under the MIT License.
3801
3801
  */
3802
- var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Object.getOwnPropertyNames;var xp=Object.prototype.hasOwnProperty;var Vn=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,r)=>(typeof require<"u"?require:t)[r]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')});var V=(e,t)=>()=>(e&&(t=e(e=0)),t);var Gt=(e,t)=>{for(var r in t)Un(e,r,{get:t[r],enumerable:!0})},Sp=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of $p(t))!xp.call(e,o)&&o!==r&&Un(e,o,{get:()=>t[o],enumerable:!(n=vp(t,o))||n.enumerable});return e};var br=e=>Sp(Un({},"__esModule",{value:!0}),e);var wr,xt,St,Tp,_r,vr=V(()=>{"use strict";wr=new Map,xt=[],St=(e,t,r)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let n=wr.get(e);if(n===void 0)wr.set(e,{backend:t,priority:r});else{if(n.priority>r)return;if(n.priority===r&&n.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${r}`)}if(r>=0){let o=xt.indexOf(e);o!==-1&&xt.splice(o,1);for(let i=0;i<xt.length;i++)if(wr.get(xt[i]).priority<=r){xt.splice(i,0,e);return}xt.push(e)}return}throw new TypeError("not a valid backend")},Tp=async e=>{let t=wr.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(n){return r||(t.error=`${n}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},_r=async e=>{let t=e.executionProviders||[],r=t.map(l=>typeof l=="string"?l:l.name),n=r.length===0?xt:r,o,i=[],a=new Set;for(let l of n){let c=await Tp(l);typeof c=="string"?i.push({name:l,err:c}):(o||(o=c),o===c&&a.add(l))}if(!o)throw new Error(`no available backend found. 
ERR: ${i.map(l=>`[${l.name}] ${l.err}`).join(", ")}`);for(let{name:l,err:c}of i)r.includes(l)&&console.warn(`removing requested execution provider "${l}" from session options because it is not available: ${c}`);let d=t.filter(l=>a.has(typeof l=="string"?l:l.name));return[o,new Proxy(e,{get:(l,c)=>c==="executionProviders"?d:Reflect.get(l,c)})]}});var Zi=V(()=>{"use strict";vr()});var Qi,Ji=V(()=>{"use strict";Qi="1.20.0-dev.20241016-2b8fc5529b"});var ea,We,Nn=V(()=>{"use strict";Ji();ea="warning",We={wasm:{},webgl:{},webgpu:{},versions:{common:Qi},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);ea=e}},get logLevel(){return ea}};Object.defineProperty(We,"logLevel",{enumerable:!0})});var _e,ta=V(()=>{"use strict";Nn();_e=We});var ra,na,oa=V(()=>{"use strict";ra=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let n=r.getContext("2d");if(n!=null){let o,i;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[3]):(o=e.dims[3],i=e.dims[2]);let a=t?.format!==void 0?t.format:"RGB",d=t?.norm,l,c;d===void 0||d.mean===void 0?l=[255,255,255,255]:typeof d.mean=="number"?l=[d.mean,d.mean,d.mean,d.mean]:(l=[d.mean[0],d.mean[1],d.mean[2],0],d.mean[3]!==void 0&&(l[3]=d.mean[3])),d===void 0||d.bias===void 0?c=[0,0,0,0]:typeof d.bias=="number"?c=[d.bias,d.bias,d.bias,d.bias]:(c=[d.bias[0],d.bias[1],d.bias[2],0],d.bias[3]!==void 0&&(c[3]=d.bias[3]));let m=i*o,u=0,h=m,w=m*2,g=-1;a==="RGBA"?(u=0,h=m,w=m*2,g=m*3):a==="RGB"?(u=0,h=m,w=m*2):a==="RBG"&&(u=0,w=m,h=m*2);for(let y=0;y<i;y++)for(let S=0;S<o;S++){let $=(e.data[u++]-c[0])*l[0],v=(e.data[h++]-c[1])*l[1],x=(e.data[w++]-c[2])*l[2],T=g===-1?255:(e.data[g++]-c[3])*l[3];n.fillStyle="rgba("+$+","+v+","+x+","+T+")",n.fillRect(S,y,1,1)}if("toDataURL"in r)return r.toDataURL();throw new Error("toDataURL is not supported")}else 
throw new Error("Can not access image data")},na=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),n;if(r!=null){let o,i,a;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[1],a=e.dims[3]):(o=e.dims[3],i=e.dims[2],a=e.dims[1]);let d=t!==void 0&&t.format!==void 0?t.format:"RGB",l=t?.norm,c,m;l===void 0||l.mean===void 0?c=[255,255,255,255]:typeof l.mean=="number"?c=[l.mean,l.mean,l.mean,l.mean]:(c=[l.mean[0],l.mean[1],l.mean[2],255],l.mean[3]!==void 0&&(c[3]=l.mean[3])),l===void 0||l.bias===void 0?m=[0,0,0,0]:typeof l.bias=="number"?m=[l.bias,l.bias,l.bias,l.bias]:(m=[l.bias[0],l.bias[1],l.bias[2],0],l.bias[3]!==void 0&&(m[3]=l.bias[3]));let u=i*o;if(t!==void 0&&(t.format!==void 0&&a===4&&t.format!=="RGBA"||a===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let h=4,w=0,g=1,y=2,S=3,$=0,v=u,x=u*2,T=-1;d==="RGBA"?($=0,v=u,x=u*2,T=u*3):d==="RGB"?($=0,v=u,x=u*2):d==="RBG"&&($=0,x=u,v=u*2),n=r.createImageData(o,i);for(let C=0;C<i*o;w+=h,g+=h,y+=h,S+=h,C++)n.data[w]=(e.data[$++]-m[0])*c[0],n.data[g]=(e.data[v++]-m[1])*c[1],n.data[y]=(e.data[x++]-m[2])*c[2],n.data[S]=T===-1?255:(e.data[T++]-m[3])*c[3]}else throw new Error("Can not access image data");return n}});var Wn,ia,aa,sa,ua,da,la=V(()=>{"use strict";$r();Wn=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:r,width:n}=t,o=t.norm??{mean:255,bias:0},i,a;typeof o.mean=="number"?i=[o.mean,o.mean,o.mean,o.mean]:i=[o.mean[0],o.mean[1],o.mean[2],o.mean[3]??255],typeof o.bias=="number"?a=[o.bias,o.bias,o.bias,o.bias]:a=[o.bias[0],o.bias[1],o.bias[2],o.bias[3]??0];let d=t.format!==void 0?t.format:"RGBA",l=t.tensorFormat!==void 0&&t.tensorFormat!==void 
0?t.tensorFormat:"RGB",c=r*n,m=l==="RGBA"?new Float32Array(c*4):new Float32Array(c*3),u=4,h=0,w=1,g=2,y=3,S=0,$=c,v=c*2,x=-1;d==="RGB"&&(u=3,h=0,w=1,g=2,y=-1),l==="RGBA"?x=c*3:l==="RBG"?(S=0,v=c,$=c*2):l==="BGR"&&(v=0,$=c,S=c*2);for(let C=0;C<c;C++,h+=u,g+=u,w+=u,y+=u)m[S++]=(e[h]+a[0])/i[0],m[$++]=(e[w]+a[1])/i[1],m[v++]=(e[g]+a[2])/i[2],x!==-1&&y!==-1&&(m[x++]=(e[y]+a[3])/i[3]);return l==="RGBA"?new Be("float32",m,[1,4,r,n]):new Be("float32",m,[1,3,r,n])},ia=async(e,t)=>{let r=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,o=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,i=typeof e=="string",a,d=t??{},l=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},c=m=>typeof HTMLCanvasElement<"u"&&m instanceof HTMLCanvasElement||m instanceof OffscreenCanvas?m.getContext("2d"):null;if(r){let m=l();m.width=e.width,m.height=e.height;let u=c(m);if(u!=null){let h=e.height,w=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(h=t.resizedHeight,w=t.resizedWidth),t!==void 0){if(d=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");d.tensorFormat="RGBA",d.height=h,d.width=w}else d.tensorFormat="RGBA",d.height=h,d.width=w;u.drawImage(e,0,0),a=u.getImageData(0,0,w,h).data}else throw new Error("Can not access image data")}else if(n){let m,u;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(m=t.resizedHeight,u=t.resizedWidth):(m=e.height,u=e.width),t!==void 0&&(d=t),d.format="RGBA",d.height=m,d.width=u,t!==void 0){let h=l();h.width=u,h.height=m;let w=c(h);if(w!=null)w.putImageData(e,0,0),a=w.getImageData(0,0,u,m).data;else throw new Error("Can not access image data")}else a=e.data}else if(o){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let 
m=l();m.width=e.width,m.height=e.height;let u=c(m);if(u!=null){let h=e.height,w=e.width;return u.drawImage(e,0,0,w,h),a=u.getImageData(0,0,w,h).data,d.height=h,d.width=w,Wn(a,d)}else throw new Error("Can not access image data")}else{if(i)return new Promise((m,u)=>{let h=l(),w=c(h);if(!e||!w)return u();let g=new Image;g.crossOrigin="Anonymous",g.src=e,g.onload=()=>{h.width=g.width,h.height=g.height,w.drawImage(g,0,0,h.width,h.height);let y=w.getImageData(0,0,h.width,h.height);d.height=h.height,d.width=h.width,m(Wn(y.data,d))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(a!==void 0)return Wn(a,d);throw new Error("Input data provided is not supported - aborted tensor creation")},aa=(e,t)=>{let{width:r,height:n,download:o,dispose:i}=t,a=[1,n,r,4];return new Be({location:"texture",type:"float32",texture:e,dims:a,download:o,dispose:i})},sa=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new Be({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:n,download:o,dispose:i})},ua=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new Be({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:n,download:o,dispose:i})},da=(e,t,r)=>new Be({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})});var Tt,Ft,ca,pa,ma=V(()=>{"use strict";Tt=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),Ft=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),ca=!1,pa=()=>{if(!ca){ca=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof 
Float16Array<"u"&&Float16Array.from;e&&(Tt.set("int64",BigInt64Array),Ft.set(BigInt64Array,"int64")),t&&(Tt.set("uint64",BigUint64Array),Ft.set(BigUint64Array,"uint64")),r?(Tt.set("float16",Float16Array),Ft.set(Float16Array,"float16")):Tt.set("float16",Uint16Array)}}});var fa,ha,ga=V(()=>{"use strict";$r();fa=e=>{let t=1;for(let r=0;r<e.length;r++){let n=e[r];if(typeof n!="number"||!Number.isSafeInteger(n))throw new TypeError(`dims[${r}] must be an integer, got: ${n}`);if(n<0)throw new RangeError(`dims[${r}] must be a non-negative integer, got: ${n}`);t*=n}return t},ha=(e,t)=>{switch(e.location){case"cpu":return new Be(e.type,e.data,t);case"cpu-pinned":return new Be({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new Be({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new Be({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new Be({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}});var Be,$r=V(()=>{"use strict";oa();la();ma();ga();Be=class{constructor(t,r,n){pa();let o,i;if(typeof t=="object"&&"location"in t)switch(this.dataLocation=t.location,o=t.type,i=t.dims,t.location){case"cpu-pinned":{let d=Tt.get(o);if(!d)throw new TypeError(`unsupported type "${o}" to create tensor from pinned buffer`);if(!(t.data instanceof d))throw new TypeError(`buffer should be of type ${d.name}`);this.cpuData=t.data;break}case"texture":{if(o!=="float32")throw new TypeError(`unsupported type "${o}" to create tensor from texture`);this.gpuTextureData=t.texture,this.downloader=t.download,this.disposer=t.dispose;break}case"gpu-buffer":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from gpu 
buffer`);this.gpuBufferData=t.gpuBuffer,this.downloader=t.download,this.disposer=t.dispose;break}case"ml-tensor":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint64"&&o!=="int8"&&o!=="uint8"&&o!=="bool")throw new TypeError(`unsupported type "${o}" to create tensor from MLTensor`);this.mlTensorData=t.mlTensor,this.downloader=t.download,this.disposer=t.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let d,l;if(typeof t=="string")if(o=t,l=n,t==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");d=r}else{let c=Tt.get(t);if(c===void 0)throw new TypeError(`Unsupported tensor type: ${t}.`);if(Array.isArray(r)){if(t==="float16"&&c===Uint16Array||t==="uint4"||t==="int4")throw new TypeError(`Creating a ${t} tensor from number array is not supported. Please use ${c.name} as data.`);t==="uint64"||t==="int64"?d=c.from(r,BigInt):d=c.from(r)}else if(r instanceof c)d=r;else if(r instanceof Uint8ClampedArray)if(t==="uint8")d=Uint8Array.from(r);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else throw new TypeError(`A ${o} tensor's data must be type of ${c}`)}else if(l=r,Array.isArray(t)){if(t.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let c=typeof t[0];if(c==="string")o="string",d=t;else if(c==="boolean")o="bool",d=Uint8Array.from(t);else throw new TypeError(`Invalid element type of data array: ${c}.`)}else if(t instanceof Uint8ClampedArray)o="uint8",d=Uint8Array.from(t);else{let c=Ft.get(t.constructor);if(c===void 0)throw new TypeError(`Unsupported type for tensor data: ${t.constructor}.`);o=c,d=t}if(l===void 0)l=[d.length];else if(!Array.isArray(l))throw new TypeError("A tensor's dims must be a number array");i=l,this.cpuData=d,this.dataLocation="cpu"}let 
a=fa(i);if(this.cpuData&&a!==this.cpuData.length&&!((o==="uint4"||o==="int4")&&Math.ceil(a/2)===this.cpuData.length))throw new Error(`Tensor's size(${a}) does not match data length(${this.cpuData.length}).`);this.type=o,this.dims=i,this.size=a}static async fromImage(t,r){return ia(t,r)}static fromTexture(t,r){return aa(t,r)}static fromGpuBuffer(t,r){return sa(t,r)}static fromMLTensor(t,r){return ua(t,r)}static fromPinnedBuffer(t,r,n){return da(t,r,n)}toDataURL(t){return ra(this,t)}toImageData(t){return na(this,t)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(t){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,t&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being 
downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(t){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return ha(this,t)}}});var De,xr=V(()=>{"use strict";$r();De=Be});var Sr,ya,Le,Ve,Ln=V(()=>{"use strict";Nn();Sr=(e,t)=>{(typeof We.trace>"u"?!We.wasm.trace:!We.trace)||console.timeStamp(`${e}::ORT::${t}`)},ya=(e,t)=>{let r=new Error().stack?.split(/\r\n|\r|\n/g)||[],n=!1;for(let o=0;o<r.length;o++){if(n&&!r[o].includes("TRACE_FUNC")){let i=`FUNC_${e}::${r[o].trim().split(" ")[1]}`;t&&(i+=`::${t}`),Sr("CPU",i);return}r[o].includes("TRACE_FUNC")&&(n=!0)}},Le=e=>{(typeof We.trace>"u"?!We.wasm.trace:!We.trace)||ya("BEGIN",e)},Ve=e=>{(typeof We.trace>"u"?!We.wasm.trace:!We.trace)||ya("END",e)}});var Tr,ba=V(()=>{"use strict";vr();xr();Ln();Tr=class e{constructor(t){this.handler=t}async run(t,r,n){Le();let o={},i={};if(typeof t!="object"||t===null||t instanceof De||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let a=!0;if(typeof r=="object"){if(r===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(r instanceof De)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(r)){if(r.length===0)throw new TypeError("'fetches' cannot be an empty array.");a=!1;for(let c of r){if(typeof c!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(c)===-1)throw new RangeError(`'fetches' contains invalid output name: ${c}.`);o[c]=null}if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else{let 
c=!1,m=Object.getOwnPropertyNames(r);for(let u of this.outputNames)if(m.indexOf(u)!==-1){let h=r[u];(h===null||h instanceof De)&&(c=!0,a=!1,o[u]=h)}if(c){if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else i=r}}else if(typeof r<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let c of this.inputNames)if(typeof t[c]>"u")throw new Error(`input '${c}' is missing in 'feeds'.`);if(a)for(let c of this.outputNames)o[c]=null;let d=await this.handler.run(t,o,i),l={};for(let c in d)if(Object.hasOwnProperty.call(d,c)){let m=d[c];m instanceof De?l[c]=m:l[c]=new De(m.type,m.data,m.dims)}return Ve(),l}async release(){return this.handler.dispose()}static async create(t,r,n,o){Le();let i,a={};if(typeof t=="string"){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let m=t,u=0,h=t.byteLength;if(typeof r=="object"&&r!==null)a=r;else if(typeof r=="number"){if(u=r,!Number.isSafeInteger(u))throw new RangeError("'byteOffset' must be an integer.");if(u<0||u>=m.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${m.byteLength}).`);if(h=t.byteLength-u,typeof n=="number"){if(h=n,!Number.isSafeInteger(h))throw new RangeError("'byteLength' must be an integer.");if(h<=0||u+h>m.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${m.byteLength-u}].`);if(typeof o=="object"&&o!==null)a=o;else if(typeof o<"u")throw new TypeError("'options' must be an object.")}else if(typeof n<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");i=new Uint8Array(m,u,h)}else throw new TypeError("Unexpected 
argument[0]: must be 'path' or 'buffer'.");let[d,l]=await _r(a),c=await d.createInferenceSessionHandler(i,l);return Ve(),new e(c)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}}});var Ip,wa=V(()=>{"use strict";ba();Ip=Tr});var _a=V(()=>{"use strict"});var va=V(()=>{"use strict"});var $a=V(()=>{"use strict"});var xa=V(()=>{"use strict"});var Cp,Ir,Sa=V(()=>{"use strict";vr();xr();Cp="Training backend could not be resolved. Make sure you're using the correct configuration & WebAssembly files.",Ir=class e{constructor(t,r,n){this.handler=t,this.hasOptimizerModel=r,this.hasEvalModel=n}get trainingInputNames(){return this.handler.inputNames}get trainingOutputNames(){return this.handler.outputNames}get evalInputNames(){if(this.hasEvalModel)return this.handler.evalInputNames;throw new Error("This training session has no evalModel loaded.")}get evalOutputNames(){if(this.hasEvalModel)return this.handler.evalOutputNames;throw new Error("This training session has no evalModel loaded.")}static async create(t,r){let n=t.evalModel||"",o=t.optimizerModel||"",i=r||{},[a,d]=await _r(i);if(a.createTrainingSessionHandler){let l=await a.createTrainingSessionHandler(t.checkpointState,t.trainModel,n,o,d);return new e(l,!!t.optimizerModel,!!t.evalModel)}else throw new Error(Cp)}typeNarrowingForRunStep(t,r,n,o,i){let a={},d={};if(typeof n!="object"||n===null||n instanceof De||Array.isArray(n))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let l=!0;if(typeof o=="object"){if(o===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(o instanceof De)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(o)){if(o.length===0)throw new TypeError("'fetches' cannot be an empty array.");l=!1;for(let c of o){if(typeof c!="string")throw new TypeError("'fetches' 
must be a string array or an object.");if(r.indexOf(c)===-1)throw new RangeError(`'fetches' contains invalid output name: ${c}.`);a[c]=null}if(typeof i=="object"&&i!==null)d=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else{let c=!1,m=Object.getOwnPropertyNames(o);for(let u of r)if(m.indexOf(u)!==-1){let h=o[u];(h===null||h instanceof De)&&(c=!0,l=!1,a[u]=h)}if(c){if(typeof i=="object"&&i!==null)d=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else d=o}}else if(typeof o<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let c of t)if(typeof n[c]>"u")throw new Error(`input '${c}' is missing in 'feeds'.`);if(l)for(let c of r)a[c]=null;return[a,d]}convertHandlerReturnTypeToMapOfTensors(t){let r={};for(let n in t)if(Object.hasOwnProperty.call(t,n)){let o=t[n];o instanceof De?r[n]=o:r[n]=new De(o.type,o.data,o.dims)}return r}async lazyResetGrad(){await this.handler.lazyResetGrad()}async runTrainStep(t,r,n){let[o,i]=this.typeNarrowingForRunStep(this.trainingInputNames,this.trainingOutputNames,t,r,n),a=await this.handler.runTrainStep(t,o,i);return this.convertHandlerReturnTypeToMapOfTensors(a)}async runOptimizerStep(t){if(this.hasOptimizerModel)await this.handler.runOptimizerStep(t||{});else throw new Error("This TrainingSession has no OptimizerModel loaded.")}async runEvalStep(t,r,n){if(this.hasEvalModel){let[o,i]=this.typeNarrowingForRunStep(this.evalInputNames,this.evalOutputNames,t,r,n),a=await this.handler.runEvalStep(t,o,i);return this.convertHandlerReturnTypeToMapOfTensors(a)}else throw new Error("This TrainingSession has no EvalModel loaded.")}async getParametersSize(t=!0){return this.handler.getParametersSize(t)}async loadParametersBuffer(t,r=!0){let n=await this.getParametersSize(r);if(t.length!==4*n)throw new Error("Size of the buffer passed into loadParametersBuffer must match the number of parameters in the model. 
Please use getParametersSize method to check.");return this.handler.loadParametersBuffer(t,r)}async getContiguousParameters(t=!0){return this.handler.getContiguousParameters(t)}async release(){return this.handler.dispose()}}});var Ap,Ta=V(()=>{"use strict";Sa();Ap=Ir});var Hn={};Gt(Hn,{InferenceSession:()=>Ip,TRACE:()=>Sr,TRACE_FUNC_BEGIN:()=>Le,TRACE_FUNC_END:()=>Ve,Tensor:()=>De,TrainingSession:()=>Ap,env:()=>_e,registerBackend:()=>St});var Ke=V(()=>{"use strict";Zi();ta();wa();xr();_a();va();Ln();$a();xa();Ta()});var Cr=V(()=>{"use strict"});var ka={};Gt(ka,{default:()=>kp});var Ca,Aa,kp,Ea=V(()=>{"use strict";Gn();gt();qt();Ca="ort-wasm-proxy-worker",Aa=globalThis.self?.name===Ca;Aa&&(self.onmessage=e=>{let{type:t,in:r}=e.data;try{switch(t){case"init-wasm":Ar(r.wasm).then(()=>{kr(r).then(()=>{postMessage({type:t})},n=>{postMessage({type:t,err:n})})},n=>{postMessage({type:t,err:n})});break;case"init-ep":{let{epName:n,env:o}=r;Er(o,n).then(()=>{postMessage({type:t})},i=>{postMessage({type:t,err:i})});break}case"copy-from":{let{buffer:n}=r,o=jt(n);postMessage({type:t,out:o});break}case"create":{let{model:n,options:o}=r;Pr(n,o).then(i=>{postMessage({type:t,out:i})},i=>{postMessage({type:t,err:i})});break}case"release":zr(r),postMessage({type:t});break;case"run":{let{sessionId:n,inputIndices:o,inputs:i,outputIndices:a,options:d}=r;Or(n,o,i,a,new Array(a.length).fill(null),d).then(l=>{l.some(c=>c[3]!=="cpu")?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:l},Dr([...i,...l]))},l=>{postMessage({type:t,err:l})});break}case"end-profiling":Br(r),postMessage({type:t});break;default:}}catch(n){postMessage({type:t,err:n})}});kp=Aa?null:e=>new Worker(e??Ut,{type:"module",name:Ca})});var za={};Gt(za,{default:()=>Ep});var Fn,Pa,Ep,Oa=V(()=>{"use strict";Pa=(Fn=import.meta.url,async function(e={}){function t(){return de.buffer!=ce.buffer&&Ce(),ce}function r(){return de.buffer!=ce.buffer&&Ce(),q}function n(){return 
de.buffer!=ce.buffer&&Ce(),ue}function o(){return de.buffer!=ce.buffer&&Ce(),re}function i(){return de.buffer!=ce.buffer&&Ce(),ne}function a(){return de.buffer!=ce.buffer&&Ce(),oe}function d(){return de.buffer!=ce.buffer&&Ce(),U}function l(){return de.buffer!=ce.buffer&&Ce(),Re}var c,m,u=Object.assign({},e),h=new Promise((s,p)=>{c=s,m=p}),w=typeof window=="object",g=typeof importScripts=="function",y=g&&self.name=="em-pthread";u.mountExternalData=(s,p)=>{s.startsWith("./")&&(s=s.substring(2)),(u.Fb||(u.Fb=new Map)).set(s,p)},u.unmountExternalData=()=>{delete u.Fb};var S=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,shared:!0}).buffer.constructor;let $=()=>{let s=(f,b,_)=>(...I)=>{let O=et,D=b?.();I=f(...I);let L=b?.();return D!==L&&(f=L,_(D),b=_=null),et!=O?new Promise((H,X)=>{En={resolve:H,reject:X}}):I},p=f=>async(...b)=>{try{if(u.Eb)throw Error("Session already started");let _=u.Eb={fc:b[0],errors:[]},I=await f(...b);if(u.Eb!==_)throw Error("Session mismatch");u.Gb?.flush();let O=_.errors;if(0<O.length){let D=await Promise.all(O);if(D=D.filter(L=>L),0<D.length)throw Error(D.join(`
3803
- `))}return I}finally{u.Eb=null}};u._OrtCreateSession=s(u._OrtCreateSession,()=>u._OrtCreateSession,f=>u._OrtCreateSession=f),u._OrtRun=p(s(u._OrtRun,()=>u._OrtRun,f=>u._OrtRun=f)),u._OrtRunWithBinding=p(s(u._OrtRunWithBinding,()=>u._OrtRunWithBinding,f=>u._OrtRunWithBinding=f)),u._OrtBindInput=s(u._OrtBindInput,()=>u._OrtBindInput,f=>u._OrtBindInput=f),$=void 0};u.jsepInit=(s,p)=>{if($?.(),s==="webgpu"){[u.Gb,u.Ub,u.Yb,u.Nb,u.Xb,u.jb,u.Zb,u.bc,u.Vb,u.Wb,u.$b]=p;let f=u.Gb;u.jsepRegisterBuffer=(b,_,I,O)=>f.registerBuffer(b,_,I,O),u.jsepGetBuffer=b=>f.getBuffer(b),u.jsepCreateDownloader=(b,_,I)=>f.createDownloader(b,_,I),u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepOnRunStart=b=>f.onRunStart(b),u.cc=(b,_)=>{f.upload(b,_)}}else if(s==="webnn"){[u.Gb,u.ac,u.Ob,u.jsepEnsureTensor,u.dc,u.jsepDownloadTensor]=p,u.jsepReleaseTensorId=u.Ob;let f=u.Gb;u.jsepOnRunStart=b=>f.onRunStart(b),u.jsepRegisterMLContext=(b,_)=>{f.registerMLContext(b,_)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepCreateMLTensorDownloader=(b,_)=>f.createMLTensorDownloader(b,_),u.jsepRegisterMLTensor=(b,_,I)=>f.registerMLTensor(b,_,I)}};var v,x,T=Object.assign({},u),C="./this.program",A=(s,p)=>{throw p},P="";(w||g)&&(g?P=self.location.href:typeof document<"u"&&document.currentScript&&(P=document.currentScript.src),Fn&&(P=Fn),P=P.startsWith("blob:")?"":P.substr(0,P.replace(/[?#].*/,"").lastIndexOf("/")+1),g&&(x=s=>{var p=new XMLHttpRequest;return p.open("GET",s,!1),p.responseType="arraybuffer",p.send(null),new Uint8Array(p.response)}),v=(s,p,f)=>{var b=new XMLHttpRequest;b.open("GET",s,!0),b.responseType="arraybuffer",b.onload=()=>{b.status==200||b.status==0&&b.response?p(b.response):f()},b.onerror=f,b.send(null)});var B,N=console.log.bind(console),W=console.error.bind(console),K=N,Z=W;if(Object.assign(u,T),T=null,y){let s=function(p){try{var f=p.data,b=f.cmd;if(b==="load"){let _=[];self.onmessage=I=>_.push(I),self.startWorker=()=>{postMessage({cmd:"loaded"});for(let I of 
_)s(I);self.onmessage=s};for(let I of f.handlers)u[I]&&!u[I].proxy||(u[I]=(...O)=>{postMessage({Mb:"callHandler",oc:I,args:O})},I=="print"&&(K=u[I]),I=="printErr"&&(Z=u[I]));de=f.wasmMemory,Ce(),ee(f.wasmModule)}else if(b==="run"){Bn(f.pthread_ptr,0,0,1,0,0),Cn(f.pthread_ptr),ic(),Lo(),se||(Vi(),se=!0);try{ac(f.start_routine,f.arg)}catch(_){if(_!="unwind")throw _}}else b==="cancel"?Rt()&&gr(-1):f.target!=="setimmediate"&&(b==="checkMailbox"?se&&sr():b&&(Z(`worker: received unknown command ${b}`),Z(f)))}catch(_){throw Ni(),_}};var jh=s,ee,se=!1;Z=function(...p){p=p.join(" "),console.error(p)},self.alert=function(...p){postMessage({Mb:"alert",text:p.join(" "),qc:Rt()})},u.instantiateWasm=(p,f)=>new Promise(b=>{ee=_=>{_=new WebAssembly.Instance(_,Ro()),f(_),b()}}),self.onunhandledrejection=p=>{throw p.reason||p},self.onmessage=s}u.wasmBinary&&(B=u.wasmBinary);var de,Y,le,ce,q,ue,re,ne,oe,U,G,ye,Re,$e=!1;function Ce(){var s=de.buffer;u.HEAP8=ce=new Int8Array(s),u.HEAP16=ue=new Int16Array(s),u.HEAPU8=q=new Uint8Array(s),u.HEAPU16=re=new Uint16Array(s),u.HEAP32=ne=new Int32Array(s),u.HEAPU32=oe=new Uint32Array(s),u.HEAPF32=U=new Float32Array(s),u.HEAPF64=Re=new Float64Array(s),u.HEAP64=G=new BigInt64Array(s),u.HEAPU64=ye=new BigUint64Array(s)}if(!y){if(!((de=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0})).buffer instanceof S))throw Z("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),Error("bad memory");Ce()}var bt=[],Ae=[],Me=[],Ue=0,zt=null,wt=null;function zo(){if(--Ue==0&&(zt!==null&&(clearInterval(zt),zt=null),wt)){var s=wt;wt=null,s()}}function Ot(s){throw Z(s="Aborted("+s+")"),$e=!0,le=1,s=new WebAssembly.RuntimeError(s+". 
Build with -sASSERTIONS for more info."),m(s),s}var hn,Oo=s=>s.startsWith("data:application/octet-stream;base64,"),Bo=s=>s.startsWith("file://");function Do(s){if(s==hn&&B)return new Uint8Array(B);if(x)return x(s);throw"both async and sync fetching of the wasm failed"}function Mo(s,p,f){return function(b){if(!B&&(w||g)){if(typeof fetch=="function"&&!Bo(b))return fetch(b,{credentials:"same-origin"}).then(_=>{if(!_.ok)throw`failed to load wasm binary file at '${b}'`;return _.arrayBuffer()}).catch(()=>Do(b));if(v)return new Promise((_,I)=>{v(b,O=>_(new Uint8Array(O)),I)})}return Promise.resolve().then(()=>Do(b))}(s).then(b=>WebAssembly.instantiate(b,p)).then(f,b=>{Z(`failed to asynchronously prepare wasm: ${b}`),Ot(b)})}function Ro(){return{a:{O:oc,Aa:nc,b:uc,aa:qo,B:Yo,qa:Xo,Y:Qo,_:Jo,ra:ei,oa:ti,ha:ri,na:ni,L:oi,Z:ii,W:ai,pa:si,X:ui,wa:dc,F:cc,Q:pc,P:fc,E:gc,u:yc,q:bc,G:wc,A:Ic,R:Cc,ua:Ac,ka:kc,U:Ec,ba:Pc,H:zc,ja:Cn,ta:Oc,t:Bc,x:Rc,o:Uc,l:Nc,c:Tn,n:Wc,j:Gc,w:Fc,p:qc,g:jc,s:Kc,m:Yc,e:Xc,k:Zc,i:Qc,h:Jc,d:ep,ea:tp,fa:rp,ga:np,ca:$i,da:xi,T:op,f:ip,D:ap,I:sp,M:up,y:dp,sa:lp,V:cp,v:Ti,z:pp,N:mp,S:fp,za:hp,ya:gp,la:Ai,ma:ki,$:_n,C:Ei,K:Pi,ia:zi,J:Oi,a:de,xa:wn,va:Mi,r:wp}}}var gn={867620:(s,p,f,b,_)=>{if(u===void 0||!u.Fb)return 1;if((s=ze(s>>>0)).startsWith("./")&&(s=s.substring(2)),!(s=u.Fb.get(s)))return 2;if(b>>>=0,(p>>>=0)+(f>>>=0)>s.byteLength)return 3;try{let I=s.subarray(p,p+f);switch(_){case 0:r().set(I,b>>>0);break;case 1:u.cc(b,I);break;default:return 4}return 0}catch{return 4}},868303:(s,p,f)=>{u.dc(s,r().subarray(p>>>0,p+f>>>0))},868366:()=>u.ac(),868407:s=>{u.Ob(s)},868443:()=>{u.Vb()},868474:()=>{u.Wb()},868503:()=>{u.$b()},868528:s=>u.Ub(s),868561:s=>u.Yb(s),868593:(s,p,f)=>{u.Nb(s,p,f,!0)},868632:(s,p,f)=>{u.Nb(s,p,f)},868665:()=>typeof wasmOffsetConverter<"u",868722:s=>{u.jb("Abs",s,void 0)},868773:s=>{u.jb("Neg",s,void 0)},868824:s=>{u.jb("Floor",s,void 0)},868877:s=>{u.jb("Ceil",s,void 0)},868929:s=>{u.jb("Reciprocal",s,void 
0)},868987:s=>{u.jb("Sqrt",s,void 0)},869039:s=>{u.jb("Exp",s,void 0)},869090:s=>{u.jb("Erf",s,void 0)},869141:s=>{u.jb("Sigmoid",s,void 0)},869196:(s,p,f)=>{u.jb("HardSigmoid",s,{alpha:p,beta:f})},869275:s=>{u.jb("Log",s,void 0)},869326:s=>{u.jb("Sin",s,void 0)},869377:s=>{u.jb("Cos",s,void 0)},869428:s=>{u.jb("Tan",s,void 0)},869479:s=>{u.jb("Asin",s,void 0)},869531:s=>{u.jb("Acos",s,void 0)},869583:s=>{u.jb("Atan",s,void 0)},869635:s=>{u.jb("Sinh",s,void 0)},869687:s=>{u.jb("Cosh",s,void 0)},869739:s=>{u.jb("Asinh",s,void 0)},869792:s=>{u.jb("Acosh",s,void 0)},869845:s=>{u.jb("Atanh",s,void 0)},869898:s=>{u.jb("Tanh",s,void 0)},869950:s=>{u.jb("Not",s,void 0)},870001:(s,p,f)=>{u.jb("Clip",s,{min:p,max:f})},870070:s=>{u.jb("Clip",s,void 0)},870122:(s,p)=>{u.jb("Elu",s,{alpha:p})},870180:s=>{u.jb("Gelu",s,void 0)},870232:s=>{u.jb("Relu",s,void 0)},870284:(s,p)=>{u.jb("LeakyRelu",s,{alpha:p})},870348:(s,p)=>{u.jb("ThresholdedRelu",s,{alpha:p})},870418:(s,p)=>{u.jb("Cast",s,{to:p})},870476:s=>{u.jb("Add",s,void 0)},870527:s=>{u.jb("Sub",s,void 0)},870578:s=>{u.jb("Mul",s,void 0)},870629:s=>{u.jb("Div",s,void 0)},870680:s=>{u.jb("Pow",s,void 0)},870731:s=>{u.jb("Equal",s,void 0)},870784:s=>{u.jb("Greater",s,void 0)},870839:s=>{u.jb("GreaterOrEqual",s,void 0)},870901:s=>{u.jb("Less",s,void 0)},870953:s=>{u.jb("LessOrEqual",s,void 
0)},871012:(s,p,f,b,_)=>{u.jb("ReduceMean",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871171:(s,p,f,b,_)=>{u.jb("ReduceMax",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871329:(s,p,f,b,_)=>{u.jb("ReduceMin",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871487:(s,p,f,b,_)=>{u.jb("ReduceProd",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871646:(s,p,f,b,_)=>{u.jb("ReduceSum",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871804:(s,p,f,b,_)=>{u.jb("ReduceL1",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871961:(s,p,f,b,_)=>{u.jb("ReduceL2",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872118:(s,p,f,b,_)=>{u.jb("ReduceLogSum",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872279:(s,p,f,b,_)=>{u.jb("ReduceSumSquare",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872443:(s,p,f,b,_)=>{u.jb("ReduceLogSumExp",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872607:s=>{u.jb("Where",s,void 
0)},872660:(s,p,f)=>{u.jb("Transpose",s,{perm:p?Array.from(i().subarray(p>>>0,f>>>0)):[]})},872768:(s,p,f,b)=>{u.jb("DepthToSpace",s,{blocksize:p,mode:ze(f),format:b?"NHWC":"NCHW"})},872901:(s,p,f,b)=>{u.jb("DepthToSpace",s,{blocksize:p,mode:ze(f),format:b?"NHWC":"NCHW"})},873034:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z,pe)=>{u.jb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:p,dilations:[f],group:b,kernelShape:[_],pads:[I,O],strides:[D],wIsConst:()=>!!t()[H>>>0],outputPadding:X?Array.from(i().subarray(X>>>0,fe>>>0)):[],outputShape:be?Array.from(i().subarray(be>>>0,z>>>0)):[],activation:ze(pe)})},873435:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)=>{u.jb("ConvTranspose",s,{format:D?"NHWC":"NCHW",autoPad:p,dilations:Array.from(i().subarray(f>>>0,2+(f>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(_>>>0,2+(_>>>0)>>>0)),pads:Array.from(i().subarray(I>>>0,4+(I>>>0)>>>0)),strides:Array.from(i().subarray(O>>>0,2+(O>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(H>>>0,X>>>0)):[],outputShape:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[],activation:ze(z)})},874e3:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z,pe)=>{u.jb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:p,dilations:[f],group:b,kernelShape:[_],pads:[I,O],strides:[D],wIsConst:()=>!!t()[H>>>0],outputPadding:X?Array.from(i().subarray(X>>>0,fe>>>0)):[],outputShape:be?Array.from(i().subarray(be>>>0,z>>>0)):[],activation:ze(pe)})},874401:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)=>{u.jb("ConvTranspose",s,{format:D?"NHWC":"NCHW",autoPad:p,dilations:Array.from(i().subarray(f>>>0,2+(f>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(_>>>0,2+(_>>>0)>>>0)),pads:Array.from(i().subarray(I>>>0,4+(I>>>0)>>>0)),strides:Array.from(i().subarray(O>>>0,2+(O>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(H>>>0,X>>>0)):[],outputShape:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[],activation:ze(z)})},874966:(s,p)=>{u.jb("GlobalAveragePool",s,{format:p?"NHWC":"NCHW"})},875057:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)
=>{u.jb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:D?Array.from(i().subarray(D>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,X>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},875472:(s,p)=>{u.jb("GlobalAveragePool",s,{format:p?"NHWC":"NCHW"})},875563:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)=>{u.jb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:D?Array.from(i().subarray(D>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,X>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},875978:(s,p)=>{u.jb("GlobalMaxPool",s,{format:p?"NHWC":"NCHW"})},876065:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)=>{u.jb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:D?Array.from(i().subarray(D>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,X>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},876476:(s,p)=>{u.jb("GlobalMaxPool",s,{format:p?"NHWC":"NCHW"})},876563:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z)=>{u.jb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:D?Array.from(i().subarray(D>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,X>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},876974:(s,p,f,b,_)=>{u.jb("Gemm",s,{alpha:p,beta:f,transA:b,transB:_})},877078:s=>{u.jb("MatMul",s,void 
0)},877132:(s,p,f,b)=>{u.jb("ArgMax",s,{keepDims:!!p,selectLastIndex:!!f,axis:b})},877240:(s,p,f,b)=>{u.jb("ArgMin",s,{keepDims:!!p,selectLastIndex:!!f,axis:b})},877348:(s,p)=>{u.jb("Softmax",s,{axis:p})},877411:(s,p)=>{u.jb("Concat",s,{axis:p})},877471:(s,p,f,b,_)=>{u.jb("Split",s,{axis:p,numOutputs:f,splitSizes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},877611:s=>{u.jb("Expand",s,void 0)},877665:(s,p)=>{u.jb("Gather",s,{axis:Number(p)})},877736:(s,p)=>{u.jb("GatherElements",s,{axis:Number(p)})},877815:(s,p,f,b,_,I,O,D,L,H,X)=>{u.jb("Resize",s,{antialias:p,axes:f?Array.from(i().subarray(f>>>0,b>>>0)):[],coordinateTransformMode:ze(_),cubicCoeffA:I,excludeOutside:O,extrapolationValue:D,keepAspectRatioPolicy:ze(L),mode:ze(H),nearestMode:ze(X)})},878161:(s,p,f,b,_,I,O)=>{u.jb("Slice",s,{starts:p?Array.from(i().subarray(p>>>0,f>>>0)):[],ends:b?Array.from(i().subarray(b>>>0,_>>>0)):[],axes:I?Array.from(i().subarray(I>>>0,O>>>0)):[]})},878377:s=>{u.jb("Tile",s,void 0)},878429:(s,p,f)=>{u.jb("InstanceNormalization",s,{epsilon:p,format:f?"NHWC":"NCHW"})},878543:(s,p,f)=>{u.jb("InstanceNormalization",s,{epsilon:p,format:f?"NHWC":"NCHW"})},878657:s=>{u.jb("Range",s,void 0)},878710:(s,p)=>{u.jb("Einsum",s,{equation:ze(p)})},878791:(s,p,f,b,_)=>{u.jb("Pad",s,{mode:p,value:f,pads:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},878918:(s,p,f,b,_,I)=>{u.jb("BatchNormalization",s,{epsilon:p,momentum:f,spatial:!!_,trainingMode:!!b,format:I?"NHWC":"NCHW"})},879087:(s,p,f,b,_,I)=>{u.jb("BatchNormalization",s,{epsilon:p,momentum:f,spatial:!!_,trainingMode:!!b,format:I?"NHWC":"NCHW"})},879256:(s,p,f)=>{u.jb("CumSum",s,{exclusive:Number(p),reverse:Number(f)})},879353:(s,p,f)=>{u.jb("DequantizeLinear",s,{axis:p,blockSize:f})},879443:(s,p,f,b,_,I,O,D,L)=>{u.jb("Attention",s,{numHeads:p,isUnidirectional:f,maskFilterValue:b,scale:_,doRotary:I,qkvHiddenSizes:O?Array.from(i().subarray(Number(D)>>>0,Number(D)+O>>>0)):[],pastPresentShareBuffer:!!L})},879715:s=>{u.jb("BiasAdd",s,void 
0)},879770:s=>{u.jb("BiasSplitGelu",s,void 0)},879831:s=>{u.jb("FastGelu",s,void 0)},879887:(s,p,f,b,_,I,O,D,L,H,X,fe,be,z,pe,Se)=>{u.jb("Conv",s,{format:fe?"NHWC":"NCHW",auto_pad:p,dilations:f?Array.from(i().subarray(f>>>0,b>>>0)):[],group:_,kernel_shape:I?Array.from(i().subarray(I>>>0,O>>>0)):[],pads:D?Array.from(i().subarray(D>>>0,L>>>0)):[],strides:H?Array.from(i().subarray(H>>>0,X>>>0)):[],w_is_const:()=>!!t()[be>>>0],activation:ze(z),activation_params:pe?Array.from(d().subarray(pe>>>0,Se>>>0)):[]})},880383:s=>{u.jb("Gelu",s,void 0)},880435:(s,p,f,b)=>{u.jb("GroupQueryAttention",s,{numHeads:p,kvNumHeads:f,scale:b})},880548:(s,p,f,b)=>{u.jb("LayerNormalization",s,{axis:p,epsilon:f,simplified:!!b})},880659:(s,p,f,b)=>{u.jb("LayerNormalization",s,{axis:p,epsilon:f,simplified:!!b})},880770:(s,p,f,b,_,I)=>{u.jb("MatMulNBits",s,{k:p,n:f,accuracyLevel:b,bits:_,blockSize:I})},880897:(s,p,f,b,_,I)=>{u.jb("MultiHeadAttention",s,{numHeads:p,isUnidirectional:f,maskFilterValue:b,scale:_,doRotary:I})},881056:(s,p)=>{u.jb("QuickGelu",s,{alpha:p})},881120:(s,p,f,b,_)=>{u.jb("RotaryEmbedding",s,{interleaved:!!p,numHeads:f,rotaryEmbeddingDim:b,scale:_})},881259:(s,p,f)=>{u.jb("SkipLayerNormalization",s,{epsilon:p,simplified:!!f})},881361:(s,p,f)=>{u.jb("SkipLayerNormalization",s,{epsilon:p,simplified:!!f})},881463:(s,p,f,b)=>{u.jb("GatherBlockQuantized",s,{gatherAxis:p,quantizeAxis:f,blockSize:b})},881584:s=>{u.Zb(s)},881618:(s,p)=>u.bc(s,p,u.Eb.fc,u.Eb.errors)};function nc(s,p,f){return yi(async()=>{await u.Xb(s,p,f)})}function oc(){return typeof wasmOffsetConverter<"u"}function yn(s){this.name="ExitStatus",this.message=`Program terminated with exit(${s})`,this.status=s}var bn=s=>{s.terminate(),s.onmessage=()=>{}},Uo=s=>{pt.length==0&&(Go(),Ho(pt[0]));var p=pt.pop();if(!p)return 6;vt.push(p),Qe[s.Ab]=p,p.Ab=s.Ab;var f={cmd:"run",start_routine:s.hc,arg:s.Qb,pthread_ptr:s.Ab};return p.postMessage(f,s.mc),0},_t=0,xe=(s,p,...f)=>{for(var 
b=2*f.length,_=Rn(),I=Mn(8*b),O=I>>>3,D=0;D<f.length;D++){var L=f[D];typeof L=="bigint"?(G[O+2*D]=1n,G[O+2*D+1]=L):(G[O+2*D]=0n,l()[O+2*D+1>>>0]=L)}return s=Wi(s,0,b,I,p),yr(_),s};function wn(s){if(y)return xe(0,1,s);if(le=s,!(0<_t)){for(var p of vt)bn(p);for(p of pt)bn(p);pt=[],vt=[],Qe=[],$e=!0}A(s,new yn(s))}function Vo(s){if(y)return xe(1,0,s);_n(s)}var _n=s=>{if(le=s,y)throw Vo(s),"unwind";wn(s)},pt=[],vt=[],No=[],Qe={},Wo=s=>{var p=s.Ab;delete Qe[p],pt.push(s),vt.splice(vt.indexOf(s),1),s.Ab=0,Dn(p)};function Lo(){No.forEach(s=>s())}var Ho=s=>new Promise(p=>{s.onmessage=_=>{var I=(_=_.data).cmd;if(_.targetThread&&_.targetThread!=Rt()){var O=Qe[_.targetThread];O?O.postMessage(_,_.transferList):Z(`Internal error! Worker sent a message "${I}" to target pthread ${_.targetThread}, but that thread no longer exists!`)}else I==="checkMailbox"?sr():I==="spawnThread"?Uo(_):I==="cleanupThread"?Wo(Qe[_.thread]):I==="killThread"?(_=_.thread,I=Qe[_],delete Qe[_],bn(I),Dn(_),vt.splice(vt.indexOf(I),1),I.Ab=0):I==="cancelThread"?Qe[_.thread].postMessage({cmd:"cancel"}):I==="loaded"?(s.loaded=!0,p(s)):I==="alert"?alert(`Thread ${_.threadId}: ${_.text}`):_.target==="setimmediate"?s.postMessage(_):I==="callHandler"?u[_.handler](..._.args):I&&Z(`worker sent an unknown command ${I}`)},s.onerror=_=>{throw Z(`worker sent an error! 
${_.filename}:${_.lineno}: ${_.message}`),_};var f,b=[];for(f of[])u.hasOwnProperty(f)&&b.push(f);s.postMessage({cmd:"load",handlers:b,wasmMemory:de,wasmModule:Y})});function Go(){var s=new Worker(new URL(import.meta.url),{type:"module",workerData:"em-pthread",name:"em-pthread"});pt.push(s)}var ar=s=>{for(;0<s.length;)s.shift()(u)},ic=()=>{var s=Rt(),p=a()[s+52>>>2>>>0];s=a()[s+56>>>2>>>0],Hi(p,p-s),yr(p)},ac=(s,p)=>{_t=0,s=Gi(s,p),0<_t?le=s:gr(s)};class sc{constructor(p){this.Jb=p-24}}function uc(s,p,f){var b=new sc(s>>>=0);throw p>>>=0,f>>>=0,a()[b.Jb+16>>>2>>>0]=0,a()[b.Jb+4>>>2>>>0]=p,a()[b.Jb+8>>>2>>>0]=f,s}function Fo(s,p,f,b){return y?xe(2,1,s,p,f,b):qo(s,p,f,b)}function qo(s,p,f,b){if(s>>>=0,p>>>=0,f>>>=0,b>>>=0,S===void 0)return Z("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var _=[];return y&&_.length===0?Fo(s,p,f,b):(s={hc:f,Ab:s,Qb:b,mc:_},y?(s.Mb="spawnThread",postMessage(s,_),0):Uo(s))}var jo=typeof TextDecoder<"u"?new TextDecoder("utf8"):void 0,Ko=(s,p,f)=>{var b=(p>>>=0)+f;for(f=p;s[f]&&!(f>=b);)++f;if(16<f-p&&s.buffer&&jo)return jo.decode(s.buffer instanceof S?s.slice(p,f):s.subarray(p,f));for(b="";p<f;){var _=s[p++];if(128&_){var I=63&s[p++];if((224&_)==192)b+=String.fromCharCode((31&_)<<6|I);else{var O=63&s[p++];65536>(_=(240&_)==224?(15&_)<<12|I<<6|O:(7&_)<<18|I<<12|O<<6|63&s[p++])?b+=String.fromCharCode(_):(_-=65536,b+=String.fromCharCode(55296|_>>10,56320|1023&_))}}else b+=String.fromCharCode(_)}return b},ze=(s,p)=>(s>>>=0)?Ko(r(),s,p):"";function Yo(s,p,f){return y?xe(3,1,s,p,f):0}function Xo(s,p){if(y)return xe(4,1,s,p)}var vn=s=>{for(var p=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);127>=b?p++:2047>=b?p+=2:55296<=b&&57343>=b?(p+=4,++f):p+=3}return p},Zo=(s,p,f,b)=>{if(!(0<b))return 0;var _=f>>>=0;b=f+b-1;for(var I=0;I<s.length;++I){var 
O=s.charCodeAt(I);if(55296<=O&&57343>=O&&(O=65536+((1023&O)<<10)|1023&s.charCodeAt(++I)),127>=O){if(f>=b)break;p[f++>>>0]=O}else{if(2047>=O){if(f+1>=b)break;p[f++>>>0]=192|O>>6}else{if(65535>=O){if(f+2>=b)break;p[f++>>>0]=224|O>>12}else{if(f+3>=b)break;p[f++>>>0]=240|O>>18,p[f++>>>0]=128|O>>12&63}p[f++>>>0]=128|O>>6&63}p[f++>>>0]=128|63&O}}return p[f>>>0]=0,f-_},Bt=(s,p,f)=>Zo(s,r(),p,f);function Qo(s,p){if(y)return xe(5,1,s,p)}function Jo(s,p,f){if(y)return xe(6,1,s,p,f)}function ei(s,p,f){return y?xe(7,1,s,p,f):0}function ti(s,p){if(y)return xe(8,1,s,p)}function ri(s,p,f){if(y)return xe(9,1,s,p,f)}function ni(s,p,f,b){if(y)return xe(10,1,s,p,f,b)}function oi(s,p,f,b){if(y)return xe(11,1,s,p,f,b)}function ii(s,p,f,b){if(y)return xe(12,1,s,p,f,b)}function ai(s){if(y)return xe(13,1,s)}function si(s,p){if(y)return xe(14,1,s,p)}function ui(s,p,f){if(y)return xe(15,1,s,p,f)}var di,mt,dc=()=>{Ot("")},Je=s=>{for(var p="";r()[s>>>0];)p+=di[r()[s++>>>0]];return p},$n={},xn={},lc={};function ut(s,p,f={}){if(!("argPackAdvance"in p))throw new TypeError("registerType registeredInstance requires argPackAdvance");return function(b,_,I={}){var O=_.name;if(!b)throw new mt(`type "${O}" must have a positive integer typeid pointer`);if(xn.hasOwnProperty(b)){if(I.Sb)return;throw new mt(`Cannot register type '${O}' twice`)}xn[b]=_,delete lc[b],$n.hasOwnProperty(b)&&(_=$n[b],delete $n[b],_.forEach(D=>D()))}(s,p,f)}var li=(s,p,f)=>{switch(p){case 1:return f?b=>t()[b>>>0]:b=>r()[b>>>0];case 2:return f?b=>n()[b>>>1>>>0]:b=>o()[b>>>1>>>0];case 4:return f?b=>i()[b>>>2>>>0]:b=>a()[b>>>2>>>0];case 8:return f?b=>G[b>>>3]:b=>ye[b>>>3];default:throw new TypeError(`invalid integer width (${p}): ${s}`)}};function cc(s,p,f){f>>>=0,ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:b=>b,toWireType:function(b,_){if(typeof _!="bigint"&&typeof _!="number")throw _=_===null?"null":(b=typeof _)=="object"||b==="array"||b==="function"?_.toString():""+_,new TypeError(`Cannot convert "${_}" to ${this.name}`);return 
typeof _=="number"&&(_=BigInt(_)),_},argPackAdvance:ft,readValueFromPointer:li(p,f,p.indexOf("u")==-1),Db:null})}var ft=8;function pc(s,p,f,b){ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:function(_){return!!_},toWireType:function(_,I){return I?f:b},argPackAdvance:ft,readValueFromPointer:function(_){return this.fromWireType(r()[_>>>0])},Db:null})}var Sn=[],dt=[];function Tn(s){9<(s>>>=0)&&--dt[s+1]==0&&(dt[s]=void 0,Sn.push(s))}var qe=s=>{if(!s)throw new mt("Cannot use deleted val. handle = "+s);return dt[s]},je=s=>{switch(s){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:let p=Sn.pop()||dt.length;return dt[p]=s,dt[p+1]=1,p}};function In(s){return this.fromWireType(a()[s>>>2>>>0])}var mc={name:"emscripten::val",fromWireType:s=>{var p=qe(s);return Tn(s),p},toWireType:(s,p)=>je(p),argPackAdvance:ft,readValueFromPointer:In,Db:null};function fc(s){return ut(s>>>0,mc)}var hc=(s,p)=>{switch(p){case 4:return function(f){return this.fromWireType(d()[f>>>2>>>0])};case 8:return function(f){return this.fromWireType(l()[f>>>3>>>0])};default:throw new TypeError(`invalid float width (${p}): ${s}`)}};function gc(s,p,f){f>>>=0,ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:b=>b,toWireType:(b,_)=>_,argPackAdvance:ft,readValueFromPointer:hc(p,f),Db:null})}function yc(s,p,f,b,_){if(s>>>=0,f>>>=0,p=Je(p>>>0),_===-1&&(_=4294967295),_=D=>D,b===0){var I=32-8*f;_=D=>D<<I>>>I}var O=p.includes("unsigned")?function(D,L){return L>>>0}:function(D,L){return L};ut(s,{name:p,fromWireType:_,toWireType:O,argPackAdvance:ft,readValueFromPointer:li(p,f,b!==0),Db:null})}function bc(s,p,f){function b(I){var O=a()[I>>>2>>>0];return I=a()[I+4>>>2>>>0],new _(t().buffer,I,O)}var _=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][p];ut(s>>>=0,{name:f=Je(f>>>0),fromWireType:b,argPackAdvance:ft,readValueFromPointer:b},{Sb:!0})}function wc(s,p){s>>>=0;var 
f=(p=Je(p>>>0))==="std::string";ut(s,{name:p,fromWireType:function(b){var _=a()[b>>>2>>>0],I=b+4;if(f)for(var O=I,D=0;D<=_;++D){var L=I+D;if(D==_||r()[L>>>0]==0){if(O=ze(O,L-O),H===void 0)var H=O;else H+=String.fromCharCode(0),H+=O;O=L+1}}else{for(H=Array(_),D=0;D<_;++D)H[D]=String.fromCharCode(r()[I+D>>>0]);H=H.join("")}return tt(b),H},toWireType:function(b,_){_ instanceof ArrayBuffer&&(_=new Uint8Array(_));var I=typeof _=="string";if(!(I||_ instanceof Uint8Array||_ instanceof Uint8ClampedArray||_ instanceof Int8Array))throw new mt("Cannot pass non-string to std::string");var O=f&&I?vn(_):_.length,D=hr(4+O+1),L=D+4;if(a()[D>>>2>>>0]=O,f&&I)Bt(_,L,O+1);else if(I)for(I=0;I<O;++I){var H=_.charCodeAt(I);if(255<H)throw tt(L),new mt("String has UTF-16 code units that do not fit in 8 bits");r()[L+I>>>0]=H}else for(I=0;I<O;++I)r()[L+I>>>0]=_[I];return b!==null&&b.push(tt,D),D},argPackAdvance:ft,readValueFromPointer:In,Db(b){tt(b)}})}var ci=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,_c=(s,p)=>{for(var f=s>>1,b=f+p/2;!(f>=b)&&o()[f>>>0];)++f;if(32<(f<<=1)-s&&ci)return ci.decode(r().slice(s,f));for(f="",b=0;!(b>=p/2);++b){var _=n()[s+2*b>>>1>>>0];if(_==0)break;f+=String.fromCharCode(_)}return f},vc=(s,p,f)=>{if(f??=2147483647,2>f)return 0;var b=p;f=(f-=2)<2*s.length?f/2:s.length;for(var _=0;_<f;++_){var I=s.charCodeAt(_);n()[p>>>1>>>0]=I,p+=2}return n()[p>>>1>>>0]=0,p-b},$c=s=>2*s.length,xc=(s,p)=>{for(var f=0,b="";!(f>=p/4);){var _=i()[s+4*f>>>2>>>0];if(_==0)break;++f,65536<=_?(_-=65536,b+=String.fromCharCode(55296|_>>10,56320|1023&_)):b+=String.fromCharCode(_)}return b},Sc=(s,p,f)=>{if(p>>>=0,f??=2147483647,4>f)return 0;var b=p;f=b+f-4;for(var _=0;_<s.length;++_){var I=s.charCodeAt(_);if(55296<=I&&57343>=I&&(I=65536+((1023&I)<<10)|1023&s.charCodeAt(++_)),i()[p>>>2>>>0]=I,(p+=4)+4>f)break}return i()[p>>>2>>>0]=0,p-b},Tc=s=>{for(var p=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);55296<=b&&57343>=b&&++f,p+=4}return p};function 
Ic(s,p,f){if(s>>>=0,p>>>=0,f=Je(f>>>=0),p===2)var b=_c,_=vc,I=$c,O=D=>o()[D>>>1>>>0];else p===4&&(b=xc,_=Sc,I=Tc,O=D=>a()[D>>>2>>>0]);ut(s,{name:f,fromWireType:D=>{for(var L,H=a()[D>>>2>>>0],X=D+4,fe=0;fe<=H;++fe){var be=D+4+fe*p;fe!=H&&O(be)!=0||(X=b(X,be-X),L===void 0?L=X:(L+=String.fromCharCode(0),L+=X),X=be+p)}return tt(D),L},toWireType:(D,L)=>{if(typeof L!="string")throw new mt(`Cannot pass non-string to C++ string type ${f}`);var H=I(L),X=hr(4+H+p);return a()[X>>>2>>>0]=H/p,_(L,X+4,H+p),D!==null&&D.push(tt,X),X},argPackAdvance:ft,readValueFromPointer:In,Db(D){tt(D)}})}function Cc(s,p){ut(s>>>=0,{Tb:!0,name:p=Je(p>>>0),argPackAdvance:0,fromWireType:()=>{},toWireType:()=>{}})}var Ac=()=>1;function kc(s){Bn(s>>>0,!g,1,!w,131072,!1),Lo()}var pi=s=>{if(!$e)try{if(s(),!(0<_t))try{y?gr(le):_n(le)}catch(p){p instanceof yn||p=="unwind"||A(1,p)}}catch(p){p instanceof yn||p=="unwind"||A(1,p)}};function Cn(s){s>>>=0,typeof Atomics.nc=="function"&&(Atomics.nc(i(),s>>>2,s).value.then(sr),s+=128,Atomics.store(i(),s>>>2,1))}var sr=()=>{var s=Rt();s&&(Cn(s),pi(Li))};function Ec(s,p){(s>>>=0)==p>>>0?setTimeout(sr):y?postMessage({targetThread:s,cmd:"checkMailbox"}):(s=Qe[s])&&s.postMessage({cmd:"checkMailbox"})}var An=[];function Pc(s,p,f,b,_){for(p>>>=0,b/=2,An.length=b,f=_>>>0>>>3,_=0;_<b;_++)An[_]=G[f+2*_]?G[f+2*_+1]:l()[f+2*_+1>>>0];return(p?gn[p]:_p[s])(...An)}function zc(s){s>>>=0,y?postMessage({cmd:"cleanupThread",thread:s}):Wo(Qe[s])}function Oc(s){}var kn=(s,p)=>{var f=xn[s];if(f===void 0)throw s=Ui(s),f=Je(s),tt(s),new mt(`${p} has unknown type ${f}`);return f},mi=(s,p,f)=>{var b=[];return s=s.toWireType(b,f),b.length&&(a()[p>>>2>>>0]=je(b)),s};function Bc(s,p,f){return p>>>=0,f>>>=0,s=qe(s>>>0),p=kn(p,"emval::as"),mi(p,f,s)}var ur=s=>{try{s()}catch(p){Ot(p)}},ht=0,et=null,fi=0,dr=[],hi={},gi={},Dc=0,En=null,Mc=[];function yi(s){return function(p){if(!$e){if(ht===0){var f=!1,b=!1;p((_=0)=>{if(!$e&&(fi=_,f=!0,b)){ht=2,ur(()=>ji(et)),typeof 
Browser<"u"&&Browser.Kb.Rb&&Browser.Kb.resume(),_=!1;try{var I=function(){var L=i()[et+8>>>2>>>0];return L=j[gi[L]],--_t,L()}()}catch(L){I=L,_=!0}var O=!1;if(!et){var D=En;D&&(En=null,(_?D.reject:D.resolve)(I),O=!0)}if(_&&!O)throw I}}),b=!0,f||(ht=1,et=function(){var _=hr(65548),I=_+12;a()[_>>>2>>>0]=I,a()[_+4>>>2>>>0]=I+65536,I=dr[0];var O=hi[I];return O===void 0&&(O=Dc++,hi[I]=O,gi[O]=I),I=O,i()[_+8>>>2>>>0]=I,_}(),typeof Browser<"u"&&Browser.Kb.Rb&&Browser.Kb.pause(),ur(()=>Fi(et)))}else ht===2?(ht=0,ur(Ki),tt(et),et=null,Mc.forEach(pi)):Ot(`invalid state: ${ht}`);return fi}}(p=>{s().then(p)})}function Rc(s){return s>>>=0,yi(()=>(s=qe(s)).then(je))}var lr=[];function Uc(s,p,f,b){return f>>>=0,b>>>=0,(s=lr[s>>>0])(null,p=qe(p>>>0),f,b)}var Vc={},cr=s=>{var p=Vc[s];return p===void 0?Je(s):p};function Nc(s,p,f,b,_){return f>>>=0,b>>>=0,_>>>=0,(s=lr[s>>>0])(p=qe(p>>>0),p[f=cr(f)],b,_)}var bi=()=>typeof globalThis=="object"?globalThis:Function("return this")();function Wc(s){return(s>>>=0)==0?je(bi()):(s=cr(s),je(bi()[s]))}var Lc=s=>{var p=lr.length;return lr.push(s),p},Hc=(s,p)=>{for(var f=Array(s),b=0;b<s;++b)f[b]=kn(a()[p+4*b>>>2>>>0],"parameter "+b);return f},wi=(s,p)=>Object.defineProperty(p,"name",{value:s});function Gc(s,p,f){var b=(p=Hc(s,p>>>0)).shift();s--;var _=`return function (obj, func, destructorsRef, args) {
3804
- `,I=0,O=[];f===0&&O.push("obj");for(var D=["retType"],L=[b],H=0;H<s;++H)O.push("arg"+H),D.push("argType"+H),L.push(p[H]),_+=` var arg${H} = argType${H}.readValueFromPointer(args${I?"+"+I:""});
3802
+ var Vn=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Object.getOwnPropertyNames;var xp=Object.prototype.hasOwnProperty;var Wn=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,r)=>(typeof require<"u"?require:t)[r]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')});var U=(e,t)=>()=>(e&&(t=e(e=0)),t);var Gt=(e,t)=>{for(var r in t)Vn(e,r,{get:t[r],enumerable:!0})},Sp=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of $p(t))!xp.call(e,o)&&o!==r&&Vn(e,o,{get:()=>t[o],enumerable:!(n=vp(t,o))||n.enumerable});return e};var br=e=>Sp(Vn({},"__esModule",{value:!0}),e);var wr,xt,St,Tp,_r,vr=U(()=>{"use strict";wr=new Map,xt=[],St=(e,t,r)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let n=wr.get(e);if(n===void 0)wr.set(e,{backend:t,priority:r});else{if(n.priority>r)return;if(n.priority===r&&n.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${r}`)}if(r>=0){let o=xt.indexOf(e);o!==-1&&xt.splice(o,1);for(let i=0;i<xt.length;i++)if(wr.get(xt[i]).priority<=r){xt.splice(i,0,e);return}xt.push(e)}return}throw new TypeError("not a valid backend")},Tp=async e=>{let t=wr.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(n){return r||(t.error=`${n}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},_r=async e=>{let t=e.executionProviders||[],r=t.map(l=>typeof l=="string"?l:l.name),n=r.length===0?xt:r,o,i=[],a=new Set;for(let l of n){let c=await Tp(l);typeof c=="string"?i.push({name:l,err:c}):(o||(o=c),o===c&&a.add(l))}if(!o)throw new Error(`no available backend found. 
ERR: ${i.map(l=>`[${l.name}] ${l.err}`).join(", ")}`);for(let{name:l,err:c}of i)r.includes(l)&&console.warn(`removing requested execution provider "${l}" from session options because it is not available: ${c}`);let d=t.filter(l=>a.has(typeof l=="string"?l:l.name));return[o,new Proxy(e,{get:(l,c)=>c==="executionProviders"?d:Reflect.get(l,c)})]}});var Ji=U(()=>{"use strict";vr()});var ea,ta=U(()=>{"use strict";ea="1.20.0-dev.20241016-2b8fc5529b"});var ra,Ne,Nn=U(()=>{"use strict";ta();ra="warning",Ne={wasm:{},webgl:{},webgpu:{},versions:{common:ea},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);ra=e}},get logLevel(){return ra}};Object.defineProperty(Ne,"logLevel",{enumerable:!0})});var _e,na=U(()=>{"use strict";Nn();_e=Ne});var oa,ia,aa=U(()=>{"use strict";oa=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let n=r.getContext("2d");if(n!=null){let o,i;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[3]):(o=e.dims[3],i=e.dims[2]);let a=t?.format!==void 0?t.format:"RGB",d=t?.norm,l,c;d===void 0||d.mean===void 0?l=[255,255,255,255]:typeof d.mean=="number"?l=[d.mean,d.mean,d.mean,d.mean]:(l=[d.mean[0],d.mean[1],d.mean[2],0],d.mean[3]!==void 0&&(l[3]=d.mean[3])),d===void 0||d.bias===void 0?c=[0,0,0,0]:typeof d.bias=="number"?c=[d.bias,d.bias,d.bias,d.bias]:(c=[d.bias[0],d.bias[1],d.bias[2],0],d.bias[3]!==void 0&&(c[3]=d.bias[3]));let m=i*o,u=0,h=m,w=m*2,g=-1;a==="RGBA"?(u=0,h=m,w=m*2,g=m*3):a==="RGB"?(u=0,h=m,w=m*2):a==="RBG"&&(u=0,w=m,h=m*2);for(let y=0;y<i;y++)for(let S=0;S<o;S++){let $=(e.data[u++]-c[0])*l[0],v=(e.data[h++]-c[1])*l[1],x=(e.data[w++]-c[2])*l[2],T=g===-1?255:(e.data[g++]-c[3])*l[3];n.fillStyle="rgba("+$+","+v+","+x+","+T+")",n.fillRect(S,y,1,1)}if("toDataURL"in r)return r.toDataURL();throw new Error("toDataURL is not supported")}else 
throw new Error("Can not access image data")},ia=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),n;if(r!=null){let o,i,a;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[1],a=e.dims[3]):(o=e.dims[3],i=e.dims[2],a=e.dims[1]);let d=t!==void 0&&t.format!==void 0?t.format:"RGB",l=t?.norm,c,m;l===void 0||l.mean===void 0?c=[255,255,255,255]:typeof l.mean=="number"?c=[l.mean,l.mean,l.mean,l.mean]:(c=[l.mean[0],l.mean[1],l.mean[2],255],l.mean[3]!==void 0&&(c[3]=l.mean[3])),l===void 0||l.bias===void 0?m=[0,0,0,0]:typeof l.bias=="number"?m=[l.bias,l.bias,l.bias,l.bias]:(m=[l.bias[0],l.bias[1],l.bias[2],0],l.bias[3]!==void 0&&(m[3]=l.bias[3]));let u=i*o;if(t!==void 0&&(t.format!==void 0&&a===4&&t.format!=="RGBA"||a===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let h=4,w=0,g=1,y=2,S=3,$=0,v=u,x=u*2,T=-1;d==="RGBA"?($=0,v=u,x=u*2,T=u*3):d==="RGB"?($=0,v=u,x=u*2):d==="RBG"&&($=0,x=u,v=u*2),n=r.createImageData(o,i);for(let C=0;C<i*o;w+=h,g+=h,y+=h,S+=h,C++)n.data[w]=(e.data[$++]-m[0])*c[0],n.data[g]=(e.data[v++]-m[1])*c[1],n.data[y]=(e.data[x++]-m[2])*c[2],n.data[S]=T===-1?255:(e.data[T++]-m[3])*c[3]}else throw new Error("Can not access image data");return n}});var Ln,sa,ua,da,la,ca,pa=U(()=>{"use strict";$r();Ln=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:r,width:n}=t,o=t.norm??{mean:255,bias:0},i,a;typeof o.mean=="number"?i=[o.mean,o.mean,o.mean,o.mean]:i=[o.mean[0],o.mean[1],o.mean[2],o.mean[3]??255],typeof o.bias=="number"?a=[o.bias,o.bias,o.bias,o.bias]:a=[o.bias[0],o.bias[1],o.bias[2],o.bias[3]??0];let d=t.format!==void 0?t.format:"RGBA",l=t.tensorFormat!==void 0&&t.tensorFormat!==void 
0?t.tensorFormat:"RGB",c=r*n,m=l==="RGBA"?new Float32Array(c*4):new Float32Array(c*3),u=4,h=0,w=1,g=2,y=3,S=0,$=c,v=c*2,x=-1;d==="RGB"&&(u=3,h=0,w=1,g=2,y=-1),l==="RGBA"?x=c*3:l==="RBG"?(S=0,v=c,$=c*2):l==="BGR"&&(v=0,$=c,S=c*2);for(let C=0;C<c;C++,h+=u,g+=u,w+=u,y+=u)m[S++]=(e[h]+a[0])/i[0],m[$++]=(e[w]+a[1])/i[1],m[v++]=(e[g]+a[2])/i[2],x!==-1&&y!==-1&&(m[x++]=(e[y]+a[3])/i[3]);return l==="RGBA"?new De("float32",m,[1,4,r,n]):new De("float32",m,[1,3,r,n])},sa=async(e,t)=>{let r=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,o=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,i=typeof e=="string",a,d=t??{},l=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},c=m=>typeof HTMLCanvasElement<"u"&&m instanceof HTMLCanvasElement||m instanceof OffscreenCanvas?m.getContext("2d"):null;if(r){let m=l();m.width=e.width,m.height=e.height;let u=c(m);if(u!=null){let h=e.height,w=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(h=t.resizedHeight,w=t.resizedWidth),t!==void 0){if(d=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");d.tensorFormat="RGBA",d.height=h,d.width=w}else d.tensorFormat="RGBA",d.height=h,d.width=w;u.drawImage(e,0,0),a=u.getImageData(0,0,w,h).data}else throw new Error("Can not access image data")}else if(n){let m,u;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(m=t.resizedHeight,u=t.resizedWidth):(m=e.height,u=e.width),t!==void 0&&(d=t),d.format="RGBA",d.height=m,d.width=u,t!==void 0){let h=l();h.width=u,h.height=m;let w=c(h);if(w!=null)w.putImageData(e,0,0),a=w.getImageData(0,0,u,m).data;else throw new Error("Can not access image data")}else a=e.data}else if(o){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let 
m=l();m.width=e.width,m.height=e.height;let u=c(m);if(u!=null){let h=e.height,w=e.width;return u.drawImage(e,0,0,w,h),a=u.getImageData(0,0,w,h).data,d.height=h,d.width=w,Ln(a,d)}else throw new Error("Can not access image data")}else{if(i)return new Promise((m,u)=>{let h=l(),w=c(h);if(!e||!w)return u();let g=new Image;g.crossOrigin="Anonymous",g.src=e,g.onload=()=>{h.width=g.width,h.height=g.height,w.drawImage(g,0,0,h.width,h.height);let y=w.getImageData(0,0,h.width,h.height);d.height=h.height,d.width=h.width,m(Ln(y.data,d))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(a!==void 0)return Ln(a,d);throw new Error("Input data provided is not supported - aborted tensor creation")},ua=(e,t)=>{let{width:r,height:n,download:o,dispose:i}=t,a=[1,n,r,4];return new De({location:"texture",type:"float32",texture:e,dims:a,download:o,dispose:i})},da=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new De({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:n,download:o,dispose:i})},la=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new De({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:n,download:o,dispose:i})},ca=(e,t,r)=>new De({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})});var Tt,Ft,ma,fa,ha=U(()=>{"use strict";Tt=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),Ft=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),ma=!1,fa=()=>{if(!ma){ma=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof 
Float16Array<"u"&&Float16Array.from;e&&(Tt.set("int64",BigInt64Array),Ft.set(BigInt64Array,"int64")),t&&(Tt.set("uint64",BigUint64Array),Ft.set(BigUint64Array,"uint64")),r?(Tt.set("float16",Float16Array),Ft.set(Float16Array,"float16")):Tt.set("float16",Uint16Array)}}});var ga,ya,ba=U(()=>{"use strict";$r();ga=e=>{let t=1;for(let r=0;r<e.length;r++){let n=e[r];if(typeof n!="number"||!Number.isSafeInteger(n))throw new TypeError(`dims[${r}] must be an integer, got: ${n}`);if(n<0)throw new RangeError(`dims[${r}] must be a non-negative integer, got: ${n}`);t*=n}return t},ya=(e,t)=>{switch(e.location){case"cpu":return new De(e.type,e.data,t);case"cpu-pinned":return new De({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new De({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new De({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new De({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}});var De,$r=U(()=>{"use strict";aa();pa();ha();ba();De=class{constructor(t,r,n){fa();let o,i;if(typeof t=="object"&&"location"in t)switch(this.dataLocation=t.location,o=t.type,i=t.dims,t.location){case"cpu-pinned":{let d=Tt.get(o);if(!d)throw new TypeError(`unsupported type "${o}" to create tensor from pinned buffer`);if(!(t.data instanceof d))throw new TypeError(`buffer should be of type ${d.name}`);this.cpuData=t.data;break}case"texture":{if(o!=="float32")throw new TypeError(`unsupported type "${o}" to create tensor from texture`);this.gpuTextureData=t.texture,this.downloader=t.download,this.disposer=t.dispose;break}case"gpu-buffer":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from gpu 
buffer`);this.gpuBufferData=t.gpuBuffer,this.downloader=t.download,this.disposer=t.dispose;break}case"ml-tensor":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint64"&&o!=="int8"&&o!=="uint8"&&o!=="bool")throw new TypeError(`unsupported type "${o}" to create tensor from MLTensor`);this.mlTensorData=t.mlTensor,this.downloader=t.download,this.disposer=t.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let d,l;if(typeof t=="string")if(o=t,l=n,t==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");d=r}else{let c=Tt.get(t);if(c===void 0)throw new TypeError(`Unsupported tensor type: ${t}.`);if(Array.isArray(r)){if(t==="float16"&&c===Uint16Array||t==="uint4"||t==="int4")throw new TypeError(`Creating a ${t} tensor from number array is not supported. Please use ${c.name} as data.`);t==="uint64"||t==="int64"?d=c.from(r,BigInt):d=c.from(r)}else if(r instanceof c)d=r;else if(r instanceof Uint8ClampedArray)if(t==="uint8")d=Uint8Array.from(r);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else throw new TypeError(`A ${o} tensor's data must be type of ${c}`)}else if(l=r,Array.isArray(t)){if(t.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let c=typeof t[0];if(c==="string")o="string",d=t;else if(c==="boolean")o="bool",d=Uint8Array.from(t);else throw new TypeError(`Invalid element type of data array: ${c}.`)}else if(t instanceof Uint8ClampedArray)o="uint8",d=Uint8Array.from(t);else{let c=Ft.get(t.constructor);if(c===void 0)throw new TypeError(`Unsupported type for tensor data: ${t.constructor}.`);o=c,d=t}if(l===void 0)l=[d.length];else if(!Array.isArray(l))throw new TypeError("A tensor's dims must be a number array");i=l,this.cpuData=d,this.dataLocation="cpu"}let 
a=ga(i);if(this.cpuData&&a!==this.cpuData.length&&!((o==="uint4"||o==="int4")&&Math.ceil(a/2)===this.cpuData.length))throw new Error(`Tensor's size(${a}) does not match data length(${this.cpuData.length}).`);this.type=o,this.dims=i,this.size=a}static async fromImage(t,r){return sa(t,r)}static fromTexture(t,r){return ua(t,r)}static fromGpuBuffer(t,r){return da(t,r)}static fromMLTensor(t,r){return la(t,r)}static fromPinnedBuffer(t,r,n){return ca(t,r,n)}toDataURL(t){return oa(this,t)}toImageData(t){return ia(this,t)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(t){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,t&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being 
downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(t){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return ya(this,t)}}});var Be,xr=U(()=>{"use strict";$r();Be=De});var Sr,wa,Le,Ve,Hn=U(()=>{"use strict";Nn();Sr=(e,t)=>{(typeof Ne.trace>"u"?!Ne.wasm.trace:!Ne.trace)||console.timeStamp(`${e}::ORT::${t}`)},wa=(e,t)=>{let r=new Error().stack?.split(/\r\n|\r|\n/g)||[],n=!1;for(let o=0;o<r.length;o++){if(n&&!r[o].includes("TRACE_FUNC")){let i=`FUNC_${e}::${r[o].trim().split(" ")[1]}`;t&&(i+=`::${t}`),Sr("CPU",i);return}r[o].includes("TRACE_FUNC")&&(n=!0)}},Le=e=>{(typeof Ne.trace>"u"?!Ne.wasm.trace:!Ne.trace)||wa("BEGIN",e)},Ve=e=>{(typeof Ne.trace>"u"?!Ne.wasm.trace:!Ne.trace)||wa("END",e)}});var Tr,_a=U(()=>{"use strict";vr();xr();Hn();Tr=class e{constructor(t){this.handler=t}async run(t,r,n){Le();let o={},i={};if(typeof t!="object"||t===null||t instanceof Be||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let a=!0;if(typeof r=="object"){if(r===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(r instanceof Be)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(r)){if(r.length===0)throw new TypeError("'fetches' cannot be an empty array.");a=!1;for(let c of r){if(typeof c!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(c)===-1)throw new RangeError(`'fetches' contains invalid output name: ${c}.`);o[c]=null}if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else{let 
c=!1,m=Object.getOwnPropertyNames(r);for(let u of this.outputNames)if(m.indexOf(u)!==-1){let h=r[u];(h===null||h instanceof Be)&&(c=!0,a=!1,o[u]=h)}if(c){if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else i=r}}else if(typeof r<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let c of this.inputNames)if(typeof t[c]>"u")throw new Error(`input '${c}' is missing in 'feeds'.`);if(a)for(let c of this.outputNames)o[c]=null;let d=await this.handler.run(t,o,i),l={};for(let c in d)if(Object.hasOwnProperty.call(d,c)){let m=d[c];m instanceof Be?l[c]=m:l[c]=new Be(m.type,m.data,m.dims)}return Ve(),l}async release(){return this.handler.dispose()}static async create(t,r,n,o){Le();let i,a={};if(typeof t=="string"){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let m=t,u=0,h=t.byteLength;if(typeof r=="object"&&r!==null)a=r;else if(typeof r=="number"){if(u=r,!Number.isSafeInteger(u))throw new RangeError("'byteOffset' must be an integer.");if(u<0||u>=m.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${m.byteLength}).`);if(h=t.byteLength-u,typeof n=="number"){if(h=n,!Number.isSafeInteger(h))throw new RangeError("'byteLength' must be an integer.");if(h<=0||u+h>m.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${m.byteLength-u}].`);if(typeof o=="object"&&o!==null)a=o;else if(typeof o<"u")throw new TypeError("'options' must be an object.")}else if(typeof n<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");i=new Uint8Array(m,u,h)}else throw new TypeError("Unexpected 
argument[0]: must be 'path' or 'buffer'.");let[d,l]=await _r(a),c=await d.createInferenceSessionHandler(i,l);return Ve(),new e(c)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}}});var Ip,va=U(()=>{"use strict";_a();Ip=Tr});var $a=U(()=>{"use strict"});var xa=U(()=>{"use strict"});var Sa=U(()=>{"use strict"});var Ta=U(()=>{"use strict"});var Cp,Ir,Ia=U(()=>{"use strict";vr();xr();Cp="Training backend could not be resolved. Make sure you're using the correct configuration & WebAssembly files.",Ir=class e{constructor(t,r,n){this.handler=t,this.hasOptimizerModel=r,this.hasEvalModel=n}get trainingInputNames(){return this.handler.inputNames}get trainingOutputNames(){return this.handler.outputNames}get evalInputNames(){if(this.hasEvalModel)return this.handler.evalInputNames;throw new Error("This training session has no evalModel loaded.")}get evalOutputNames(){if(this.hasEvalModel)return this.handler.evalOutputNames;throw new Error("This training session has no evalModel loaded.")}static async create(t,r){let n=t.evalModel||"",o=t.optimizerModel||"",i=r||{},[a,d]=await _r(i);if(a.createTrainingSessionHandler){let l=await a.createTrainingSessionHandler(t.checkpointState,t.trainModel,n,o,d);return new e(l,!!t.optimizerModel,!!t.evalModel)}else throw new Error(Cp)}typeNarrowingForRunStep(t,r,n,o,i){let a={},d={};if(typeof n!="object"||n===null||n instanceof Be||Array.isArray(n))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let l=!0;if(typeof o=="object"){if(o===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(o instanceof Be)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(o)){if(o.length===0)throw new TypeError("'fetches' cannot be an empty array.");l=!1;for(let c of o){if(typeof c!="string")throw new TypeError("'fetches' 
must be a string array or an object.");if(r.indexOf(c)===-1)throw new RangeError(`'fetches' contains invalid output name: ${c}.`);a[c]=null}if(typeof i=="object"&&i!==null)d=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else{let c=!1,m=Object.getOwnPropertyNames(o);for(let u of r)if(m.indexOf(u)!==-1){let h=o[u];(h===null||h instanceof Be)&&(c=!0,l=!1,a[u]=h)}if(c){if(typeof i=="object"&&i!==null)d=i;else if(typeof i<"u")throw new TypeError("'options' must be an object.")}else d=o}}else if(typeof o<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let c of t)if(typeof n[c]>"u")throw new Error(`input '${c}' is missing in 'feeds'.`);if(l)for(let c of r)a[c]=null;return[a,d]}convertHandlerReturnTypeToMapOfTensors(t){let r={};for(let n in t)if(Object.hasOwnProperty.call(t,n)){let o=t[n];o instanceof Be?r[n]=o:r[n]=new Be(o.type,o.data,o.dims)}return r}async lazyResetGrad(){await this.handler.lazyResetGrad()}async runTrainStep(t,r,n){let[o,i]=this.typeNarrowingForRunStep(this.trainingInputNames,this.trainingOutputNames,t,r,n),a=await this.handler.runTrainStep(t,o,i);return this.convertHandlerReturnTypeToMapOfTensors(a)}async runOptimizerStep(t){if(this.hasOptimizerModel)await this.handler.runOptimizerStep(t||{});else throw new Error("This TrainingSession has no OptimizerModel loaded.")}async runEvalStep(t,r,n){if(this.hasEvalModel){let[o,i]=this.typeNarrowingForRunStep(this.evalInputNames,this.evalOutputNames,t,r,n),a=await this.handler.runEvalStep(t,o,i);return this.convertHandlerReturnTypeToMapOfTensors(a)}else throw new Error("This TrainingSession has no EvalModel loaded.")}async getParametersSize(t=!0){return this.handler.getParametersSize(t)}async loadParametersBuffer(t,r=!0){let n=await this.getParametersSize(r);if(t.length!==4*n)throw new Error("Size of the buffer passed into loadParametersBuffer must match the number of parameters in the model. 
Please use getParametersSize method to check.");return this.handler.loadParametersBuffer(t,r)}async getContiguousParameters(t=!0){return this.handler.getContiguousParameters(t)}async release(){return this.handler.dispose()}}});var Ap,Ca=U(()=>{"use strict";Ia();Ap=Ir});var Gn={};Gt(Gn,{InferenceSession:()=>Ip,TRACE:()=>Sr,TRACE_FUNC_BEGIN:()=>Le,TRACE_FUNC_END:()=>Ve,Tensor:()=>Be,TrainingSession:()=>Ap,env:()=>_e,registerBackend:()=>St});var Ke=U(()=>{"use strict";Ji();na();va();xr();$a();xa();Hn();Sa();Ta();Ca()});var Cr=U(()=>{"use strict"});var Pa={};Gt(Pa,{default:()=>kp});var ka,Ea,kp,za=U(()=>{"use strict";Fn();gt();qt();ka="ort-wasm-proxy-worker",Ea=globalThis.self?.name===ka;Ea&&(self.onmessage=e=>{let{type:t,in:r}=e.data;try{switch(t){case"init-wasm":Ar(r.wasm).then(()=>{kr(r).then(()=>{postMessage({type:t})},n=>{postMessage({type:t,err:n})})},n=>{postMessage({type:t,err:n})});break;case"init-ep":{let{epName:n,env:o}=r;Er(o,n).then(()=>{postMessage({type:t})},i=>{postMessage({type:t,err:i})});break}case"copy-from":{let{buffer:n}=r,o=jt(n);postMessage({type:t,out:o});break}case"create":{let{model:n,options:o}=r;Pr(n,o).then(i=>{postMessage({type:t,out:i})},i=>{postMessage({type:t,err:i})});break}case"release":zr(r),postMessage({type:t});break;case"run":{let{sessionId:n,inputIndices:o,inputs:i,outputIndices:a,options:d}=r;Or(n,o,i,a,new Array(a.length).fill(null),d).then(l=>{l.some(c=>c[3]!=="cpu")?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:l},Br([...i,...l]))},l=>{postMessage({type:t,err:l})});break}case"end-profiling":Dr(r),postMessage({type:t});break;default:}}catch(n){postMessage({type:t,err:n})}});kp=Ea?null:e=>new Worker(e??Ut,{type:"module",name:ka})});var Da={};Gt(Da,{default:()=>Ep});var qn,Oa,Ep,Ba=U(()=>{"use strict";Oa=(qn=import.meta.url,async function(e={}){function t(){return ue.buffer!=ce.buffer&&Ce(),ce}function r(){return ue.buffer!=ce.buffer&&Ce(),q}function n(){return 
ue.buffer!=ce.buffer&&Ce(),le}function o(){return ue.buffer!=ce.buffer&&Ce(),re}function i(){return ue.buffer!=ce.buffer&&Ce(),ne}function a(){return ue.buffer!=ce.buffer&&Ce(),oe}function d(){return ue.buffer!=ce.buffer&&Ce(),R}function l(){return ue.buffer!=ce.buffer&&Ce(),Re}var c,m,u=Object.assign({},e),h=new Promise((s,p)=>{c=s,m=p}),w=typeof window=="object",g=typeof importScripts=="function",y=g&&self.name=="em-pthread";u.mountExternalData=(s,p)=>{s.startsWith("./")&&(s=s.substring(2)),(u.Eb||(u.Eb=new Map)).set(s,p)},u.unmountExternalData=()=>{delete u.Eb};var S=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,shared:!0}).buffer.constructor;let $=()=>{let s=(f,b,_)=>(...I)=>{let O=et,B=b?.();I=f(...I);let L=b?.();return B!==L&&(f=L,_(B),b=_=null),et!=O?new Promise((H,Q)=>{Pn={resolve:H,reject:Q}}):I},p=f=>async(...b)=>{try{if(u.Fb)throw Error("Session already started");let _=u.Fb={fc:b[0],errors:[]},I=await f(...b);if(u.Fb!==_)throw Error("Session mismatch");u.Gb?.flush();let O=_.errors;if(0<O.length){let B=await Promise.all(O);if(B=B.filter(L=>L),0<B.length)throw Error(B.join(`
3803
+ `))}return I}finally{u.Fb=null}};u._OrtCreateSession=s(u._OrtCreateSession,()=>u._OrtCreateSession,f=>u._OrtCreateSession=f),u._OrtRun=p(s(u._OrtRun,()=>u._OrtRun,f=>u._OrtRun=f)),u._OrtRunWithBinding=p(s(u._OrtRunWithBinding,()=>u._OrtRunWithBinding,f=>u._OrtRunWithBinding=f)),u._OrtBindInput=s(u._OrtBindInput,()=>u._OrtBindInput,f=>u._OrtBindInput=f),$=void 0};u.jsepInit=(s,p)=>{if($?.(),s==="webgpu"){[u.Gb,u.Ub,u.Yb,u.Nb,u.Xb,u.jb,u.Zb,u.bc,u.Vb,u.Wb,u.$b]=p;let f=u.Gb;u.jsepRegisterBuffer=(b,_,I,O)=>f.registerBuffer(b,_,I,O),u.jsepGetBuffer=b=>f.getBuffer(b),u.jsepCreateDownloader=(b,_,I)=>f.createDownloader(b,_,I),u.jsepOnCreateSession=b=>{f.onCreateSession(b)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepOnRunStart=b=>f.onRunStart(b),u.cc=(b,_)=>{f.upload(b,_)}}else if(s==="webnn"){[u.Gb,u.ac,u.Ob,u.jsepEnsureTensor,u.dc,u.jsepDownloadTensor]=p,u.jsepReleaseTensorId=u.Ob;let f=u.Gb;u.jsepOnRunStart=b=>f.onRunStart(b),u.jsepRegisterMLContext=(b,_)=>{f.registerMLContext(b,_)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepCreateMLTensorDownloader=(b,_)=>f.createMLTensorDownloader(b,_),u.jsepRegisterMLTensor=(b,_,I)=>f.registerMLTensor(b,_,I),u.qc=(b,_,I,O,B)=>f.registerMLConstant(b,_,I,O,B,u.Eb)}};var v,x,T=Object.assign({},u),C="./this.program",A=(s,p)=>{throw p},P="";(w||g)&&(g?P=self.location.href:typeof document<"u"&&document.currentScript&&(P=document.currentScript.src),qn&&(P=qn),P=P.startsWith("blob:")?"":P.substr(0,P.replace(/[?#].*/,"").lastIndexOf("/")+1),g&&(x=s=>{var p=new XMLHttpRequest;return p.open("GET",s,!1),p.responseType="arraybuffer",p.send(null),new Uint8Array(p.response)}),v=(s,p,f)=>{var b=new XMLHttpRequest;b.open("GET",s,!0),b.responseType="arraybuffer",b.onload=()=>{b.status==200||b.status==0&&b.response?p(b.response):f()},b.onerror=f,b.send(null)});var D,W=console.log.bind(console),N=console.error.bind(console),j=W,Y=N;if(Object.assign(u,T),T=null,y){let s=function(p){try{var f=p.data,b=f.cmd;if(b==="load"){let 
_=[];self.onmessage=I=>_.push(I),self.startWorker=()=>{postMessage({cmd:"loaded"});for(let I of _)s(I);self.onmessage=s};for(let I of f.handlers)u[I]&&!u[I].proxy||(u[I]=(...O)=>{postMessage({Mb:"callHandler",oc:I,args:O})},I=="print"&&(j=u[I]),I=="printErr"&&(Y=u[I]));ue=f.wasmMemory,Ce(),Z(f.wasmModule)}else if(b==="run"){Bn(f.pthread_ptr,0,0,1,0,0),An(f.pthread_ptr),ic(),Go(),te||(Ni(),te=!0);try{ac(f.start_routine,f.arg)}catch(_){if(_!="unwind")throw _}}else b==="cancel"?Rt()&&gr(-1):f.target!=="setimmediate"&&(b==="checkMailbox"?te&&sr():b&&(Y(`worker: received unknown command ${b}`),Y(f)))}catch(_){throw Li(),_}};var jh=s,Z,te=!1;Y=function(...p){p=p.join(" "),console.error(p)},self.alert=function(...p){postMessage({Mb:"alert",text:p.join(" "),rc:Rt()})},u.instantiateWasm=(p,f)=>new Promise(b=>{Z=_=>{_=new WebAssembly.Instance(_,Vo()),f(_),b()}}),self.onunhandledrejection=p=>{throw p.reason||p},self.onmessage=s}u.wasmBinary&&(D=u.wasmBinary);var ue,K,de,ce,q,le,re,ne,oe,R,G,ye,Re,$e=!1;function Ce(){var s=ue.buffer;u.HEAP8=ce=new Int8Array(s),u.HEAP16=le=new Int16Array(s),u.HEAPU8=q=new Uint8Array(s),u.HEAPU16=re=new Uint16Array(s),u.HEAP32=ne=new Int32Array(s),u.HEAPU32=oe=new Uint32Array(s),u.HEAPF32=R=new Float32Array(s),u.HEAPF64=Re=new Float64Array(s),u.HEAP64=G=new BigInt64Array(s),u.HEAPU64=ye=new BigUint64Array(s)}if(!y){if(!((ue=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0})).buffer instanceof S))throw Y("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),Error("bad memory");Ce()}var bt=[],Ae=[],Me=[],Ue=0,zt=null,wt=null;function Do(){if(--Ue==0&&(zt!==null&&(clearInterval(zt),zt=null),wt)){var s=wt;wt=null,s()}}function Ot(s){throw Y(s="Aborted("+s+")"),$e=!0,de=1,s=new WebAssembly.RuntimeError(s+". 
Build with -sASSERTIONS for more info."),m(s),s}var gn,Bo=s=>s.startsWith("data:application/octet-stream;base64,"),Mo=s=>s.startsWith("file://");function Ro(s){if(s==gn&&D)return new Uint8Array(D);if(x)return x(s);throw"both async and sync fetching of the wasm failed"}function Uo(s,p,f){return function(b){if(!D&&(w||g)){if(typeof fetch=="function"&&!Mo(b))return fetch(b,{credentials:"same-origin"}).then(_=>{if(!_.ok)throw`failed to load wasm binary file at '${b}'`;return _.arrayBuffer()}).catch(()=>Ro(b));if(v)return new Promise((_,I)=>{v(b,O=>_(new Uint8Array(O)),I)})}return Promise.resolve().then(()=>Ro(b))}(s).then(b=>WebAssembly.instantiate(b,p)).then(f,b=>{Y(`failed to asynchronously prepare wasm: ${b}`),Ot(b)})}function Vo(){return{a:{O:oc,Aa:nc,b:uc,aa:Ko,B:Qo,qa:Zo,Y:ei,_:ti,ra:ri,oa:ni,ha:oi,na:ii,L:ai,Z:si,W:ui,pa:di,X:li,wa:dc,F:cc,Q:pc,P:fc,E:gc,u:yc,q:bc,G:wc,A:Ic,R:Cc,ua:Ac,ka:kc,U:Ec,ba:Pc,H:zc,ja:An,ta:Oc,t:Dc,x:Rc,n:Uc,l:Wc,c:In,o:Nc,j:Gc,w:Fc,p:qc,g:jc,s:Kc,m:Yc,e:Xc,k:Qc,i:Zc,h:Jc,d:ep,ea:tp,fa:rp,ga:np,ca:Si,da:Ti,T:op,f:ip,D:ap,I:sp,M:up,y:dp,sa:lp,V:cp,v:Ci,z:pp,N:mp,S:fp,za:hp,ya:gp,la:Ei,ma:Pi,$:vn,C:zi,K:Oi,ia:Di,J:Bi,a:ue,xa:_n,va:Ui,r:wp}}}var yn={867364:(s,p,f,b,_)=>{if(u===void 0||!u.Eb)return 1;if((s=ze(s>>>0)).startsWith("./")&&(s=s.substring(2)),!(s=u.Eb.get(s)))return 2;if(b>>>=0,(p>>>=0)+(f>>>=0)>s.byteLength)return 3;try{let I=s.subarray(p,p+f);switch(_){case 0:r().set(I,b>>>0);break;case 1:u.cc(b,I);break;default:return 4}return 0}catch{return 4}},868047:(s,p,f)=>{u.dc(s,r().subarray(p>>>0,p+f>>>0))},868110:()=>u.ac(),868151:s=>{u.Ob(s)},868187:()=>{u.Vb()},868218:()=>{u.Wb()},868247:()=>{u.$b()},868272:s=>u.Ub(s),868305:s=>u.Yb(s),868337:(s,p,f)=>{u.Nb(s,p,f,!0)},868376:(s,p,f)=>{u.Nb(s,p,f)},868409:()=>typeof wasmOffsetConverter<"u",868466:s=>{u.jb("Abs",s,void 0)},868517:s=>{u.jb("Neg",s,void 0)},868568:s=>{u.jb("Floor",s,void 0)},868621:s=>{u.jb("Ceil",s,void 0)},868673:s=>{u.jb("Reciprocal",s,void 
0)},868731:s=>{u.jb("Sqrt",s,void 0)},868783:s=>{u.jb("Exp",s,void 0)},868834:s=>{u.jb("Erf",s,void 0)},868885:s=>{u.jb("Sigmoid",s,void 0)},868940:(s,p,f)=>{u.jb("HardSigmoid",s,{alpha:p,beta:f})},869019:s=>{u.jb("Log",s,void 0)},869070:s=>{u.jb("Sin",s,void 0)},869121:s=>{u.jb("Cos",s,void 0)},869172:s=>{u.jb("Tan",s,void 0)},869223:s=>{u.jb("Asin",s,void 0)},869275:s=>{u.jb("Acos",s,void 0)},869327:s=>{u.jb("Atan",s,void 0)},869379:s=>{u.jb("Sinh",s,void 0)},869431:s=>{u.jb("Cosh",s,void 0)},869483:s=>{u.jb("Asinh",s,void 0)},869536:s=>{u.jb("Acosh",s,void 0)},869589:s=>{u.jb("Atanh",s,void 0)},869642:s=>{u.jb("Tanh",s,void 0)},869694:s=>{u.jb("Not",s,void 0)},869745:(s,p,f)=>{u.jb("Clip",s,{min:p,max:f})},869814:s=>{u.jb("Clip",s,void 0)},869866:(s,p)=>{u.jb("Elu",s,{alpha:p})},869924:s=>{u.jb("Gelu",s,void 0)},869976:s=>{u.jb("Relu",s,void 0)},870028:(s,p)=>{u.jb("LeakyRelu",s,{alpha:p})},870092:(s,p)=>{u.jb("ThresholdedRelu",s,{alpha:p})},870162:(s,p)=>{u.jb("Cast",s,{to:p})},870220:s=>{u.jb("Add",s,void 0)},870271:s=>{u.jb("Sub",s,void 0)},870322:s=>{u.jb("Mul",s,void 0)},870373:s=>{u.jb("Div",s,void 0)},870424:s=>{u.jb("Pow",s,void 0)},870475:s=>{u.jb("Equal",s,void 0)},870528:s=>{u.jb("Greater",s,void 0)},870583:s=>{u.jb("GreaterOrEqual",s,void 0)},870645:s=>{u.jb("Less",s,void 0)},870697:s=>{u.jb("LessOrEqual",s,void 
0)},870756:(s,p,f,b,_)=>{u.jb("ReduceMean",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},870915:(s,p,f,b,_)=>{u.jb("ReduceMax",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871073:(s,p,f,b,_)=>{u.jb("ReduceMin",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871231:(s,p,f,b,_)=>{u.jb("ReduceProd",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871390:(s,p,f,b,_)=>{u.jb("ReduceSum",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871548:(s,p,f,b,_)=>{u.jb("ReduceL1",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871705:(s,p,f,b,_)=>{u.jb("ReduceL2",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},871862:(s,p,f,b,_)=>{u.jb("ReduceLogSum",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872023:(s,p,f,b,_)=>{u.jb("ReduceSumSquare",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872187:(s,p,f,b,_)=>{u.jb("ReduceLogSumExp",s,{keepDims:!!p,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},872351:s=>{u.jb("Where",s,void 
0)},872404:(s,p,f)=>{u.jb("Transpose",s,{perm:p?Array.from(i().subarray(p>>>0,f>>>0)):[]})},872512:(s,p,f,b)=>{u.jb("DepthToSpace",s,{blocksize:p,mode:ze(f),format:b?"NHWC":"NCHW"})},872645:(s,p,f,b)=>{u.jb("DepthToSpace",s,{blocksize:p,mode:ze(f),format:b?"NHWC":"NCHW"})},872778:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z,me)=>{u.jb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:p,dilations:[f],group:b,kernelShape:[_],pads:[I,O],strides:[B],wIsConst:()=>!!t()[H>>>0],outputPadding:Q?Array.from(i().subarray(Q>>>0,fe>>>0)):[],outputShape:be?Array.from(i().subarray(be>>>0,z>>>0)):[],activation:ze(me)})},873179:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z)=>{u.jb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:p,dilations:Array.from(i().subarray(f>>>0,2+(f>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(_>>>0,2+(_>>>0)>>>0)),pads:Array.from(i().subarray(I>>>0,4+(I>>>0)>>>0)),strides:Array.from(i().subarray(O>>>0,2+(O>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],outputShape:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[],activation:ze(z)})},873744:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z,me)=>{u.jb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:p,dilations:[f],group:b,kernelShape:[_],pads:[I,O],strides:[B],wIsConst:()=>!!t()[H>>>0],outputPadding:Q?Array.from(i().subarray(Q>>>0,fe>>>0)):[],outputShape:be?Array.from(i().subarray(be>>>0,z>>>0)):[],activation:ze(me)})},874145:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z)=>{u.jb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:p,dilations:Array.from(i().subarray(f>>>0,2+(f>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(_>>>0,2+(_>>>0)>>>0)),pads:Array.from(i().subarray(I>>>0,4+(I>>>0)>>>0)),strides:Array.from(i().subarray(O>>>0,2+(O>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],outputShape:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[],activation:ze(z)})},874710:(s,p)=>{u.jb("GlobalAveragePool",s,{format:p?"NHWC":"NCHW"})},874801:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z
)=>{u.jb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:B?Array.from(i().subarray(B>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},875216:(s,p)=>{u.jb("GlobalAveragePool",s,{format:p?"NHWC":"NCHW"})},875307:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z)=>{u.jb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:B?Array.from(i().subarray(B>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},875722:(s,p)=>{u.jb("GlobalMaxPool",s,{format:p?"NHWC":"NCHW"})},875809:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z)=>{u.jb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:B?Array.from(i().subarray(B>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},876220:(s,p)=>{u.jb("GlobalMaxPool",s,{format:p?"NHWC":"NCHW"})},876307:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z)=>{u.jb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:p,ceil_mode:f,count_include_pad:b,storage_order:_,dilations:I?Array.from(i().subarray(I>>>0,O>>>0)):[],kernel_shape:B?Array.from(i().subarray(B>>>0,L>>>0)):[],pads:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],strides:fe?Array.from(i().subarray(fe>>>0,be>>>0)):[]})},876718:(s,p,f,b,_)=>{u.jb("Gemm",s,{alpha:p,beta:f,transA:b,transB:_})},876822:s=>{u.jb("MatMul",s,void 
0)},876876:(s,p,f,b)=>{u.jb("ArgMax",s,{keepDims:!!p,selectLastIndex:!!f,axis:b})},876984:(s,p,f,b)=>{u.jb("ArgMin",s,{keepDims:!!p,selectLastIndex:!!f,axis:b})},877092:(s,p)=>{u.jb("Softmax",s,{axis:p})},877155:(s,p)=>{u.jb("Concat",s,{axis:p})},877215:(s,p,f,b,_)=>{u.jb("Split",s,{axis:p,numOutputs:f,splitSizes:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},877355:s=>{u.jb("Expand",s,void 0)},877409:(s,p)=>{u.jb("Gather",s,{axis:Number(p)})},877480:(s,p)=>{u.jb("GatherElements",s,{axis:Number(p)})},877559:(s,p,f,b,_,I,O,B,L,H,Q)=>{u.jb("Resize",s,{antialias:p,axes:f?Array.from(i().subarray(f>>>0,b>>>0)):[],coordinateTransformMode:ze(_),cubicCoeffA:I,excludeOutside:O,extrapolationValue:B,keepAspectRatioPolicy:ze(L),mode:ze(H),nearestMode:ze(Q)})},877905:(s,p,f,b,_,I,O)=>{u.jb("Slice",s,{starts:p?Array.from(i().subarray(p>>>0,f>>>0)):[],ends:b?Array.from(i().subarray(b>>>0,_>>>0)):[],axes:I?Array.from(i().subarray(I>>>0,O>>>0)):[]})},878121:s=>{u.jb("Tile",s,void 0)},878173:(s,p,f)=>{u.jb("InstanceNormalization",s,{epsilon:p,format:f?"NHWC":"NCHW"})},878287:(s,p,f)=>{u.jb("InstanceNormalization",s,{epsilon:p,format:f?"NHWC":"NCHW"})},878401:s=>{u.jb("Range",s,void 0)},878454:(s,p)=>{u.jb("Einsum",s,{equation:ze(p)})},878535:(s,p,f,b,_)=>{u.jb("Pad",s,{mode:p,value:f,pads:b?Array.from(i().subarray(b>>>0,_>>>0)):[]})},878662:(s,p,f,b,_,I)=>{u.jb("BatchNormalization",s,{epsilon:p,momentum:f,spatial:!!_,trainingMode:!!b,format:I?"NHWC":"NCHW"})},878831:(s,p,f,b,_,I)=>{u.jb("BatchNormalization",s,{epsilon:p,momentum:f,spatial:!!_,trainingMode:!!b,format:I?"NHWC":"NCHW"})},879e3:(s,p,f)=>{u.jb("CumSum",s,{exclusive:Number(p),reverse:Number(f)})},879097:(s,p,f)=>{u.jb("DequantizeLinear",s,{axis:p,blockSize:f})},879187:(s,p,f,b,_,I,O,B,L)=>{u.jb("Attention",s,{numHeads:p,isUnidirectional:f,maskFilterValue:b,scale:_,doRotary:I,qkvHiddenSizes:O?Array.from(i().subarray(Number(B)>>>0,Number(B)+O>>>0)):[],pastPresentShareBuffer:!!L})},879459:s=>{u.jb("BiasAdd",s,void 
0)},879514:s=>{u.jb("BiasSplitGelu",s,void 0)},879575:s=>{u.jb("FastGelu",s,void 0)},879631:(s,p,f,b,_,I,O,B,L,H,Q,fe,be,z,me,Se)=>{u.jb("Conv",s,{format:fe?"NHWC":"NCHW",auto_pad:p,dilations:f?Array.from(i().subarray(f>>>0,b>>>0)):[],group:_,kernel_shape:I?Array.from(i().subarray(I>>>0,O>>>0)):[],pads:B?Array.from(i().subarray(B>>>0,L>>>0)):[],strides:H?Array.from(i().subarray(H>>>0,Q>>>0)):[],w_is_const:()=>!!t()[be>>>0],activation:ze(z),activation_params:me?Array.from(d().subarray(me>>>0,Se>>>0)):[]})},880127:s=>{u.jb("Gelu",s,void 0)},880179:(s,p,f,b,_,I,O,B,L)=>{u.jb("GroupQueryAttention",s,{numHeads:p,kvNumHeads:f,scale:b,softcap:_,doRotary:I,rotaryInterleaved:O,smoothSoftmax:B,localWindowSize:L})},880396:(s,p,f,b)=>{u.jb("LayerNormalization",s,{axis:p,epsilon:f,simplified:!!b})},880507:(s,p,f,b)=>{u.jb("LayerNormalization",s,{axis:p,epsilon:f,simplified:!!b})},880618:(s,p,f,b,_,I)=>{u.jb("MatMulNBits",s,{k:p,n:f,accuracyLevel:b,bits:_,blockSize:I})},880745:(s,p,f,b,_,I)=>{u.jb("MultiHeadAttention",s,{numHeads:p,isUnidirectional:f,maskFilterValue:b,scale:_,doRotary:I})},880904:(s,p)=>{u.jb("QuickGelu",s,{alpha:p})},880968:(s,p,f,b,_)=>{u.jb("RotaryEmbedding",s,{interleaved:!!p,numHeads:f,rotaryEmbeddingDim:b,scale:_})},881107:(s,p,f)=>{u.jb("SkipLayerNormalization",s,{epsilon:p,simplified:!!f})},881209:(s,p,f)=>{u.jb("SkipLayerNormalization",s,{epsilon:p,simplified:!!f})},881311:(s,p,f,b)=>{u.jb("GatherBlockQuantized",s,{gatherAxis:p,quantizeAxis:f,blockSize:b})},881432:s=>{u.Zb(s)},881466:(s,p)=>u.bc(s,p,u.Fb.fc,u.Fb.errors)};function nc(s,p,f){return wi(async()=>{await u.Xb(s,p,f)})}function oc(){return typeof wasmOffsetConverter<"u"}function bn(s){this.name="ExitStatus",this.message=`Program terminated with exit(${s})`,this.status=s}var wn=s=>{s.terminate(),s.onmessage=()=>{}},Wo=s=>{pt.length==0&&(qo(),Fo(pt[0]));var p=pt.pop();if(!p)return 6;vt.push(p),Ze[s.Ab]=p,p.Ab=s.Ab;var f={cmd:"run",start_routine:s.hc,arg:s.Qb,pthread_ptr:s.Ab};return 
p.postMessage(f,s.mc),0},_t=0,xe=(s,p,...f)=>{for(var b=2*f.length,_=Un(),I=Rn(8*b),O=I>>>3,B=0;B<f.length;B++){var L=f[B];typeof L=="bigint"?(G[O+2*B]=1n,G[O+2*B+1]=L):(G[O+2*B]=0n,l()[O+2*B+1>>>0]=L)}return s=Hi(s,0,b,I,p),yr(_),s};function _n(s){if(y)return xe(0,1,s);if(de=s,!(0<_t)){for(var p of vt)wn(p);for(p of pt)wn(p);pt=[],vt=[],Ze=[],$e=!0}A(s,new bn(s))}function No(s){if(y)return xe(1,0,s);vn(s)}var vn=s=>{if(de=s,y)throw No(s),"unwind";_n(s)},pt=[],vt=[],Lo=[],Ze={},Ho=s=>{var p=s.Ab;delete Ze[p],pt.push(s),vt.splice(vt.indexOf(s),1),s.Ab=0,Mn(p)};function Go(){Lo.forEach(s=>s())}var Fo=s=>new Promise(p=>{s.onmessage=_=>{var I=(_=_.data).cmd;if(_.targetThread&&_.targetThread!=Rt()){var O=Ze[_.targetThread];O?O.postMessage(_,_.transferList):Y(`Internal error! Worker sent a message "${I}" to target pthread ${_.targetThread}, but that thread no longer exists!`)}else I==="checkMailbox"?sr():I==="spawnThread"?Wo(_):I==="cleanupThread"?Ho(Ze[_.thread]):I==="killThread"?(_=_.thread,I=Ze[_],delete Ze[_],wn(I),Mn(_),vt.splice(vt.indexOf(I),1),I.Ab=0):I==="cancelThread"?Ze[_.thread].postMessage({cmd:"cancel"}):I==="loaded"?(s.loaded=!0,p(s)):I==="alert"?alert(`Thread ${_.threadId}: ${_.text}`):_.target==="setimmediate"?s.postMessage(_):I==="callHandler"?u[_.handler](..._.args):I&&Y(`worker sent an unknown command ${I}`)},s.onerror=_=>{throw Y(`worker sent an error! 
${_.filename}:${_.lineno}: ${_.message}`),_};var f,b=[];for(f of[])u.hasOwnProperty(f)&&b.push(f);s.postMessage({cmd:"load",handlers:b,wasmMemory:ue,wasmModule:K})});function qo(){var s=new Worker(new URL(import.meta.url),{type:"module",workerData:"em-pthread",name:"em-pthread"});pt.push(s)}var ar=s=>{for(;0<s.length;)s.shift()(u)},ic=()=>{var s=Rt(),p=a()[s+52>>>2>>>0];s=a()[s+56>>>2>>>0],Fi(p,p-s),yr(p)},ac=(s,p)=>{_t=0,s=qi(s,p),0<_t?de=s:gr(s)};class sc{constructor(p){this.Jb=p-24}}function uc(s,p,f){var b=new sc(s>>>=0);throw p>>>=0,f>>>=0,a()[b.Jb+16>>>2>>>0]=0,a()[b.Jb+4>>>2>>>0]=p,a()[b.Jb+8>>>2>>>0]=f,s}function jo(s,p,f,b){return y?xe(2,1,s,p,f,b):Ko(s,p,f,b)}function Ko(s,p,f,b){if(s>>>=0,p>>>=0,f>>>=0,b>>>=0,S===void 0)return Y("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var _=[];return y&&_.length===0?jo(s,p,f,b):(s={hc:f,Ab:s,Qb:b,mc:_},y?(s.Mb="spawnThread",postMessage(s,_),0):Wo(s))}var Yo=typeof TextDecoder<"u"?new TextDecoder("utf8"):void 0,Xo=(s,p,f)=>{var b=(p>>>=0)+f;for(f=p;s[f]&&!(f>=b);)++f;if(16<f-p&&s.buffer&&Yo)return Yo.decode(s.buffer instanceof S?s.slice(p,f):s.subarray(p,f));for(b="";p<f;){var _=s[p++];if(128&_){var I=63&s[p++];if((224&_)==192)b+=String.fromCharCode((31&_)<<6|I);else{var O=63&s[p++];65536>(_=(240&_)==224?(15&_)<<12|I<<6|O:(7&_)<<18|I<<12|O<<6|63&s[p++])?b+=String.fromCharCode(_):(_-=65536,b+=String.fromCharCode(55296|_>>10,56320|1023&_))}}else b+=String.fromCharCode(_)}return b},ze=(s,p)=>(s>>>=0)?Xo(r(),s,p):"";function Qo(s,p,f){return y?xe(3,1,s,p,f):0}function Zo(s,p){if(y)return xe(4,1,s,p)}var $n=s=>{for(var p=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);127>=b?p++:2047>=b?p+=2:55296<=b&&57343>=b?(p+=4,++f):p+=3}return p},Jo=(s,p,f,b)=>{if(!(0<b))return 0;var _=f>>>=0;b=f+b-1;for(var I=0;I<s.length;++I){var 
O=s.charCodeAt(I);if(55296<=O&&57343>=O&&(O=65536+((1023&O)<<10)|1023&s.charCodeAt(++I)),127>=O){if(f>=b)break;p[f++>>>0]=O}else{if(2047>=O){if(f+1>=b)break;p[f++>>>0]=192|O>>6}else{if(65535>=O){if(f+2>=b)break;p[f++>>>0]=224|O>>12}else{if(f+3>=b)break;p[f++>>>0]=240|O>>18,p[f++>>>0]=128|O>>12&63}p[f++>>>0]=128|O>>6&63}p[f++>>>0]=128|63&O}}return p[f>>>0]=0,f-_},Dt=(s,p,f)=>Jo(s,r(),p,f);function ei(s,p){if(y)return xe(5,1,s,p)}function ti(s,p,f){if(y)return xe(6,1,s,p,f)}function ri(s,p,f){return y?xe(7,1,s,p,f):0}function ni(s,p){if(y)return xe(8,1,s,p)}function oi(s,p,f){if(y)return xe(9,1,s,p,f)}function ii(s,p,f,b){if(y)return xe(10,1,s,p,f,b)}function ai(s,p,f,b){if(y)return xe(11,1,s,p,f,b)}function si(s,p,f,b){if(y)return xe(12,1,s,p,f,b)}function ui(s){if(y)return xe(13,1,s)}function di(s,p){if(y)return xe(14,1,s,p)}function li(s,p,f){if(y)return xe(15,1,s,p,f)}var ci,mt,dc=()=>{Ot("")},Je=s=>{for(var p="";r()[s>>>0];)p+=ci[r()[s++>>>0]];return p},xn={},Sn={},lc={};function ut(s,p,f={}){if(!("argPackAdvance"in p))throw new TypeError("registerType registeredInstance requires argPackAdvance");return function(b,_,I={}){var O=_.name;if(!b)throw new mt(`type "${O}" must have a positive integer typeid pointer`);if(Sn.hasOwnProperty(b)){if(I.Sb)return;throw new mt(`Cannot register type '${O}' twice`)}Sn[b]=_,delete lc[b],xn.hasOwnProperty(b)&&(_=xn[b],delete xn[b],_.forEach(B=>B()))}(s,p,f)}var pi=(s,p,f)=>{switch(p){case 1:return f?b=>t()[b>>>0]:b=>r()[b>>>0];case 2:return f?b=>n()[b>>>1>>>0]:b=>o()[b>>>1>>>0];case 4:return f?b=>i()[b>>>2>>>0]:b=>a()[b>>>2>>>0];case 8:return f?b=>G[b>>>3]:b=>ye[b>>>3];default:throw new TypeError(`invalid integer width (${p}): ${s}`)}};function cc(s,p,f){f>>>=0,ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:b=>b,toWireType:function(b,_){if(typeof _!="bigint"&&typeof _!="number")throw _=_===null?"null":(b=typeof _)=="object"||b==="array"||b==="function"?_.toString():""+_,new TypeError(`Cannot convert "${_}" to ${this.name}`);return 
typeof _=="number"&&(_=BigInt(_)),_},argPackAdvance:ft,readValueFromPointer:pi(p,f,p.indexOf("u")==-1),Db:null})}var ft=8;function pc(s,p,f,b){ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:function(_){return!!_},toWireType:function(_,I){return I?f:b},argPackAdvance:ft,readValueFromPointer:function(_){return this.fromWireType(r()[_>>>0])},Db:null})}var Tn=[],dt=[];function In(s){9<(s>>>=0)&&--dt[s+1]==0&&(dt[s]=void 0,Tn.push(s))}var qe=s=>{if(!s)throw new mt("Cannot use deleted val. handle = "+s);return dt[s]},je=s=>{switch(s){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:let p=Tn.pop()||dt.length;return dt[p]=s,dt[p+1]=1,p}};function Cn(s){return this.fromWireType(a()[s>>>2>>>0])}var mc={name:"emscripten::val",fromWireType:s=>{var p=qe(s);return In(s),p},toWireType:(s,p)=>je(p),argPackAdvance:ft,readValueFromPointer:Cn,Db:null};function fc(s){return ut(s>>>0,mc)}var hc=(s,p)=>{switch(p){case 4:return function(f){return this.fromWireType(d()[f>>>2>>>0])};case 8:return function(f){return this.fromWireType(l()[f>>>3>>>0])};default:throw new TypeError(`invalid float width (${p}): ${s}`)}};function gc(s,p,f){f>>>=0,ut(s>>>=0,{name:p=Je(p>>>0),fromWireType:b=>b,toWireType:(b,_)=>_,argPackAdvance:ft,readValueFromPointer:hc(p,f),Db:null})}function yc(s,p,f,b,_){if(s>>>=0,f>>>=0,p=Je(p>>>0),_===-1&&(_=4294967295),_=B=>B,b===0){var I=32-8*f;_=B=>B<<I>>>I}var O=p.includes("unsigned")?function(B,L){return L>>>0}:function(B,L){return L};ut(s,{name:p,fromWireType:_,toWireType:O,argPackAdvance:ft,readValueFromPointer:pi(p,f,b!==0),Db:null})}function bc(s,p,f){function b(I){var O=a()[I>>>2>>>0];return I=a()[I+4>>>2>>>0],new _(t().buffer,I,O)}var _=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][p];ut(s>>>=0,{name:f=Je(f>>>0),fromWireType:b,argPackAdvance:ft,readValueFromPointer:b},{Sb:!0})}function wc(s,p){s>>>=0;var 
f=(p=Je(p>>>0))==="std::string";ut(s,{name:p,fromWireType:function(b){var _=a()[b>>>2>>>0],I=b+4;if(f)for(var O=I,B=0;B<=_;++B){var L=I+B;if(B==_||r()[L>>>0]==0){if(O=ze(O,L-O),H===void 0)var H=O;else H+=String.fromCharCode(0),H+=O;O=L+1}}else{for(H=Array(_),B=0;B<_;++B)H[B]=String.fromCharCode(r()[I+B>>>0]);H=H.join("")}return tt(b),H},toWireType:function(b,_){_ instanceof ArrayBuffer&&(_=new Uint8Array(_));var I=typeof _=="string";if(!(I||_ instanceof Uint8Array||_ instanceof Uint8ClampedArray||_ instanceof Int8Array))throw new mt("Cannot pass non-string to std::string");var O=f&&I?$n(_):_.length,B=hr(4+O+1),L=B+4;if(a()[B>>>2>>>0]=O,f&&I)Dt(_,L,O+1);else if(I)for(I=0;I<O;++I){var H=_.charCodeAt(I);if(255<H)throw tt(L),new mt("String has UTF-16 code units that do not fit in 8 bits");r()[L+I>>>0]=H}else for(I=0;I<O;++I)r()[L+I>>>0]=_[I];return b!==null&&b.push(tt,B),B},argPackAdvance:ft,readValueFromPointer:Cn,Db(b){tt(b)}})}var mi=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,_c=(s,p)=>{for(var f=s>>1,b=f+p/2;!(f>=b)&&o()[f>>>0];)++f;if(32<(f<<=1)-s&&mi)return mi.decode(r().slice(s,f));for(f="",b=0;!(b>=p/2);++b){var _=n()[s+2*b>>>1>>>0];if(_==0)break;f+=String.fromCharCode(_)}return f},vc=(s,p,f)=>{if(f??=2147483647,2>f)return 0;var b=p;f=(f-=2)<2*s.length?f/2:s.length;for(var _=0;_<f;++_){var I=s.charCodeAt(_);n()[p>>>1>>>0]=I,p+=2}return n()[p>>>1>>>0]=0,p-b},$c=s=>2*s.length,xc=(s,p)=>{for(var f=0,b="";!(f>=p/4);){var _=i()[s+4*f>>>2>>>0];if(_==0)break;++f,65536<=_?(_-=65536,b+=String.fromCharCode(55296|_>>10,56320|1023&_)):b+=String.fromCharCode(_)}return b},Sc=(s,p,f)=>{if(p>>>=0,f??=2147483647,4>f)return 0;var b=p;f=b+f-4;for(var _=0;_<s.length;++_){var I=s.charCodeAt(_);if(55296<=I&&57343>=I&&(I=65536+((1023&I)<<10)|1023&s.charCodeAt(++_)),i()[p>>>2>>>0]=I,(p+=4)+4>f)break}return i()[p>>>2>>>0]=0,p-b},Tc=s=>{for(var p=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);55296<=b&&57343>=b&&++f,p+=4}return p};function 
Ic(s,p,f){if(s>>>=0,p>>>=0,f=Je(f>>>=0),p===2)var b=_c,_=vc,I=$c,O=B=>o()[B>>>1>>>0];else p===4&&(b=xc,_=Sc,I=Tc,O=B=>a()[B>>>2>>>0]);ut(s,{name:f,fromWireType:B=>{for(var L,H=a()[B>>>2>>>0],Q=B+4,fe=0;fe<=H;++fe){var be=B+4+fe*p;fe!=H&&O(be)!=0||(Q=b(Q,be-Q),L===void 0?L=Q:(L+=String.fromCharCode(0),L+=Q),Q=be+p)}return tt(B),L},toWireType:(B,L)=>{if(typeof L!="string")throw new mt(`Cannot pass non-string to C++ string type ${f}`);var H=I(L),Q=hr(4+H+p);return a()[Q>>>2>>>0]=H/p,_(L,Q+4,H+p),B!==null&&B.push(tt,Q),Q},argPackAdvance:ft,readValueFromPointer:Cn,Db(B){tt(B)}})}function Cc(s,p){ut(s>>>=0,{Tb:!0,name:p=Je(p>>>0),argPackAdvance:0,fromWireType:()=>{},toWireType:()=>{}})}var Ac=()=>1;function kc(s){Bn(s>>>0,!g,1,!w,131072,!1),Go()}var fi=s=>{if(!$e)try{if(s(),!(0<_t))try{y?gr(de):vn(de)}catch(p){p instanceof bn||p=="unwind"||A(1,p)}}catch(p){p instanceof bn||p=="unwind"||A(1,p)}};function An(s){s>>>=0,typeof Atomics.nc=="function"&&(Atomics.nc(i(),s>>>2,s).value.then(sr),s+=128,Atomics.store(i(),s>>>2,1))}var sr=()=>{var s=Rt();s&&(An(s),fi(Gi))};function Ec(s,p){(s>>>=0)==p>>>0?setTimeout(sr):y?postMessage({targetThread:s,cmd:"checkMailbox"}):(s=Ze[s])&&s.postMessage({cmd:"checkMailbox"})}var kn=[];function Pc(s,p,f,b,_){for(p>>>=0,b/=2,kn.length=b,f=_>>>0>>>3,_=0;_<b;_++)kn[_]=G[f+2*_]?G[f+2*_+1]:l()[f+2*_+1>>>0];return(p?yn[p]:_p[s])(...kn)}function zc(s){s>>>=0,y?postMessage({cmd:"cleanupThread",thread:s}):Ho(Ze[s])}function Oc(s){}var En=(s,p)=>{var f=Sn[s];if(f===void 0)throw s=Wi(s),f=Je(s),tt(s),new mt(`${p} has unknown type ${f}`);return f},hi=(s,p,f)=>{var b=[];return s=s.toWireType(b,f),b.length&&(a()[p>>>2>>>0]=je(b)),s};function Dc(s,p,f){return p>>>=0,f>>>=0,s=qe(s>>>0),p=En(p,"emval::as"),hi(p,f,s)}var ur=s=>{try{s()}catch(p){Ot(p)}},ht=0,et=null,gi=0,dr=[],yi={},bi={},Bc=0,Pn=null,Mc=[];function wi(s){return function(p){if(!$e){if(ht===0){var f=!1,b=!1;p((_=0)=>{if(!$e&&(gi=_,f=!0,b)){ht=2,ur(()=>Yi(et)),typeof 
Browser<"u"&&Browser.Kb.Rb&&Browser.Kb.resume(),_=!1;try{var I=function(){var L=i()[et+8>>>2>>>0];return L=X[bi[L]],--_t,L()}()}catch(L){I=L,_=!0}var O=!1;if(!et){var B=Pn;B&&(Pn=null,(_?B.reject:B.resolve)(I),O=!0)}if(_&&!O)throw I}}),b=!0,f||(ht=1,et=function(){var _=hr(65548),I=_+12;a()[_>>>2>>>0]=I,a()[_+4>>>2>>>0]=I+65536,I=dr[0];var O=yi[I];return O===void 0&&(O=Bc++,yi[I]=O,bi[O]=I),I=O,i()[_+8>>>2>>>0]=I,_}(),typeof Browser<"u"&&Browser.Kb.Rb&&Browser.Kb.pause(),ur(()=>ji(et)))}else ht===2?(ht=0,ur(Xi),tt(et),et=null,Mc.forEach(fi)):Ot(`invalid state: ${ht}`);return gi}}(p=>{s().then(p)})}function Rc(s){return s>>>=0,wi(()=>(s=qe(s)).then(je))}var lr=[];function Uc(s,p,f,b){return f>>>=0,b>>>=0,(s=lr[s>>>0])(null,p=qe(p>>>0),f,b)}var Vc={},cr=s=>{var p=Vc[s];return p===void 0?Je(s):p};function Wc(s,p,f,b,_){return f>>>=0,b>>>=0,_>>>=0,(s=lr[s>>>0])(p=qe(p>>>0),p[f=cr(f)],b,_)}var _i=()=>typeof globalThis=="object"?globalThis:Function("return this")();function Nc(s){return(s>>>=0)==0?je(_i()):(s=cr(s),je(_i()[s]))}var Lc=s=>{var p=lr.length;return lr.push(s),p},Hc=(s,p)=>{for(var f=Array(s),b=0;b<s;++b)f[b]=En(a()[p+4*b>>>2>>>0],"parameter "+b);return f},vi=(s,p)=>Object.defineProperty(p,"name",{value:s});function Gc(s,p,f){var b=(p=Hc(s,p>>>0)).shift();s--;var _=`return function (obj, func, destructorsRef, args) {
3804
+ `,I=0,O=[];f===0&&O.push("obj");for(var B=["retType"],L=[b],H=0;H<s;++H)O.push("arg"+H),B.push("argType"+H),L.push(p[H]),_+=` var arg${H} = argType${H}.readValueFromPointer(args${I?"+"+I:""});
3805
3805
  `,I+=p[H].argPackAdvance;return _+=` var rv = ${f===1?"new func":"func.call"}(${O.join(", ")});
3806
- `,b.Tb||(D.push("emval_returnValue"),L.push(mi),_+=` return emval_returnValue(retType, destructorsRef, rv);
3807
- `),D.push(_+`};
3808
- `),s=function(X){var fe=Function;if(!(fe instanceof Function))throw new TypeError(`new_ called with constructor type ${typeof fe} which is not a function`);var be=wi(fe.name||"unknownFunctionName",function(){});return be.prototype=fe.prototype,be=new be,(X=fe.apply(be,X))instanceof Object?X:be}(D)(...L),f=`methodCaller<(${p.map(X=>X.name).join(", ")}) => ${b.name}>`,Lc(wi(f,s))}function Fc(s){return s=cr(s>>>0),je(u[s])}function qc(s,p){return p>>>=0,s=qe(s>>>0),p=qe(p),je(s[p])}function jc(s){9<(s>>>=0)&&(dt[s+1]+=1)}function Kc(){return je([])}function Yc(s){s=qe(s>>>0);for(var p=Array(s.length),f=0;f<s.length;f++)p[f]=s[f];return je(p)}function Xc(s){return je(cr(s>>>0))}function Zc(){return je({})}function Qc(s){for(var p=qe(s>>>=0);p.length;){var f=p.pop();p.pop()(f)}Tn(s)}function Jc(s,p,f){p>>>=0,f>>>=0,s=qe(s>>>0),p=qe(p),f=qe(f),s[p]=f}function ep(s,p){return p>>>=0,s=(s=kn(s>>>0,"_emval_take_value")).readValueFromPointer(p),je(s)}function tp(s,p){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),p>>>=0,s=new Date(1e3*s),i()[p>>>2>>>0]=s.getUTCSeconds(),i()[p+4>>>2>>>0]=s.getUTCMinutes(),i()[p+8>>>2>>>0]=s.getUTCHours(),i()[p+12>>>2>>>0]=s.getUTCDate(),i()[p+16>>>2>>>0]=s.getUTCMonth(),i()[p+20>>>2>>>0]=s.getUTCFullYear()-1900,i()[p+24>>>2>>>0]=s.getUTCDay(),s=(s.getTime()-Date.UTC(s.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,i()[p+28>>>2>>>0]=s}var Dt=s=>s%4==0&&(s%100!=0||s%400==0),_i=[0,31,60,91,121,152,182,213,244,274,305,335],vi=[0,31,59,90,120,151,181,212,243,273,304,334];function rp(s,p){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),p>>>=0,s=new Date(1e3*s),i()[p>>>2>>>0]=s.getSeconds(),i()[p+4>>>2>>>0]=s.getMinutes(),i()[p+8>>>2>>>0]=s.getHours(),i()[p+12>>>2>>>0]=s.getDate(),i()[p+16>>>2>>>0]=s.getMonth(),i()[p+20>>>2>>>0]=s.getFullYear()-1900,i()[p+24>>>2>>>0]=s.getDay();var f=(Dt(s.getFullYear())?_i:vi)[s.getMonth()]+s.getDate()-1|0;i()[p+28>>>2>>>0]=f,i()[p+36>>>2>>>0]=-60*s.getTimezoneOffset(),f=new 
Date(s.getFullYear(),6,1).getTimezoneOffset();var b=new Date(s.getFullYear(),0,1).getTimezoneOffset();s=0|(f!=b&&s.getTimezoneOffset()==Math.min(b,f)),i()[p+32>>>2>>>0]=s}function np(s){s>>>=0;var p=new Date(i()[s+20>>>2>>>0]+1900,i()[s+16>>>2>>>0],i()[s+12>>>2>>>0],i()[s+8>>>2>>>0],i()[s+4>>>2>>>0],i()[s>>>2>>>0],0),f=i()[s+32>>>2>>>0],b=p.getTimezoneOffset(),_=new Date(p.getFullYear(),6,1).getTimezoneOffset(),I=new Date(p.getFullYear(),0,1).getTimezoneOffset(),O=Math.min(I,_);return 0>f?i()[s+32>>>2>>>0]=+(_!=I&&O==b):0<f!=(O==b)&&(_=Math.max(I,_),p.setTime(p.getTime()+6e4*((0<f?O:_)-b))),i()[s+24>>>2>>>0]=p.getDay(),f=(Dt(p.getFullYear())?_i:vi)[p.getMonth()]+p.getDate()-1|0,i()[s+28>>>2>>>0]=f,i()[s>>>2>>>0]=p.getSeconds(),i()[s+4>>>2>>>0]=p.getMinutes(),i()[s+8>>>2>>>0]=p.getHours(),i()[s+12>>>2>>>0]=p.getDate(),i()[s+16>>>2>>>0]=p.getMonth(),i()[s+20>>>2>>>0]=p.getYear(),s=p.getTime(),BigInt(isNaN(s)?-1:s/1e3)}function $i(s,p,f,b,_,I,O){return y?xe(16,1,s,p,f,b,_,I,O):-52}function xi(s,p,f,b,_,I){if(y)return xe(17,1,s,p,f,b,_,I)}function op(s,p,f,b){s>>>=0,p>>>=0,f>>>=0,b>>>=0;var _=new Date().getFullYear(),I=new Date(_,0,1),O=new Date(_,6,1);_=I.getTimezoneOffset();var D=O.getTimezoneOffset(),L=Math.max(_,D);a()[s>>>2>>>0]=60*L,i()[p>>>2>>>0]=+(_!=D),I=(s=H=>H.toLocaleTimeString(void 0,{hour12:!1,timeZoneName:"short"}).split(" ")[1])(I),O=s(O),D<_?(Bt(I,f,17),Bt(O,b,17)):(Bt(I,b,17),Bt(O,f,17))}var Pn=[],Si=(s,p)=>{Pn.length=0;for(var f;f=r()[s++>>>0];){var b=f!=105;p+=(b&=f!=112)&&p%8?4:0,Pn.push(f==112?a()[p>>>2>>>0]:f==106?G[p>>>3]:f==105?i()[p>>>2>>>0]:l()[p>>>3>>>0]),p+=b?8:4}return Pn};function ip(s,p,f){return s>>>=0,p=Si(p>>>0,f>>>0),gn[s](...p)}function ap(s,p,f){return s>>>=0,p=Si(p>>>0,f>>>0),gn[s](...p)}var sp=()=>{},up=()=>Date.now();function dp(s,p){return Z(ze(s>>>0,p>>>0))}var Ti,lp=()=>{throw _t+=1,"unwind"};function cp(){return 4294901760}Ti=()=>performance.timeOrigin+performance.now();var pp=()=>navigator.hardwareConcurrency;function 
mp(){return Ot("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}function fp(s){s>>>=0;var p=r().length;if(s<=p||4294901760<s)return!1;for(var f=1;4>=f;f*=2){var b=p*(1+.2/f);b=Math.min(b,s+100663296);var _=Math;b=Math.max(s,b);e:{_=(_.min.call(_,4294901760,b+(65536-b%65536)%65536)-de.buffer.byteLength+65535)/65536;try{de.grow(_),Ce();var I=1;break e}catch{}I=void 0}if(I)return!0}return!1}var pr=()=>(Ot("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),Mt={},Ii=s=>{s.forEach(p=>{var f=pr();f&&(Mt[f]=p)})};function hp(){var s=Error().stack.toString().split(`
3809
- `);return s[0]=="Error"&&s.shift(),Ii(s),Mt.Pb=pr(),Mt.ec=s,Mt.Pb}function gp(s,p,f){if(s>>>=0,p>>>=0,Mt.Pb==s)var b=Mt.ec;else(b=Error().stack.toString().split(`
3810
- `))[0]=="Error"&&b.shift(),Ii(b);for(var _=3;b[_]&&pr()!=s;)++_;for(s=0;s<f&&b[s+_];++s)i()[p+4*s>>>2>>>0]=pr();return s}var zn,On={},Ci=()=>{if(!zn){var s,p={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:C||"./this.program"};for(s in On)On[s]===void 0?delete p[s]:p[s]=On[s];var f=[];for(s in p)f.push(`${s}=${p[s]}`);zn=f}return zn};function Ai(s,p){if(y)return xe(18,1,s,p);s>>>=0,p>>>=0;var f=0;return Ci().forEach((b,_)=>{var I=p+f;for(_=a()[s+4*_>>>2>>>0]=I,I=0;I<b.length;++I)t()[_++>>>0]=b.charCodeAt(I);t()[_>>>0]=0,f+=b.length+1}),0}function ki(s,p){if(y)return xe(19,1,s,p);s>>>=0,p>>>=0;var f=Ci();a()[s>>>2>>>0]=f.length;var b=0;return f.forEach(_=>b+=_.length+1),a()[p>>>2>>>0]=b,0}function Ei(s){return y?xe(20,1,s):52}function Pi(s,p,f,b){return y?xe(21,1,s,p,f,b):52}function zi(s,p,f,b){return y?xe(22,1,s,p,f,b):70}var yp=[null,[],[]];function Oi(s,p,f,b){if(y)return xe(23,1,s,p,f,b);p>>>=0,f>>>=0,b>>>=0;for(var _=0,I=0;I<f;I++){var O=a()[p>>>2>>>0],D=a()[p+4>>>2>>>0];p+=8;for(var L=0;L<D;L++){var H=r()[O+L>>>0],X=yp[s];H===0||H===10?((s===1?K:Z)(Ko(X,0)),X.length=0):X.push(H)}_+=D}return a()[b>>>2>>>0]=_,0}var Bi=[31,29,31,30,31,30,31,31,30,31,30,31],Di=[31,28,31,30,31,30,31,31,30,31,30,31],bp=(s,p)=>{t().set(s,p>>>0)};function Mi(s,p,f,b){function _(z,pe,Se){for(z=typeof z=="number"?z.toString():z||"";z.length<pe;)z=Se[0]+z;return z}function I(z,pe){return _(z,pe,"0")}function O(z,pe){function Se(Xi){return 0>Xi?-1:0<Xi?1:0}var $t;return($t=Se(z.getFullYear()-pe.getFullYear()))===0&&($t=Se(z.getMonth()-pe.getMonth()))===0&&($t=Se(z.getDate()-pe.getDate())),$t}function D(z){switch(z.getDay()){case 0:return new Date(z.getFullYear()-1,11,29);case 1:return z;case 2:return new Date(z.getFullYear(),0,3);case 3:return new Date(z.getFullYear(),0,2);case 4:return new Date(z.getFullYear(),0,1);case 5:return new 
Date(z.getFullYear()-1,11,31);case 6:return new Date(z.getFullYear()-1,11,30)}}function L(z){var pe=z.Bb;for(z=new Date(new Date(z.Cb+1900,0,1).getTime());0<pe;){var Se=z.getMonth(),$t=(Dt(z.getFullYear())?Bi:Di)[Se];if(!(pe>$t-z.getDate())){z.setDate(z.getDate()+pe);break}pe-=$t-z.getDate()+1,z.setDate(1),11>Se?z.setMonth(Se+1):(z.setMonth(0),z.setFullYear(z.getFullYear()+1))}return Se=new Date(z.getFullYear()+1,0,4),pe=D(new Date(z.getFullYear(),0,4)),Se=D(Se),0>=O(pe,z)?0>=O(Se,z)?z.getFullYear()+1:z.getFullYear():z.getFullYear()-1}s>>>=0,p>>>=0,f>>>=0,b>>>=0;var H=a()[b+40>>>2>>>0];for(var X in b={kc:i()[b>>>2>>>0],jc:i()[b+4>>>2>>>0],Hb:i()[b+8>>>2>>>0],Lb:i()[b+12>>>2>>>0],Ib:i()[b+16>>>2>>>0],Cb:i()[b+20>>>2>>>0],ub:i()[b+24>>>2>>>0],Bb:i()[b+28>>>2>>>0],rc:i()[b+32>>>2>>>0],ic:i()[b+36>>>2>>>0],lc:H?ze(H):""},f=ze(f),H={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})f=f.replace(new RegExp(X,"g"),H[X]);var fe="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),be="January February March April May June July August September October November December".split(" ");for(X in H={"%a":z=>fe[z.ub].substring(0,3),"%A":z=>fe[z.ub],"%b":z=>be[z.Ib].substring(0,3),"%B":z=>be[z.Ib],"%C":z=>I((z.Cb+1900)/100|0,2),"%d":z=>I(z.Lb,2),"%e":z=>_(z.Lb,2," "),"%g":z=>L(z).toString().substring(2),"%G":L,"%H":z=>I(z.Hb,2),"%I":z=>((z=z.Hb)==0?z=12:12<z&&(z-=12),I(z,2)),"%j":z=>{for(var pe=0,Se=0;Se<=z.Ib-1;pe+=(Dt(z.Cb+1900)?Bi:Di)[Se++]);return I(z.Lb+pe,3)},"%m":z=>I(z.Ib+1,2),"%M":z=>I(z.jc,2),"%n":()=>`
3811
- `,"%p":z=>0<=z.Hb&&12>z.Hb?"AM":"PM","%S":z=>I(z.kc,2),"%t":()=>" ","%u":z=>z.ub||7,"%U":z=>I(Math.floor((z.Bb+7-z.ub)/7),2),"%V":z=>{var pe=Math.floor((z.Bb+7-(z.ub+6)%7)/7);if(2>=(z.ub+371-z.Bb-2)%7&&pe++,pe)pe==53&&((Se=(z.ub+371-z.Bb)%7)==4||Se==3&&Dt(z.Cb)||(pe=1));else{pe=52;var Se=(z.ub+7-z.Bb-1)%7;(Se==4||Se==5&&Dt(z.Cb%400-1))&&pe++}return I(pe,2)},"%w":z=>z.ub,"%W":z=>I(Math.floor((z.Bb+7-(z.ub+6)%7)/7),2),"%y":z=>(z.Cb+1900).toString().substring(2),"%Y":z=>z.Cb+1900,"%z":z=>{var pe=0<=(z=z.ic);return z=Math.abs(z)/60,(pe?"+":"-")+("0000"+(z/60*100+z%60)).slice(-4)},"%Z":z=>z.lc,"%%":()=>"%"},f=f.replace(/%%/g,"\0\0"),H)f.includes(X)&&(f=f.replace(new RegExp(X,"g"),H[X](b)));return X=function(z){var pe=Array(vn(z)+1);return Zo(z,pe,0,pe.length),pe}(f=f.replace(/\0\0/g,"%")),X.length>p?0:(bp(X,s),X.length-1)}function wp(s,p,f,b){return Mi(s>>>0,p>>>0,f>>>0,b>>>0)}y||function(){for(var s=u.numThreads-1;s--;)Go();bt.unshift(()=>{Ue++,function(p){y?p():Promise.all(pt.map(Ho)).then(p)}(()=>zo())})}();for(var Ri=Array(256),mr=0;256>mr;++mr)Ri[mr]=String.fromCharCode(mr);di=Ri,mt=u.BindingError=class extends Error{constructor(s){super(s),this.name="BindingError"}},u.InternalError=class extends Error{constructor(s){super(s),this.name="InternalError"}},dt.push(0,1,void 0,1,null,1,!0,1,!1,1),u.count_emval_handles=()=>dt.length/2-5-Sn.length;var _p=[wn,Vo,Fo,Yo,Xo,Qo,Jo,ei,ti,ri,ni,oi,ii,ai,si,ui,$i,xi,Ai,ki,Ei,Pi,zi,Oi],j=function(){function s(f,b){return j=f.exports,j=function(){var _=j,I={};for(let[O,D]of Object.entries(_))I[O]=typeof D=="function"?(...L)=>{dr.push(O);try{return D(...L)}finally{$e||(dr.pop(),et&&ht===1&&dr.length===0&&(ht=0,_t+=1,ur(qi),typeof Fibers<"u"&&Fibers.sc()))}}:D;return I}(),j=function(){var 
_=j,I=D=>L=>D(L)>>>0,O=D=>()=>D()>>>0;return(_=Object.assign({},_)).Ca=I(_.Ca),_.fb=O(_.fb),_.gb=I(_.gb),_.emscripten_main_runtime_thread_id=O(_.emscripten_main_runtime_thread_id),_.sb=I(_.sb),_.tb=O(_.tb),_}(),No.push(j.ib),Ae.unshift(j.Ba),Y=b,zo(),j}var p=Ro();if(Ue++,u.instantiateWasm)try{return u.instantiateWasm(p,s)}catch(f){Z(`Module.instantiateWasm callback failed with error: ${f}`),m(f)}return hn||=u.locateFile?Oo("ort-wasm-simd-threaded.jsep.wasm")?"ort-wasm-simd-threaded.jsep.wasm":u.locateFile?u.locateFile("ort-wasm-simd-threaded.jsep.wasm",P):P+"ort-wasm-simd-threaded.jsep.wasm":new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href,function(f,b){var _=hn;return B||typeof WebAssembly.instantiateStreaming!="function"||Oo(_)||Bo(_)||typeof fetch!="function"?Mo(_,f,b):fetch(_,{credentials:"same-origin"}).then(I=>WebAssembly.instantiateStreaming(I,f).then(b,function(O){return Z(`wasm streaming compile failed: ${O}`),Z("falling back to ArrayBuffer 
instantiation"),Mo(_,f,b)}))}(p,function(f){s(f.instance,f.module)}).catch(m),{}}(),Ui=s=>(Ui=j.Ca)(s),Vi=()=>(Vi=j.Da)();u._OrtInit=(s,p)=>(u._OrtInit=j.Ea)(s,p),u._OrtGetLastError=(s,p)=>(u._OrtGetLastError=j.Fa)(s,p),u._OrtCreateSessionOptions=(s,p,f,b,_,I,O,D,L,H)=>(u._OrtCreateSessionOptions=j.Ga)(s,p,f,b,_,I,O,D,L,H),u._OrtAppendExecutionProvider=(s,p)=>(u._OrtAppendExecutionProvider=j.Ha)(s,p),u._OrtAddFreeDimensionOverride=(s,p,f)=>(u._OrtAddFreeDimensionOverride=j.Ia)(s,p,f),u._OrtAddSessionConfigEntry=(s,p,f)=>(u._OrtAddSessionConfigEntry=j.Ja)(s,p,f),u._OrtReleaseSessionOptions=s=>(u._OrtReleaseSessionOptions=j.Ka)(s),u._OrtCreateSession=(s,p,f)=>(u._OrtCreateSession=j.La)(s,p,f),u._OrtReleaseSession=s=>(u._OrtReleaseSession=j.Ma)(s),u._OrtGetInputOutputCount=(s,p,f)=>(u._OrtGetInputOutputCount=j.Na)(s,p,f),u._OrtGetInputName=(s,p)=>(u._OrtGetInputName=j.Oa)(s,p),u._OrtGetOutputName=(s,p)=>(u._OrtGetOutputName=j.Pa)(s,p),u._OrtFree=s=>(u._OrtFree=j.Qa)(s),u._OrtCreateTensor=(s,p,f,b,_,I)=>(u._OrtCreateTensor=j.Ra)(s,p,f,b,_,I),u._OrtGetTensorData=(s,p,f,b,_)=>(u._OrtGetTensorData=j.Sa)(s,p,f,b,_),u._OrtReleaseTensor=s=>(u._OrtReleaseTensor=j.Ta)(s),u._OrtCreateRunOptions=(s,p,f,b)=>(u._OrtCreateRunOptions=j.Ua)(s,p,f,b),u._OrtAddRunConfigEntry=(s,p,f)=>(u._OrtAddRunConfigEntry=j.Va)(s,p,f),u._OrtReleaseRunOptions=s=>(u._OrtReleaseRunOptions=j.Wa)(s),u._OrtCreateBinding=s=>(u._OrtCreateBinding=j.Xa)(s),u._OrtBindInput=(s,p,f)=>(u._OrtBindInput=j.Ya)(s,p,f),u._OrtBindOutput=(s,p,f,b)=>(u._OrtBindOutput=j.Za)(s,p,f,b),u._OrtClearBoundOutputs=s=>(u._OrtClearBoundOutputs=j._a)(s),u._OrtReleaseBinding=s=>(u._OrtReleaseBinding=j.$a)(s),u._OrtRunWithBinding=(s,p,f,b,_)=>(u._OrtRunWithBinding=j.ab)(s,p,f,b,_),u._OrtRun=(s,p,f,b,_,I,O,D)=>(u._OrtRun=j.bb)(s,p,f,b,_,I,O,D),u._OrtEndProfiling=s=>(u._OrtEndProfiling=j.cb)(s),u._JsepOutput=(s,p,f)=>(u._JsepOutput=j.db)(s,p,f),u._JsepGetNodeName=s=>(u._JsepGetNodeName=j.eb)(s);var 
fr,Rt=()=>(Rt=j.fb)(),hr=u._malloc=s=>(hr=u._malloc=j.gb)(s),tt=u._free=s=>(tt=u._free=j.hb)(s),Bn=(s,p,f,b,_,I)=>(Bn=j.kb)(s,p,f,b,_,I),Ni=()=>(Ni=j.lb)(),Wi=(s,p,f,b,_)=>(Wi=j.mb)(s,p,f,b,_),Dn=s=>(Dn=j.nb)(s),gr=s=>(gr=j.ob)(s),Li=()=>(Li=j.pb)(),Hi=(s,p)=>(Hi=j.qb)(s,p),yr=s=>(yr=j.rb)(s),Mn=s=>(Mn=j.sb)(s),Rn=()=>(Rn=j.tb)(),Gi=u.dynCall_ii=(s,p)=>(Gi=u.dynCall_ii=j.vb)(s,p),Fi=s=>(Fi=j.wb)(s),qi=()=>(qi=j.xb)(),ji=s=>(ji=j.yb)(s),Ki=()=>(Ki=j.zb)();function Yi(){0<Ue||(y?(c(u),y||ar(Ae),startWorker(u)):(ar(bt),0<Ue||fr||(fr=!0,u.calledRun=!0,$e||(y||ar(Ae),c(u),y||ar(Me)))))}return u.___start_em_js=881730,u.___stop_em_js=881952,u.stackSave=()=>Rn(),u.stackRestore=s=>yr(s),u.stackAlloc=s=>Mn(s),u.UTF8ToString=ze,u.stringToUTF8=Bt,u.lengthBytesUTF8=vn,wt=function s(){fr||Yi(),fr||(wt=s)},Yi(),h}),Ep=Pa;globalThis.self?.name==="em-pthread"&&Pa()});var Ut,Pp,zp,Op,Ba,Da,Bp,Ma,qt=V(()=>{"use strict";Cr();Ut= false?0:import.meta.url??(typeof document<"u"?document.currentScript?.src:typeof self<"u"?self.location?.href:void 0),Pp= false||typeof location>"u"?void 0:location.origin,zp=(e,t)=>{try{let r=t??Ut;return(r?new URL(e,r):new URL(e)).origin===Pp}catch{return!1}},Op=async e=>{let r=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(r)},Ba=(Ea(),br(ka)).default,Da=async()=>{if(!Ut)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if(zp(Ut))return[void 0,Ba()];let e=await Op(Ut);return[e,Ba(e)]},Bp=(Oa(),br(za)).default,Ma=async(e,t,r)=>[void 0,Bp]});var qn,jn,Mr,Ra,Dp,Mp,Ar,Te,gt=V(()=>{"use strict";qt();jn=!1,Mr=!1,Ra=!1,Dp=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},Mp=()=>{try{return WebAssembly.validate(new 
Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},Ar=async e=>{if(jn)return Promise.resolve();if(Mr)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(Ra)throw new Error("previous call to 'initializeWebAssembly()' failed.");Mr=!0;let t=e.initTimeout,r=e.numThreads;if(!Mp())throw new Error("WebAssembly SIMD is not supported in the current environment.");let n=Dp();r>1&&!n&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=r=1);let o=e.wasmPaths,i=typeof o=="string"?o:void 0,a=o?.mjs,d=a?.href??a,l=o?.wasm,c=l?.href??l,m=e.wasmBinary,[u,h]=await Ma(d,i,r>1),w=!1,g=[];if(t>0&&g.push(new Promise(y=>{setTimeout(()=>{w=!0,y()},t)})),g.push(new Promise((y,S)=>{let $={numThreads:r};m?$.wasmBinary=m:(c||i)&&($.locateFile=(v,x)=>c??(i??x)+v),h($).then(v=>{Mr=!1,jn=!0,qn=v,y(),u&&URL.revokeObjectURL(u)},v=>{Mr=!1,Ra=!0,S(v)})})),await Promise.race(g),w)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},Te=()=>{if(jn&&qn)return qn;throw new Error("WebAssembly is not initialized yet.")}});var ke,Kt,ve,Rr=V(()=>{"use strict";gt();ke=(e,t)=>{let r=Te(),n=r.lengthBytesUTF8(e)+1,o=r._malloc(n);return r.stringToUTF8(e,o,n),t.push(o),o},Kt=(e,t,r,n)=>{if(typeof e=="object"&&e!==null){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach(([o,i])=>{let a=t?t+o:o;if(typeof i=="object")Kt(i,a+".",r,n);else if(typeof i=="string"||typeof i=="number")n(a,i.toString());else if(typeof i=="boolean")n(a,i?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof 
i}`)})},ve=e=>{let t=Te(),r=t.stackSave();try{let n=t.stackAlloc(8);t._OrtGetLastError(n,n+4);let o=t.HEAP32[n/4],i=t.HEAPU32[n/4+1],a=i?t.UTF8ToString(i):"";throw new Error(`${e} ERROR_CODE: ${o}, ERROR_MESSAGE: ${a}`)}finally{t.stackRestore(r)}}});var Ua,Va=V(()=>{"use strict";gt();Rr();Ua=e=>{let t=Te(),r=0,n=[],o=e||{};try{if(e?.logSeverityLevel===void 0)o.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(e?.logVerbosityLevel===void 0)o.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);e?.terminate===void 0&&(o.terminate=!1);let i=0;return e?.tag!==void 0&&(i=ke(e.tag,n)),r=t._OrtCreateRunOptions(o.logSeverityLevel,o.logVerbosityLevel,!!o.terminate,i),r===0&&ve("Can't create run options."),e?.extra!==void 0&&Kt(e.extra,"",new WeakSet,(a,d)=>{let l=ke(a,n),c=ke(d,n);t._OrtAddRunConfigEntry(r,l,c)!==0&&ve(`Can't set a run config entry: ${a} - ${d}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseRunOptions(r),n.forEach(a=>t._free(a)),i}}});var Rp,Up,Vp,Np,Na,Wa=V(()=>{"use strict";gt();Rr();Rp=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},Up=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},Vp=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(r=>(typeof r=="string"?r:r.name)==="webgpu")&&(e.enableMemPattern=!1)},Np=(e,t,r)=>{for(let n of t){let o=typeof n=="string"?n:n.name;switch(o){case"webnn":if(o="WEBNN",typeof 
n!="string"){let d=n?.deviceType;if(d){let l=ke("deviceType",r),c=ke(d,r);Te()._OrtAddSessionConfigEntry(e,l,c)!==0&&ve(`Can't set a session config entry: 'deviceType' - ${d}.`)}}break;case"webgpu":if(o="JS",typeof n!="string"){let a=n;if(a?.preferredLayout){if(a.preferredLayout!=="NCHW"&&a.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${a.preferredLayout}`);let d=ke("preferredLayout",r),l=ke(a.preferredLayout,r);Te()._OrtAddSessionConfigEntry(e,d,l)!==0&&ve(`Can't set a session config entry: 'preferredLayout' - ${a.preferredLayout}.`)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${o}`)}let i=ke(o,r);Te()._OrtAppendExecutionProvider(e,i)!==0&&ve(`Can't append execution provider: ${o}.`)}},Na=e=>{let t=Te(),r=0,n=[],o=e||{};Vp(o);try{let i=Rp(o.graphOptimizationLevel??"all"),a=Up(o.executionMode??"sequential"),d=typeof o.logId=="string"?ke(o.logId,n):0,l=o.logSeverityLevel??2;if(!Number.isInteger(l)||l<0||l>4)throw new Error(`log serverity level is not valid: ${l}`);let c=o.logVerbosityLevel??0;if(!Number.isInteger(c)||c<0||c>4)throw new Error(`log verbosity level is not valid: ${c}`);let m=typeof o.optimizedModelFilePath=="string"?ke(o.optimizedModelFilePath,n):0;if(r=t._OrtCreateSessionOptions(i,!!o.enableCpuMemArena,!!o.enableMemPattern,a,!!o.enableProfiling,0,d,l,c,m),r===0&&ve("Can't create session options."),o.executionProviders&&Np(r,o.executionProviders,n),o.enableGraphCapture!==void 0){if(typeof o.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: ${o.enableGraphCapture}`);let u=ke("enableGraphCapture",n),h=ke(o.enableGraphCapture.toString(),n);t._OrtAddSessionConfigEntry(r,u,h)!==0&&ve(`Can't set a session config entry: 'enableGraphCapture' - ${o.enableGraphCapture}.`)}if(o.freeDimensionOverrides)for(let[u,h]of Object.entries(o.freeDimensionOverrides)){if(typeof u!="string")throw new Error(`free dimension override name 
must be a string: ${u}`);if(typeof h!="number"||!Number.isInteger(h)||h<0)throw new Error(`free dimension override value must be a non-negative integer: ${h}`);let w=ke(u,n);t._OrtAddFreeDimensionOverride(r,w,h)!==0&&ve(`Can't set a free dimension override: ${u} - ${h}.`)}return o.extra!==void 0&&Kt(o.extra,"",new WeakSet,(u,h)=>{let w=ke(u,n),g=ke(h,n);t._OrtAddSessionConfigEntry(r,w,g)!==0&&ve(`Can't set a session config entry: ${u} - ${h}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseSessionOptions(r),n.forEach(a=>t._free(a)),i}}});var Yt,yt,It,Ur,Xt,Vr,Nr,Kn,Q=V(()=>{"use strict";Yt=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},yt=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},It=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],n=typeof t=="number"?t:t.reduce((o,i)=>o*i,1);return r>0?Math.ceil(n*r):void 0},Ur=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return 
BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},
/* Xt(level): maps a logging level name (verbose, info, warning, error, fatal) to 0..4; any other name throws. */
Xt=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},
/* Vr(type): true when the data type name is one of float32, float16, int32, int64, uint32, uint8, bool, uint4, int4. NOTE(review): presumably a supported-type predicate for one tensor location - confirm against callers. */
Vr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",
/* Nr(type): same kind of predicate with a different list: float32, float16, int32, int64, uint32, uint64, int8, uint8, bool. */
Nr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool",
/* Kn(location): maps a data location name (none, cpu, cpu-pinned, texture, gpu-buffer, ml-tensor) to 0..5; any other name throws. */
Kn=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}});
/* Module Yn (V, defined at the top of the bundle, runs the initializer at most once). */
var Zt,Yn=V(()=>{"use strict";Cr();
/* Zt(source): resolves to a Uint8Array with the bytes of source.
   - string: fetched as a URL; a non-ok response throws. Content-Length is parsed (0 when absent).
     Below 1073741824 bytes the whole body is read with arrayBuffer(). Otherwise the body is streamed
     chunk by chunk into a buffer of Content-Length bytes; if allocating the ArrayBuffer throws a
     RangeError, a WebAssembly.Memory of ceil(n/65536) pages is used as the backing buffer instead.
     The if(false){} arm is dead code. NOTE(review): presumably a build-time-stripped branch - confirm.
   - Blob: read via arrayBuffer().
   - Uint8Array: returned as is (no copy).
   - anything else: passed to new Uint8Array(...). */
Zt=async e=>{if(typeof e=="string")if(false){}else{let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let r=t.headers.get("Content-Length"),n=r?parseInt(r,10):0;if(n<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let o=t.body.getReader(),i;try{i=new ArrayBuffer(n)}catch(d){if(d instanceof RangeError){let l=Math.ceil(n/65536);i=new WebAssembly.Memory({initial:l,maximum:l}).buffer}else throw d}let a=0;for(;;){let{done:d,value:l}=await o.read();if(d)break;let c=l.byteLength;new Uint8Array(i,a,c).set(l),a+=c}return new Uint8Array(i,0,n)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}});
/* Module Xe: console logging helpers. */
var Wp,Lp,La,Ha,Wr,Hp,me,Xe=V(()=>{"use strict";Q();
/* Wp: one-letter tags indexed by the numeric level returned by Xt. */
Wp=["V","I","W","E","F"],
/* Lp(levelIndex, message): writes [tag,ISO timestamp]message through console.log. */
Lp=(e,t)=>{console.log(`[${Wp[e]},${new Date().toISOString()}]${t}`)},
/* Wr(level, enabled): stores the threshold level name in La and the enable flag in Ha. */
Wr=(e,t)=>{La=e,Ha=t},
/* Hp(level, message): logs when Xt(level) >= Xt(La); message may be a function, called only when the entry is actually logged. */
Hp=(e,t)=>{let r=Xt(e),n=Xt(La);r>=n&&Lp(r,typeof t=="function"?t():t)},
/* me(...args): forwards to Hp only while Ha is truthy. */
me=(...e)=>{Ha&&Hp(...e)}});
/* Lr(source, type): constructs an instance of the constructor returned by Ur(type), passing source. */
var Lr,Xn=V(()=>{"use strict";Q();Lr=(e,t)=>new(Ur(t))(e)});
var Hr=V(()=>{"use strict"});
/* Module Ka: GPU buffer management. */
var Ga,Zn,Qn,Gp,Fp,Fa,eo,Jn,ja,Ka=V(()=>{"use strict";Xe();Hr();
/* Ga: Map keyed by buffer size (its entries follow). The Jn constructor pushes each key into Zn, and refreshPendingBuffers compares the mapped value with the length of the free list for that size before keeping a released buffer. */
Ga=new 
Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),Zn=[],Qn=e=>Math.ceil(e/16)*16,Gp=e=>{for(let t=0;t<Zn.length;t++){let r=Zn[t];if(e<=r)return r}return Math.ceil(e/16)*16},Fp=1,Fa=()=>Fp++,eo=async(e,t,r,n)=>{let o=Qn(r),i=e.device.createBuffer({size:o,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let a=e.getCommandEncoder();e.endComputePass(),a.copyBufferToBuffer(t,0,i,0,o),e.flush(),await i.mapAsync(GPUMapMode.READ);let d=i.getMappedRange();if(n){let l=n();return l.set(new Uint8Array(d,0,r)),l}else return new Uint8Array(d.slice(0,r))}finally{i.destroy()}},Jn=class{constructor(t){this.backend=t;this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersForUploadingPending=[],this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[r]of Ga)Zn.push(r),this.freeBuffers.set(r,[]),this.freeUniformBuffers.set(r,[])}upload(t,r){let n=r.buffer,o=r.byteOffset,i=r.byteLength,a=Qn(i),d=this.storageCache.get(t);if(!d)throw new Error("gpu data for uploading does not exist");if(d.originalSize!==i)throw new Error(`inconsistent data size. 
gpu data size=${d.originalSize}, data size=${i}`);let l=this.backend.device.createBuffer({mappedAtCreation:!0,size:a,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),c=l.getMappedRange();new Uint8Array(c).set(new Uint8Array(n,o,i)),l.unmap();let m=this.backend.getCommandEncoder();this.backend.endComputePass(),m.copyBufferToBuffer(l,0,d.gpuData.buffer,0,a),me("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${t})`),this.buffersForUploadingPending.push(l)}memcpy(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("source gpu data for memcpy does not exist");let o=this.storageCache.get(r);if(!o)throw new Error("destination gpu data for memcpy does not exist");if(n.originalSize!==o.originalSize)throw new Error("inconsistent source and destination gpu data size");let i=Qn(n.originalSize),a=this.backend.getCommandEncoder();this.backend.endComputePass(),a.copyBufferToBuffer(n.gpuData.buffer,0,o.gpuData.buffer,0,i)}registerExternalBuffer(t,r,n){let o;if(n){if(o=n[0],t===n[1])return me("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, buffer is the same, skip.`),o;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
3812
- Please use the previous external buffer!`)}else o=Fa();return this.storageCache.set(o,{gpuData:{id:o,type:0,buffer:t},originalSize:r}),me("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, registered.`),o}unregisterExternalBuffer(t){t!==void 0&&(this.storageCache.delete(t),me("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${t}`))}create(t,r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let n=Gp(t),o,i=(r&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,a=(r&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(i||a){let c=(i?this.freeBuffers:this.freeUniformBuffers).get(n);c?c.length>0?o=c.pop():o=this.backend.device.createBuffer({size:n,usage:r}):o=this.backend.device.createBuffer({size:n,usage:r})}else o=this.backend.device.createBuffer({size:n,usage:r});let d={id:Fa(),type:0,buffer:o};return this.storageCache.set(d.id,{gpuData:d,originalSize:t}),me("verbose",()=>`[WebGPU] GpuDataManager.create(size=${t}) => id=${d.id}`),d}get(t){return this.storageCache.get(t)?.gpuData}release(t){let r=this.storageCache.get(t);if(!r)throw new Error("releasing data does not exist");return me("verbose",()=>`[WebGPU] GpuDataManager.release(id=${t}), gpuDataId=${r.gpuData.id}`),this.storageCache.delete(t),this.buffersPending.push(r.gpuData.buffer),r.originalSize}async download(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("data does not exist");await eo(this.backend,n.gpuData.buffer,n.originalSize,r)}refreshPendingBuffers(){for(let t of this.buffersForUploadingPending)t.destroy();if(this.buffersForUploadingPending=[],this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let t of this.buffersPending){let r=Ga.get(t.size);if((t.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let n=this.freeBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else if((t.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let 
n=this.freeUniformBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else t.destroy()}this.buffersPending=[]}else{let t=this.capturedPendingBuffers.get(this.backend.currentSessionId);t||(t=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,t));for(let r of this.buffersPending)t.push(r);this.buffersPending=[]}}
/* dispose(): destroys every buffer held in freeBuffers, freeUniformBuffers, storageCache (gpuData.buffer) and capturedPendingBuffers, then replaces those four maps with empty Maps. buffersPending and buffersForUploadingPending are not touched here. */
dispose(){this.freeBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.freeUniformBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache.forEach(t=>{t.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}
/* onReleaseSession(sessionId): if buffers were captured for sessionId, destroys them and removes the entry; otherwise does nothing. */
onReleaseSession(t){let r=this.capturedPendingBuffers.get(t);r&&(r.forEach(n=>{n.destroy()}),this.capturedPendingBuffers.delete(t))}},
/* ja(...args): factory returning new Jn(...args). */
ja=(...e)=>new Jn(...e)});
/* Module Ie: attribute objects with a memoized cache key. */
var to,J,Ie=V(()=>{"use strict";
/* to: the constructor copies all properties of its argument onto the instance. The cacheKey getter computes the key once, stores it in this.key and returns it afterwards: the own property names are sorted and their stringified values joined with a semicolon. */
to=class{constructor(t){Object.assign(this,t)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(t=>`${this[t]}`).join(";")),this.key}},
/* J(attributes): shorthand for new to(attributes). */
J=e=>new to(e)});
/* Module ie: shape utilities. */
var ro,rt,k,Ct,Gr,Ya,Xa,ie=V(()=>{"use strict";
/* ro.calcMatMulShape(a, b): for two 2-element shapes returns [a[0], b[1]] when a[1] equals b[0], otherwise undefined. */
ro=class{static calcMatMulShape(t,r){return t[1]!==r[0]?void 0:[t[0],r[1]]}},
/* rt.calcShape(a, b, isMatMul = false): broadcast result shape of a and b, or undefined when they are incompatible.
   A rank-0 input yields the other shape unchanged. With isMatMul both ranks must be at least 2, the last two
   output dims come from ro.calcMatMulShape and broadcasting starts at the third-from-last dim.
   Per dim: two different values that are both greater than 1 are incompatible; if either value is 0 the
   result dim is 0 unless the other value is greater than 1 (then incompatible).
   rt.isValidBroadcast(shape, target): true when shape has no more dims than target and every dim of shape,
   aligned from the end, is 1 or equals the corresponding target dim. */
rt=class{static calcShape(t,r,n=!1){let o=t.length,i=r.length;if(o===0)return r;if(i===0)return t;let a=Math.max(t.length,r.length),d=new Array(a);if(n){if(o<2||i<2)return;let l=ro.calcMatMulShape([t[o-2],t[o-1]],[r[i-2],r[i-1]]);if(l===void 0)return;[d[a-2],d[a-1]]=l}for(let l=n?3:1;l<=a;l++){let c=o-l<0?1:t[o-l],m=i-l<0?1:r[i-l];if(c!==m&&c>1&&m>1)return;let u=Math.max(c,m);if(c&&m)d[a-l]=Math.max(c,m);else{if(u>1)return;d[a-l]=0}}return d}static isValidBroadcast(t,r){let n=t.length,o=r.length;if(n>o)return!1;for(let i=1;i<=n;i++)if(t[n-i]!==1&&t[n-i]!==r[o-i])return!1;return!0}},
/* k: static shape helpers. size(dims) is the product of all dims (via getSizeFromDimensionRange).
   convertShape(dims, n = 4) walks from the last dim: the first dim that is a multiple of n is divided by n;
   a dim that divides n becomes 1 and n is divided by it; anything else throws. Remaining leading dims are copied. */
k=class e{static size(t){return e.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,r=4){let n=t.length;if(n===0)return[];let o=new 
Array(n),i=n-1;for(;i>=0;){if(t[i]%r===0){o[i]=t[i]/r;break}if(r%t[i]!==0)throw new Error("cannot convert shape");o[i]=1,r/=t[i],i--}for(i--;i>=0;i--)o[i]=t[i];return o}static sizeFromDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,r,t.length)}static sizeToDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeToDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,0,r)}static getSizeFromDimensionRange(t,r,n){let o=1;for(let i=r;i<n;i++){if(t[i]<0)throw new Error("cannot get valid size from specified dimension range. Most likely the range contains negative values in them.");o*=t[i]}return o}static computeStrides(t){let r=t.length;if(r===0)return[];if(r===1)return[1];let n=new Array(r);n[r-1]=1,n[r-2]=t[r-1];for(let o=r-3;o>=0;--o)n[o]=n[o+1]*t[o+1];return n}static normalizeAxis(t,r){if(t<-r&&t>=r)throw new Error("unsupported axis for this operation.");return t<0?t+r:t}static normalizeAxes(t,r){return t.map(n=>this.normalizeAxis(n,r??t.length))}static sortBasedOnPerm(t,r){return r?r.map(n=>t[n]):t.slice().reverse()}static padShape(t,r){let n=t.length;return t.map((o,i)=>o+r[i]+r[i+n])}static areEqual(t,r){return t.length!==r.length?!1:t.every((n,o)=>n===r[o])}},Ct=class e{static adjustPoolAttributes(t,r,n,o,i,a){if(!t&&n.length!==r.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let d=0;d<r.length-2;d++)d>=n.length?n.push(r[d+2]):n[d]=r[d+2];for(let d=0;d<n.length;d++)if(d<o.length){if(o[d]<0)throw new Error("strides should be greater than or equal to 1")}else o.push(1);for(let d=0;d<n.length;d++)if(d<i.length){if(i[d]<0)throw new Error("dilations should be greater than or equal to 1")}else i.push(1);for(let d=0;d<n.length*2;d++)if(d<a.length){if(a[d]<0)throw new Error("pad should be greater than or equal to 
1")}else a.push(0);for(let d=0;d<n.length;d++){if(n[d]<=0)throw new Error("kernel shapes need to be greater than 0");if(a[d]>=n[d]||a[d+n.length]>=n[d])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,r,n,o,i,a,d){if(d){if(i.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(r.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(o.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let l=0;l<t.length-2;l++)e.adjustPadAndReturnShape(t[l+(a?1:2)],r[l],n[l],o[l],i,l,l+t.length-2,d)}}static computePoolOutputShape(t,r,n,o,i,a,d){if(r.length<=0)throw new Error("input shape must be of size greater than 0");let l=[r[0],r[1]];return e.computeShapeHelper(t,r,l,n,o,i,a,d),l}static computeConvOutputShape(t,r,n,o,i,a,d){if(t.length<=0||r.length<=0)throw new Error("invalid input tensor dims or invalid filter tensor dims");let l=[t[0],r[0]];return e.computeShapeHelper(!1,t,l,n,o,i,a,d),l}static computeShapeHelper(t,r,n,o,i,a,d,l){if(t)for(let c=0;c<r.length-2;c++)n.push(1);else for(let c=0;c<r.length-2;c++)n.push(e.adjustPadAndReturnShape(r[c+2],o[c],i[c],a[c],d,c,c+r.length-2,l))}static adjustPadAndReturnShape(t,r,n,o,i,a,d,l){let c=n*(o-1)+1;if(l&&l!=="NOTSET")switch(l){case"VALID":return i[a]=0,i[d]=0,Math.floor((t-c)/r+1);case"SAME_LOWER":case"SAME_UPPER":if(n!==1)throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");{let u=((t+r-1)/r-1)*r+o-t;return i[a]=Math.floor(l==="SAME_LOWER"?(u+1)/2:u/2),i[d]=u-i[a],Math.floor((t+u-o)/r+1)}default:throw new Error("Unsupported AutoPad type")}else return Math.floor((t+i[a]+i[d]-c)/r+1)}},Gr=class{static getShapeOfGemmResult(t,r,n,o,i){if(t.length!==2||n.length!==2)throw new Error("shape need to be of size 2");let a,d,l;r?(a=t[1],d=t[0]):(a=t[0],d=t[1]);let c=-1;if(o?(l=n[0],c=1):(l=n[1],c=0),n[c]!==d)throw new 
Error("dimension mismatch");if(a<=0||l<=0||d<=0)throw new Error("invalid shape specified");if(i&&!rt.isValidBroadcast(i,[a,l]))throw new Error("gemm: invalid bias shape for broadcast");return[a,l,d]}},
/* Ya / Xa: -3.4028234663852886e38 and +3.4028234663852886e38. NOTE(review): presumably the float32 min/max used as clip defaults - confirm against callers. */
Ya=-34028234663852886e22,Xa=34028234663852886e22});
/* Module ae: helpers that build WGSL source fragments. */
var At,oo,he,Ee,R,we,io,kt,Ze,F,ao,E,M,Fr,no,Za,Nt,ae=V(()=>{"use strict";Q();ie();
At=64,
/* oo(typeCode, components): WGSL type for a numeric data type code (the same codes yt maps to names:
   10 float16, 1 float32, 6 int32, 12 uint32, 7 int64, 13 uint64, 9 bool, 22 int4, 21 uint4).
   Returns a string, or a two-element array whose entry 0 is read by he and entry 1 by Ee (ao names them
   storage and value). components === 3 always throws; the 64-bit types reject components > 1;
   bool requires components === 4. */
oo=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(e){case 10:return t>1?`vec${t}<f16>`:"f16";case 1:return t>1?`vec${t}<f32>`:"f32";case 6:return t>1?`vec${t}<i32>`:"i32";case 12:return t>1?`vec${t}<u32>`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4<bool>"];case 22:return"i32";case 21:return"u32";default:throw new Error(`Unknown data type: ${e}`)}},
/* he(typeCode, components = 1): the string result of oo, or entry 0 of its array result. */
he=(e,t=1)=>{let r=oo(e,t);return typeof r=="string"?r:r[0]},
/* Ee(typeCode, components = 1): the string result of oo, or entry 1 of its array result. */
Ee=(e,t=1)=>{let r=oo(e,t);return typeof r=="string"?r:r[1]},
/* R(...shapes): for every non-empty shape appends two entries of type 12: the shape itself and k.computeStrides(shape). Empty shapes contribute nothing. */
R=(...e)=>{let t=[];return e.forEach(r=>{r.length!==0&&t.push({type:12,data:r},{type:12,data:k.computeStrides(r)})}),t},
/* we(n): 4 when n is a multiple of 4, 2 when n is even, otherwise 1. */
we=e=>e%4===0?4:e%2===0?2:1,
/* io(type = f32, components, value = 0): text of a constructor call, type(value) for a scalar (components falsy or 1) or vecN<type>(value). */
io=(e="f32",t,r="0")=>!t||t===1?`${e}(${r})`:`vec${t}<${e}>(${r})`,
/* kt(type, components, expr): expr unchanged when type is f32, otherwise expr wrapped in f32(...) or vecN<f32>(...). */
kt=(e,t,r)=>e==="f32"?r:t===1?`f32(${r})`:`vec${t}<f32>(${r})`,
/* Ze(name, components): expression adding the x/y/z/w components of name for 4, 2 or 3 components; name itself otherwise. */
Ze=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,
/* F(name, index, length, type): element access expression. For names starting with the uniforms. prefix and
   length > 4 the index is split as [i/4][i%4], or [i/8][i%8/4][i%8%4] when type is f16; a string index is
   emitted as an expression, a numeric index is folded with Math.floor. Otherwise name[index] when length > 1,
   else name unchanged. */
F=(e,t,r,n)=>e.startsWith("uniforms.")&&r>4?typeof t=="string"?n==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 4]`:n==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:r>1?`${e}[${t}]`:e,
/* ao(name, typeCode, shapeOrRank, usage, components): begins here and continues below. A numeric third argument is taken as the rank, otherwise its length is the rank; the indices type is u32 for rank < 2, vecN<u32> up to rank 4, else array<u32, N>. */
ao=(e,t,r,n,o)=>{let i=typeof r=="number",a=i?r:r.length,d=[...new Array(a).keys()],l=a<2?"u32":a<=4?`vec${a}<u32>`:`array<u32, ${a}>`,c=oo(t,o),m=typeof c=="string"?c:c[1],u=typeof 
c=="string"?c:c[0],h={indices:l,value:m,storage:u,tensor:t},w=U=>typeof U=="string"?U:`${U}u`,g={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},y=i?"uniforms.":"",S=`${y}${e}_shape`,$=`${y}${e}_strides`,v="";for(let U=0;U<a-1;U++)v+=`
3813
- let dim${U} = current / ${F($,U,a)};
3814
- let rest${U} = current % ${F($,U,a)};
3815
- indices[${U}] = dim${U};
3816
- current = rest${U};
3806
+ `,b.Tb||(B.push("emval_returnValue"),L.push(hi),_+=` return emval_returnValue(retType, destructorsRef, rv);
3807
+ `),B.push(_+`};
3808
+ `),s=function(Q){var fe=Function;if(!(fe instanceof Function))throw new TypeError(`new_ called with constructor type ${typeof fe} which is not a function`);var be=vi(fe.name||"unknownFunctionName",function(){});return be.prototype=fe.prototype,be=new be,(Q=fe.apply(be,Q))instanceof Object?Q:be}(B)(...L),f=`methodCaller<(${p.map(Q=>Q.name).join(", ")}) => ${b.name}>`,Lc(vi(f,s))}function Fc(s){return s=cr(s>>>0),je(u[s])}function qc(s,p){return p>>>=0,s=qe(s>>>0),p=qe(p),je(s[p])}function jc(s){9<(s>>>=0)&&(dt[s+1]+=1)}function Kc(){return je([])}function Yc(s){s=qe(s>>>0);for(var p=Array(s.length),f=0;f<s.length;f++)p[f]=s[f];return je(p)}function Xc(s){return je(cr(s>>>0))}function Qc(){return je({})}function Zc(s){for(var p=qe(s>>>=0);p.length;){var f=p.pop();p.pop()(f)}In(s)}function Jc(s,p,f){p>>>=0,f>>>=0,s=qe(s>>>0),p=qe(p),f=qe(f),s[p]=f}function ep(s,p){return p>>>=0,s=(s=En(s>>>0,"_emval_take_value")).readValueFromPointer(p),je(s)}function tp(s,p){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),p>>>=0,s=new Date(1e3*s),i()[p>>>2>>>0]=s.getUTCSeconds(),i()[p+4>>>2>>>0]=s.getUTCMinutes(),i()[p+8>>>2>>>0]=s.getUTCHours(),i()[p+12>>>2>>>0]=s.getUTCDate(),i()[p+16>>>2>>>0]=s.getUTCMonth(),i()[p+20>>>2>>>0]=s.getUTCFullYear()-1900,i()[p+24>>>2>>>0]=s.getUTCDay(),s=(s.getTime()-Date.UTC(s.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,i()[p+28>>>2>>>0]=s}var Bt=s=>s%4==0&&(s%100!=0||s%400==0),$i=[0,31,60,91,121,152,182,213,244,274,305,335],xi=[0,31,59,90,120,151,181,212,243,273,304,334];function rp(s,p){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),p>>>=0,s=new Date(1e3*s),i()[p>>>2>>>0]=s.getSeconds(),i()[p+4>>>2>>>0]=s.getMinutes(),i()[p+8>>>2>>>0]=s.getHours(),i()[p+12>>>2>>>0]=s.getDate(),i()[p+16>>>2>>>0]=s.getMonth(),i()[p+20>>>2>>>0]=s.getFullYear()-1900,i()[p+24>>>2>>>0]=s.getDay();var f=(Bt(s.getFullYear())?$i:xi)[s.getMonth()]+s.getDate()-1|0;i()[p+28>>>2>>>0]=f,i()[p+36>>>2>>>0]=-60*s.getTimezoneOffset(),f=new 
Date(s.getFullYear(),6,1).getTimezoneOffset();var b=new Date(s.getFullYear(),0,1).getTimezoneOffset();s=0|(f!=b&&s.getTimezoneOffset()==Math.min(b,f)),i()[p+32>>>2>>>0]=s}function np(s){s>>>=0;var p=new Date(i()[s+20>>>2>>>0]+1900,i()[s+16>>>2>>>0],i()[s+12>>>2>>>0],i()[s+8>>>2>>>0],i()[s+4>>>2>>>0],i()[s>>>2>>>0],0),f=i()[s+32>>>2>>>0],b=p.getTimezoneOffset(),_=new Date(p.getFullYear(),6,1).getTimezoneOffset(),I=new Date(p.getFullYear(),0,1).getTimezoneOffset(),O=Math.min(I,_);return 0>f?i()[s+32>>>2>>>0]=+(_!=I&&O==b):0<f!=(O==b)&&(_=Math.max(I,_),p.setTime(p.getTime()+6e4*((0<f?O:_)-b))),i()[s+24>>>2>>>0]=p.getDay(),f=(Bt(p.getFullYear())?$i:xi)[p.getMonth()]+p.getDate()-1|0,i()[s+28>>>2>>>0]=f,i()[s>>>2>>>0]=p.getSeconds(),i()[s+4>>>2>>>0]=p.getMinutes(),i()[s+8>>>2>>>0]=p.getHours(),i()[s+12>>>2>>>0]=p.getDate(),i()[s+16>>>2>>>0]=p.getMonth(),i()[s+20>>>2>>>0]=p.getYear(),s=p.getTime(),BigInt(isNaN(s)?-1:s/1e3)}function Si(s,p,f,b,_,I,O){return y?xe(16,1,s,p,f,b,_,I,O):-52}function Ti(s,p,f,b,_,I){if(y)return xe(17,1,s,p,f,b,_,I)}function op(s,p,f,b){s>>>=0,p>>>=0,f>>>=0,b>>>=0;var _=new Date().getFullYear(),I=new Date(_,0,1),O=new Date(_,6,1);_=I.getTimezoneOffset();var B=O.getTimezoneOffset(),L=Math.max(_,B);a()[s>>>2>>>0]=60*L,i()[p>>>2>>>0]=+(_!=B),I=(s=H=>H.toLocaleTimeString(void 0,{hour12:!1,timeZoneName:"short"}).split(" ")[1])(I),O=s(O),B<_?(Dt(I,f,17),Dt(O,b,17)):(Dt(I,b,17),Dt(O,f,17))}var zn=[],Ii=(s,p)=>{zn.length=0;for(var f;f=r()[s++>>>0];){var b=f!=105;p+=(b&=f!=112)&&p%8?4:0,zn.push(f==112?a()[p>>>2>>>0]:f==106?G[p>>>3]:f==105?i()[p>>>2>>>0]:l()[p>>>3>>>0]),p+=b?8:4}return zn};function ip(s,p,f){return s>>>=0,p=Ii(p>>>0,f>>>0),yn[s](...p)}function ap(s,p,f){return s>>>=0,p=Ii(p>>>0,f>>>0),yn[s](...p)}var sp=()=>{},up=()=>Date.now();function dp(s,p){return Y(ze(s>>>0,p>>>0))}var Ci,lp=()=>{throw _t+=1,"unwind"};function cp(){return 4294901760}Ci=()=>performance.timeOrigin+performance.now();var pp=()=>navigator.hardwareConcurrency;function 
mp(){return Ot("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}function fp(s){s>>>=0;var p=r().length;if(s<=p||4294901760<s)return!1;for(var f=1;4>=f;f*=2){var b=p*(1+.2/f);b=Math.min(b,s+100663296);var _=Math;b=Math.max(s,b);e:{_=(_.min.call(_,4294901760,b+(65536-b%65536)%65536)-ue.buffer.byteLength+65535)/65536;try{ue.grow(_),Ce();var I=1;break e}catch{}I=void 0}if(I)return!0}return!1}var pr=()=>(Ot("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),Mt={},Ai=s=>{s.forEach(p=>{var f=pr();f&&(Mt[f]=p)})};function hp(){var s=Error().stack.toString().split(`
3809
+ `);return s[0]=="Error"&&s.shift(),Ai(s),Mt.Pb=pr(),Mt.ec=s,Mt.Pb}function gp(s,p,f){if(s>>>=0,p>>>=0,Mt.Pb==s)var b=Mt.ec;else(b=Error().stack.toString().split(`
3810
+ `))[0]=="Error"&&b.shift(),Ai(b);for(var _=3;b[_]&&pr()!=s;)++_;for(s=0;s<f&&b[s+_];++s)i()[p+4*s>>>2>>>0]=pr();return s}var On,Dn={},ki=()=>{if(!On){var s,p={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:C||"./this.program"};for(s in Dn)Dn[s]===void 0?delete p[s]:p[s]=Dn[s];var f=[];for(s in p)f.push(`${s}=${p[s]}`);On=f}return On};function Ei(s,p){if(y)return xe(18,1,s,p);s>>>=0,p>>>=0;var f=0;return ki().forEach((b,_)=>{var I=p+f;for(_=a()[s+4*_>>>2>>>0]=I,I=0;I<b.length;++I)t()[_++>>>0]=b.charCodeAt(I);t()[_>>>0]=0,f+=b.length+1}),0}function Pi(s,p){if(y)return xe(19,1,s,p);s>>>=0,p>>>=0;var f=ki();a()[s>>>2>>>0]=f.length;var b=0;return f.forEach(_=>b+=_.length+1),a()[p>>>2>>>0]=b,0}function zi(s){return y?xe(20,1,s):52}function Oi(s,p,f,b){return y?xe(21,1,s,p,f,b):52}function Di(s,p,f,b){return y?xe(22,1,s,p,f,b):70}var yp=[null,[],[]];function Bi(s,p,f,b){if(y)return xe(23,1,s,p,f,b);p>>>=0,f>>>=0,b>>>=0;for(var _=0,I=0;I<f;I++){var O=a()[p>>>2>>>0],B=a()[p+4>>>2>>>0];p+=8;for(var L=0;L<B;L++){var H=r()[O+L>>>0],Q=yp[s];H===0||H===10?((s===1?j:Y)(Xo(Q,0)),Q.length=0):Q.push(H)}_+=B}return a()[b>>>2>>>0]=_,0}var Mi=[31,29,31,30,31,30,31,31,30,31,30,31],Ri=[31,28,31,30,31,30,31,31,30,31,30,31],bp=(s,p)=>{t().set(s,p>>>0)};function Ui(s,p,f,b){function _(z,me,Se){for(z=typeof z=="number"?z.toString():z||"";z.length<me;)z=Se[0]+z;return z}function I(z,me){return _(z,me,"0")}function O(z,me){function Se(Zi){return 0>Zi?-1:0<Zi?1:0}var $t;return($t=Se(z.getFullYear()-me.getFullYear()))===0&&($t=Se(z.getMonth()-me.getMonth()))===0&&($t=Se(z.getDate()-me.getDate())),$t}function B(z){switch(z.getDay()){case 0:return new Date(z.getFullYear()-1,11,29);case 1:return z;case 2:return new Date(z.getFullYear(),0,3);case 3:return new Date(z.getFullYear(),0,2);case 4:return new Date(z.getFullYear(),0,1);case 5:return new 
Date(z.getFullYear()-1,11,31);case 6:return new Date(z.getFullYear()-1,11,30)}}function L(z){var me=z.Bb;for(z=new Date(new Date(z.Cb+1900,0,1).getTime());0<me;){var Se=z.getMonth(),$t=(Bt(z.getFullYear())?Mi:Ri)[Se];if(!(me>$t-z.getDate())){z.setDate(z.getDate()+me);break}me-=$t-z.getDate()+1,z.setDate(1),11>Se?z.setMonth(Se+1):(z.setMonth(0),z.setFullYear(z.getFullYear()+1))}return Se=new Date(z.getFullYear()+1,0,4),me=B(new Date(z.getFullYear(),0,4)),Se=B(Se),0>=O(me,z)?0>=O(Se,z)?z.getFullYear()+1:z.getFullYear():z.getFullYear()-1}s>>>=0,p>>>=0,f>>>=0,b>>>=0;var H=a()[b+40>>>2>>>0];for(var Q in b={kc:i()[b>>>2>>>0],jc:i()[b+4>>>2>>>0],Hb:i()[b+8>>>2>>>0],Lb:i()[b+12>>>2>>>0],Ib:i()[b+16>>>2>>>0],Cb:i()[b+20>>>2>>>0],ub:i()[b+24>>>2>>>0],Bb:i()[b+28>>>2>>>0],sc:i()[b+32>>>2>>>0],ic:i()[b+36>>>2>>>0],lc:H?ze(H):""},f=ze(f),H={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})f=f.replace(new RegExp(Q,"g"),H[Q]);var fe="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),be="January February March April May June July August September October November December".split(" ");for(Q in H={"%a":z=>fe[z.ub].substring(0,3),"%A":z=>fe[z.ub],"%b":z=>be[z.Ib].substring(0,3),"%B":z=>be[z.Ib],"%C":z=>I((z.Cb+1900)/100|0,2),"%d":z=>I(z.Lb,2),"%e":z=>_(z.Lb,2," "),"%g":z=>L(z).toString().substring(2),"%G":L,"%H":z=>I(z.Hb,2),"%I":z=>((z=z.Hb)==0?z=12:12<z&&(z-=12),I(z,2)),"%j":z=>{for(var me=0,Se=0;Se<=z.Ib-1;me+=(Bt(z.Cb+1900)?Mi:Ri)[Se++]);return I(z.Lb+me,3)},"%m":z=>I(z.Ib+1,2),"%M":z=>I(z.jc,2),"%n":()=>`
3811
+ `,"%p":z=>0<=z.Hb&&12>z.Hb?"AM":"PM","%S":z=>I(z.kc,2),"%t":()=>" ","%u":z=>z.ub||7,"%U":z=>I(Math.floor((z.Bb+7-z.ub)/7),2),"%V":z=>{var me=Math.floor((z.Bb+7-(z.ub+6)%7)/7);if(2>=(z.ub+371-z.Bb-2)%7&&me++,me)me==53&&((Se=(z.ub+371-z.Bb)%7)==4||Se==3&&Bt(z.Cb)||(me=1));else{me=52;var Se=(z.ub+7-z.Bb-1)%7;(Se==4||Se==5&&Bt(z.Cb%400-1))&&me++}return I(me,2)},"%w":z=>z.ub,"%W":z=>I(Math.floor((z.Bb+7-(z.ub+6)%7)/7),2),"%y":z=>(z.Cb+1900).toString().substring(2),"%Y":z=>z.Cb+1900,"%z":z=>{var me=0<=(z=z.ic);return z=Math.abs(z)/60,(me?"+":"-")+("0000"+(z/60*100+z%60)).slice(-4)},"%Z":z=>z.lc,"%%":()=>"%"},f=f.replace(/%%/g,"\0\0"),H)f.includes(Q)&&(f=f.replace(new RegExp(Q,"g"),H[Q](b)));return Q=function(z){var me=Array($n(z)+1);return Jo(z,me,0,me.length),me}(f=f.replace(/\0\0/g,"%")),Q.length>p?0:(bp(Q,s),Q.length-1)}function wp(s,p,f,b){return Ui(s>>>0,p>>>0,f>>>0,b>>>0)}y||function(){for(var s=u.numThreads-1;s--;)qo();bt.unshift(()=>{Ue++,function(p){y?p():Promise.all(pt.map(Fo)).then(p)}(()=>Do())})}();for(var Vi=Array(256),mr=0;256>mr;++mr)Vi[mr]=String.fromCharCode(mr);ci=Vi,mt=u.BindingError=class extends Error{constructor(s){super(s),this.name="BindingError"}},u.InternalError=class extends Error{constructor(s){super(s),this.name="InternalError"}},dt.push(0,1,void 0,1,null,1,!0,1,!1,1),u.count_emval_handles=()=>dt.length/2-5-Tn.length;var _p=[_n,No,jo,Qo,Zo,ei,ti,ri,ni,oi,ii,ai,si,ui,di,li,Si,Ti,Ei,Pi,zi,Oi,Di,Bi],X=function(){function s(f,b){return X=f.exports,X=function(){var _=X,I={};for(let[O,B]of Object.entries(_))I[O]=typeof B=="function"?(...L)=>{dr.push(O);try{return B(...L)}finally{$e||(dr.pop(),et&&ht===1&&dr.length===0&&(ht=0,_t+=1,ur(Ki),typeof Fibers<"u"&&Fibers.tc()))}}:B;return I}(),X=function(){var 
_=X,I=B=>L=>B(L)>>>0,O=B=>()=>B()>>>0;return(_=Object.assign({},_)).Ca=I(_.Ca),_.fb=O(_.fb),_.hb=I(_.hb),_.emscripten_main_runtime_thread_id=O(_.emscripten_main_runtime_thread_id),_.sb=I(_.sb),_.tb=O(_.tb),_}(),Lo.push(X.ib),Ae.unshift(X.Ba),K=b,Do(),X}var p=Vo();if(Ue++,u.instantiateWasm)try{return u.instantiateWasm(p,s)}catch(f){Y(`Module.instantiateWasm callback failed with error: ${f}`),m(f)}return gn||=u.locateFile?Bo("ort-wasm-simd-threaded.jsep.wasm")?"ort-wasm-simd-threaded.jsep.wasm":u.locateFile?u.locateFile("ort-wasm-simd-threaded.jsep.wasm",P):P+"ort-wasm-simd-threaded.jsep.wasm":new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href,function(f,b){var _=gn;return D||typeof WebAssembly.instantiateStreaming!="function"||Bo(_)||Mo(_)||typeof fetch!="function"?Uo(_,f,b):fetch(_,{credentials:"same-origin"}).then(I=>WebAssembly.instantiateStreaming(I,f).then(b,function(O){return Y(`wasm streaming compile failed: ${O}`),Y("falling back to ArrayBuffer 
instantiation"),Uo(_,f,b)}))}(p,function(f){s(f.instance,f.module)}).catch(m),{}}(),Wi=s=>(Wi=X.Ca)(s),Ni=()=>(Ni=X.Da)();u._OrtInit=(s,p)=>(u._OrtInit=X.Ea)(s,p),u._OrtGetLastError=(s,p)=>(u._OrtGetLastError=X.Fa)(s,p),u._OrtCreateSessionOptions=(s,p,f,b,_,I,O,B,L,H)=>(u._OrtCreateSessionOptions=X.Ga)(s,p,f,b,_,I,O,B,L,H),u._OrtAppendExecutionProvider=(s,p)=>(u._OrtAppendExecutionProvider=X.Ha)(s,p),u._OrtAddFreeDimensionOverride=(s,p,f)=>(u._OrtAddFreeDimensionOverride=X.Ia)(s,p,f),u._OrtAddSessionConfigEntry=(s,p,f)=>(u._OrtAddSessionConfigEntry=X.Ja)(s,p,f),u._OrtReleaseSessionOptions=s=>(u._OrtReleaseSessionOptions=X.Ka)(s),u._OrtCreateSession=(s,p,f)=>(u._OrtCreateSession=X.La)(s,p,f),u._OrtReleaseSession=s=>(u._OrtReleaseSession=X.Ma)(s),u._OrtGetInputOutputCount=(s,p,f)=>(u._OrtGetInputOutputCount=X.Na)(s,p,f),u._OrtGetInputName=(s,p)=>(u._OrtGetInputName=X.Oa)(s,p),u._OrtGetOutputName=(s,p)=>(u._OrtGetOutputName=X.Pa)(s,p),u._OrtFree=s=>(u._OrtFree=X.Qa)(s),u._OrtCreateTensor=(s,p,f,b,_,I)=>(u._OrtCreateTensor=X.Ra)(s,p,f,b,_,I),u._OrtGetTensorData=(s,p,f,b,_)=>(u._OrtGetTensorData=X.Sa)(s,p,f,b,_),u._OrtReleaseTensor=s=>(u._OrtReleaseTensor=X.Ta)(s),u._OrtCreateRunOptions=(s,p,f,b)=>(u._OrtCreateRunOptions=X.Ua)(s,p,f,b),u._OrtAddRunConfigEntry=(s,p,f)=>(u._OrtAddRunConfigEntry=X.Va)(s,p,f),u._OrtReleaseRunOptions=s=>(u._OrtReleaseRunOptions=X.Wa)(s),u._OrtCreateBinding=s=>(u._OrtCreateBinding=X.Xa)(s),u._OrtBindInput=(s,p,f)=>(u._OrtBindInput=X.Ya)(s,p,f),u._OrtBindOutput=(s,p,f,b)=>(u._OrtBindOutput=X.Za)(s,p,f,b),u._OrtClearBoundOutputs=s=>(u._OrtClearBoundOutputs=X._a)(s),u._OrtReleaseBinding=s=>(u._OrtReleaseBinding=X.$a)(s),u._OrtRunWithBinding=(s,p,f,b,_)=>(u._OrtRunWithBinding=X.ab)(s,p,f,b,_),u._OrtRun=(s,p,f,b,_,I,O,B)=>(u._OrtRun=X.bb)(s,p,f,b,_,I,O,B),u._OrtEndProfiling=s=>(u._OrtEndProfiling=X.cb)(s),u._JsepOutput=(s,p,f)=>(u._JsepOutput=X.db)(s,p,f),u._JsepGetNodeName=s=>(u._JsepGetNodeName=X.eb)(s);var 
fr,Rt=()=>(Rt=X.fb)(),tt=u._free=s=>(tt=u._free=X.gb)(s),hr=u._malloc=s=>(hr=u._malloc=X.hb)(s),Bn=(s,p,f,b,_,I)=>(Bn=X.kb)(s,p,f,b,_,I),Li=()=>(Li=X.lb)(),Hi=(s,p,f,b,_)=>(Hi=X.mb)(s,p,f,b,_),Mn=s=>(Mn=X.nb)(s),gr=s=>(gr=X.ob)(s),Gi=()=>(Gi=X.pb)(),Fi=(s,p)=>(Fi=X.qb)(s,p),yr=s=>(yr=X.rb)(s),Rn=s=>(Rn=X.sb)(s),Un=()=>(Un=X.tb)(),qi=u.dynCall_ii=(s,p)=>(qi=u.dynCall_ii=X.vb)(s,p),ji=s=>(ji=X.wb)(s),Ki=()=>(Ki=X.xb)(),Yi=s=>(Yi=X.yb)(s),Xi=()=>(Xi=X.zb)();function Qi(){0<Ue||(y?(c(u),y||ar(Ae),startWorker(u)):(ar(bt),0<Ue||fr||(fr=!0,u.calledRun=!0,$e||(y||ar(Ae),c(u),y||ar(Me)))))}return u.___start_em_js=881578,u.___stop_em_js=881800,u.stackSave=()=>Un(),u.stackRestore=s=>yr(s),u.stackAlloc=s=>Rn(s),u.UTF8ToString=ze,u.stringToUTF8=Dt,u.lengthBytesUTF8=$n,wt=function s(){fr||Qi(),fr||(wt=s)},Qi(),h}),Ep=Oa;globalThis.self?.name==="em-pthread"&&Oa()});var Ut,Pp,zp,Op,Ma,Ra,Dp,Ua,qt=U(()=>{"use strict";Cr();Ut= false?0:import.meta.url??(typeof document<"u"?document.currentScript?.src:typeof self<"u"?self.location?.href:void 0),Pp= false||typeof location>"u"?void 0:location.origin,zp=(e,t)=>{try{let r=t??Ut;return(r?new URL(e,r):new URL(e)).origin===Pp}catch{return!1}},Op=async e=>{let r=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(r)},Ma=(za(),br(Pa)).default,Ra=async()=>{if(!Ut)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if(zp(Ut))return[void 0,Ma()];let e=await Op(Ut);return[e,Ma(e)]},Dp=(Ba(),br(Da)).default,Ua=async(e,t,r)=>[void 0,Dp]});var jn,Kn,Mr,Va,Bp,Mp,Ar,Te,gt=U(()=>{"use strict";qt();Kn=!1,Mr=!1,Va=!1,Bp=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},Mp=()=>{try{return WebAssembly.validate(new 
Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},Ar=async e=>{if(Kn)return Promise.resolve();if(Mr)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(Va)throw new Error("previous call to 'initializeWebAssembly()' failed.");Mr=!0;let t=e.initTimeout,r=e.numThreads;if(!Mp())throw new Error("WebAssembly SIMD is not supported in the current environment.");let n=Bp();r>1&&!n&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=r=1);let o=e.wasmPaths,i=typeof o=="string"?o:void 0,a=o?.mjs,d=a?.href??a,l=o?.wasm,c=l?.href??l,m=e.wasmBinary,[u,h]=await Ua(d,i,r>1),w=!1,g=[];if(t>0&&g.push(new Promise(y=>{setTimeout(()=>{w=!0,y()},t)})),g.push(new Promise((y,S)=>{let $={numThreads:r};m?$.wasmBinary=m:(c||i)&&($.locateFile=(v,x)=>c??(i??x)+v),h($).then(v=>{Mr=!1,Kn=!0,jn=v,y(),u&&URL.revokeObjectURL(u)},v=>{Mr=!1,Va=!0,S(v)})})),await Promise.race(g),w)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},Te=()=>{if(Kn&&jn)return jn;throw new Error("WebAssembly is not initialized yet.")}});var ke,Kt,ve,Rr=U(()=>{"use strict";gt();ke=(e,t)=>{let r=Te(),n=r.lengthBytesUTF8(e)+1,o=r._malloc(n);return r.stringToUTF8(e,o,n),t.push(o),o},Kt=(e,t,r,n)=>{if(typeof e=="object"&&e!==null){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach(([o,i])=>{let a=t?t+o:o;if(typeof i=="object")Kt(i,a+".",r,n);else if(typeof i=="string"||typeof i=="number")n(a,i.toString());else if(typeof i=="boolean")n(a,i?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof 
i}`)})},ve=e=>{let t=Te(),r=t.stackSave();try{let n=t.stackAlloc(8);t._OrtGetLastError(n,n+4);let o=t.HEAP32[n/4],i=t.HEAPU32[n/4+1],a=i?t.UTF8ToString(i):"";throw new Error(`${e} ERROR_CODE: ${o}, ERROR_MESSAGE: ${a}`)}finally{t.stackRestore(r)}}});var Wa,Na=U(()=>{"use strict";gt();Rr();Wa=e=>{let t=Te(),r=0,n=[],o=e||{};try{if(e?.logSeverityLevel===void 0)o.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(e?.logVerbosityLevel===void 0)o.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);e?.terminate===void 0&&(o.terminate=!1);let i=0;return e?.tag!==void 0&&(i=ke(e.tag,n)),r=t._OrtCreateRunOptions(o.logSeverityLevel,o.logVerbosityLevel,!!o.terminate,i),r===0&&ve("Can't create run options."),e?.extra!==void 0&&Kt(e.extra,"",new WeakSet,(a,d)=>{let l=ke(a,n),c=ke(d,n);t._OrtAddRunConfigEntry(r,l,c)!==0&&ve(`Can't set a run config entry: ${a} - ${d}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseRunOptions(r),n.forEach(a=>t._free(a)),i}}});var Rp,Up,Vp,Wp,La,Ha=U(()=>{"use strict";gt();Rr();Rp=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},Up=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},Vp=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(r=>(typeof r=="string"?r:r.name)==="webgpu")&&(e.enableMemPattern=!1)},Wp=(e,t,r)=>{for(let n of t){let o=typeof n=="string"?n:n.name;switch(o){case"webnn":if(o="WEBNN",typeof 
n!="string"){let d=n?.deviceType;if(d){let l=ke("deviceType",r),c=ke(d,r);Te()._OrtAddSessionConfigEntry(e,l,c)!==0&&ve(`Can't set a session config entry: 'deviceType' - ${d}.`)}}break;case"webgpu":if(o="JS",typeof n!="string"){let a=n;if(a?.preferredLayout){if(a.preferredLayout!=="NCHW"&&a.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${a.preferredLayout}`);let d=ke("preferredLayout",r),l=ke(a.preferredLayout,r);Te()._OrtAddSessionConfigEntry(e,d,l)!==0&&ve(`Can't set a session config entry: 'preferredLayout' - ${a.preferredLayout}.`)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${o}`)}let i=ke(o,r);Te()._OrtAppendExecutionProvider(e,i)!==0&&ve(`Can't append execution provider: ${o}.`)}},La=e=>{let t=Te(),r=0,n=[],o=e||{};Vp(o);try{let i=Rp(o.graphOptimizationLevel??"all"),a=Up(o.executionMode??"sequential"),d=typeof o.logId=="string"?ke(o.logId,n):0,l=o.logSeverityLevel??2;if(!Number.isInteger(l)||l<0||l>4)throw new Error(`log serverity level is not valid: ${l}`);let c=o.logVerbosityLevel??0;if(!Number.isInteger(c)||c<0||c>4)throw new Error(`log verbosity level is not valid: ${c}`);let m=typeof o.optimizedModelFilePath=="string"?ke(o.optimizedModelFilePath,n):0;if(r=t._OrtCreateSessionOptions(i,!!o.enableCpuMemArena,!!o.enableMemPattern,a,!!o.enableProfiling,0,d,l,c,m),r===0&&ve("Can't create session options."),o.executionProviders&&Wp(r,o.executionProviders,n),o.enableGraphCapture!==void 0){if(typeof o.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: ${o.enableGraphCapture}`);let u=ke("enableGraphCapture",n),h=ke(o.enableGraphCapture.toString(),n);t._OrtAddSessionConfigEntry(r,u,h)!==0&&ve(`Can't set a session config entry: 'enableGraphCapture' - ${o.enableGraphCapture}.`)}if(o.freeDimensionOverrides)for(let[u,h]of Object.entries(o.freeDimensionOverrides)){if(typeof u!="string")throw new Error(`free dimension override name 
must be a string: ${u}`);if(typeof h!="number"||!Number.isInteger(h)||h<0)throw new Error(`free dimension override value must be a non-negative integer: ${h}`);let w=ke(u,n);t._OrtAddFreeDimensionOverride(r,w,h)!==0&&ve(`Can't set a free dimension override: ${u} - ${h}.`)}return o.extra!==void 0&&Kt(o.extra,"",new WeakSet,(u,h)=>{let w=ke(u,n),g=ke(h,n);t._OrtAddSessionConfigEntry(r,w,g)!==0&&ve(`Can't set a session config entry: ${u} - ${h}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseSessionOptions(r),n.forEach(a=>t._free(a)),i}}});var Yt,yt,It,Ur,Xt,Vr,Wr,Yn,J=U(()=>{"use strict";Yt=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},yt=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},It=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],n=typeof t=="number"?t:t.reduce((o,i)=>o*i,1);return r>0?Math.ceil(n*r):void 0},Ur=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return 
BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},Xt=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},Vr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Wr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool",Yn=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}});var Qt,Xn=U(()=>{"use strict";Cr();Qt=async e=>{if(typeof e=="string")if(false){}else{let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let r=t.headers.get("Content-Length"),n=r?parseInt(r,10):0;if(n<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let o=t.body.getReader(),i;try{i=new ArrayBuffer(n)}catch(d){if(d instanceof RangeError){let l=Math.ceil(n/65536);i=new WebAssembly.Memory({initial:l,maximum:l}).buffer}else throw d}let a=0;for(;;){let{done:d,value:l}=await o.read();if(d)break;let c=l.byteLength;new Uint8Array(i,a,c).set(l),a+=c}return new Uint8Array(i,0,n)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}});var Np,Lp,Ga,Fa,Nr,Hp,pe,Xe=U(()=>{"use strict";J();Np=["V","I","W","E","F"],Lp=(e,t)=>{console.log(`[${Np[e]},${new Date().toISOString()}]${t}`)},Nr=(e,t)=>{Ga=e,Fa=t},Hp=(e,t)=>{let r=Xt(e),n=Xt(Ga);r>=n&&Lp(r,typeof t=="function"?t():t)},pe=(...e)=>{Fa&&Hp(...e)}});var Lr,Qn=U(()=>{"use strict";J();Lr=(e,t)=>new(Ur(t))(e)});var Hr=U(()=>{"use strict"});var qa,Zn,Jn,Gp,Fp,ja,to,eo,Ya,Xa=U(()=>{"use strict";Xe();Hr();qa=new 
Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),Zn=[],Jn=e=>Math.ceil(e/16)*16,Gp=e=>{for(let t=0;t<Zn.length;t++){let r=Zn[t];if(e<=r)return r}return Math.ceil(e/16)*16},Fp=1,ja=()=>Fp++,to=async(e,t,r,n)=>{let o=Jn(r),i=e.device.createBuffer({size:o,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let a=e.getCommandEncoder();e.endComputePass(),a.copyBufferToBuffer(t,0,i,0,o),e.flush(),await i.mapAsync(GPUMapMode.READ);let d=i.getMappedRange();if(n){let l=n();return l.set(new Uint8Array(d,0,r)),l}else return new Uint8Array(d.slice(0,r))}finally{i.destroy()}},eo=class{constructor(t){this.backend=t;this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersForUploadingPending=[],this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[r]of qa)Zn.push(r),this.freeBuffers.set(r,[]),this.freeUniformBuffers.set(r,[]);this.sessionCount=0}upload(t,r){let n=r.buffer,o=r.byteOffset,i=r.byteLength,a=Jn(i),d=this.storageCache.get(t);if(!d)throw new Error("gpu data for uploading does not exist");if(d.originalSize!==i)throw new Error(`inconsistent data size. 
gpu data size=${d.originalSize}, data size=${i}`);let l=this.backend.device.createBuffer({mappedAtCreation:!0,size:a,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),c=l.getMappedRange();new Uint8Array(c).set(new Uint8Array(n,o,i)),l.unmap();let m=this.backend.getCommandEncoder();this.backend.endComputePass(),m.copyBufferToBuffer(l,0,d.gpuData.buffer,0,a),pe("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${t})`),this.buffersForUploadingPending.push(l)}memcpy(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("source gpu data for memcpy does not exist");let o=this.storageCache.get(r);if(!o)throw new Error("destination gpu data for memcpy does not exist");if(n.originalSize!==o.originalSize)throw new Error("inconsistent source and destination gpu data size");let i=Jn(n.originalSize),a=this.backend.getCommandEncoder();this.backend.endComputePass(),a.copyBufferToBuffer(n.gpuData.buffer,0,o.gpuData.buffer,0,i)}registerExternalBuffer(t,r,n){let o;if(n){if(o=n[0],t===n[1])return pe("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, buffer is the same, skip.`),o;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
3812
+ Please use the previous external buffer!`)}else o=ja();return this.storageCache.set(o,{gpuData:{id:o,type:0,buffer:t},originalSize:r}),pe("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, registered.`),o}unregisterExternalBuffer(t){t!==void 0&&(this.storageCache.delete(t),pe("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${t}`))}create(t,r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let n=Gp(t),o,i=(r&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,a=(r&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(i||a){let c=(i?this.freeBuffers:this.freeUniformBuffers).get(n);c?c.length>0?o=c.pop():o=this.backend.device.createBuffer({size:n,usage:r}):o=this.backend.device.createBuffer({size:n,usage:r})}else o=this.backend.device.createBuffer({size:n,usage:r});let d={id:ja(),type:0,buffer:o};return this.storageCache.set(d.id,{gpuData:d,originalSize:t}),pe("verbose",()=>`[WebGPU] GpuDataManager.create(size=${t}) => id=${d.id}`),d}get(t){return this.storageCache.get(t)?.gpuData}release(t){let r=this.storageCache.get(t);if(!r){if(this.storageCache.size===0)return 0;throw new Error("releasing data does not exist")}return pe("verbose",()=>`[WebGPU] GpuDataManager.release(id=${t}), gpuDataId=${r.gpuData.id}`),this.storageCache.delete(t),this.buffersPending.push(r.gpuData.buffer),r.originalSize}async download(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("data does not exist");await to(this.backend,n.gpuData.buffer,n.originalSize,r)}refreshPendingBuffers(){for(let t of this.buffersForUploadingPending)t.destroy();if(this.buffersForUploadingPending=[],this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let t of this.buffersPending){let r=qa.get(t.size);if((t.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let n=this.freeBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else 
if((t.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let n=this.freeUniformBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else t.destroy()}this.buffersPending=[]}else{let t=this.capturedPendingBuffers.get(this.backend.currentSessionId);t||(t=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,t));for(let r of this.buffersPending)t.push(r);this.buffersPending=[]}}dispose(){this.freeBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.freeUniformBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache.forEach(t=>{t.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}onCreateSession(){this.sessionCount+=1}onReleaseSession(t){let r=this.capturedPendingBuffers.get(t);r&&(r.forEach(n=>{n.destroy()}),this.capturedPendingBuffers.delete(t)),this.sessionCount-=1,this.sessionCount===0&&(pe("warning",()=>"[WebGPU] Clearing webgpu buffer cache"),this.storageCache.forEach(n=>{n.gpuData.buffer.destroy()}),this.storageCache=new Map)}},Ya=(...e)=>new eo(...e)});var ro,ee,Ie=U(()=>{"use strict";ro=class{constructor(t){Object.assign(this,t)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(t=>`${this[t]}`).join(";")),this.key}},ee=e=>new ro(e)});var no,rt,k,Ct,Gr,Qa,Za,ae=U(()=>{"use strict";no=class{static calcMatMulShape(t,r){return t[1]!==r[0]?void 0:[t[0],r[1]]}},rt=class{static calcShape(t,r,n=!1){let o=t.length,i=r.length;if(o===0)return r;if(i===0)return t;let a=Math.max(t.length,r.length),d=new Array(a);if(n){if(o<2||i<2)return;let l=no.calcMatMulShape([t[o-2],t[o-1]],[r[i-2],r[i-1]]);if(l===void 0)return;[d[a-2],d[a-1]]=l}for(let l=n?3:1;l<=a;l++){let c=o-l<0?1:t[o-l],m=i-l<0?1:r[i-l];if(c!==m&&c>1&&m>1)return;let u=Math.max(c,m);if(c&&m)d[a-l]=Math.max(c,m);else{if(u>1)return;d[a-l]=0}}return d}static 
isValidBroadcast(t,r){let n=t.length,o=r.length;if(n>o)return!1;for(let i=1;i<=n;i++)if(t[n-i]!==1&&t[n-i]!==r[o-i])return!1;return!0}},k=class e{static size(t){return e.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,r=4){let n=t.length;if(n===0)return[];let o=new Array(n),i=n-1;for(;i>=0;){if(t[i]%r===0){o[i]=t[i]/r;break}if(r%t[i]!==0)throw new Error("cannot convert shape");o[i]=1,r/=t[i],i--}for(i--;i>=0;i--)o[i]=t[i];return o}static sizeFromDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,r,t.length)}static sizeToDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeToDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,0,r)}static getSizeFromDimensionRange(t,r,n){let o=1;for(let i=r;i<n;i++){if(t[i]<0)throw new Error("cannot get valid size from specified dimension range. Most likely the range contains negative values in them.");o*=t[i]}return o}static computeStrides(t){let r=t.length;if(r===0)return[];if(r===1)return[1];let n=new Array(r);n[r-1]=1,n[r-2]=t[r-1];for(let o=r-3;o>=0;--o)n[o]=n[o+1]*t[o+1];return n}static normalizeAxis(t,r){if(t<-r&&t>=r)throw new Error("unsupported axis for this operation.");return t<0?t+r:t}static normalizeAxes(t,r){return t.map(n=>this.normalizeAxis(n,r??t.length))}static sortBasedOnPerm(t,r){return r?r.map(n=>t[n]):t.slice().reverse()}static padShape(t,r){let n=t.length;return t.map((o,i)=>o+r[i]+r[i+n])}static areEqual(t,r){return t.length!==r.length?!1:t.every((n,o)=>n===r[o])}},Ct=class e{static adjustPoolAttributes(t,r,n,o,i,a){if(!t&&n.length!==r.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let d=0;d<r.length-2;d++)d>=n.length?n.push(r[d+2]):n[d]=r[d+2];for(let d=0;d<n.length;d++)if(d<o.length){if(o[d]<0)throw new Error("strides should be 
greater than or equal to 1")}else o.push(1);for(let d=0;d<n.length;d++)if(d<i.length){if(i[d]<0)throw new Error("dilations should be greater than or equal to 1")}else i.push(1);for(let d=0;d<n.length*2;d++)if(d<a.length){if(a[d]<0)throw new Error("pad should be greater than or equal to 1")}else a.push(0);for(let d=0;d<n.length;d++){if(n[d]<=0)throw new Error("kernel shapes need to be greater than 0");if(a[d]>=n[d]||a[d+n.length]>=n[d])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,r,n,o,i,a,d){if(d){if(i.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(r.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(o.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let l=0;l<t.length-2;l++)e.adjustPadAndReturnShape(t[l+(a?1:2)],r[l],n[l],o[l],i,l,l+t.length-2,d)}}static computePoolOutputShape(t,r,n,o,i,a,d){if(r.length<=0)throw new Error("input shape must be of size greater than 0");let l=[r[0],r[1]];return e.computeShapeHelper(t,r,l,n,o,i,a,d),l}static computeConvOutputShape(t,r,n,o,i,a,d){if(t.length<=0||r.length<=0)throw new Error("invalid input tensor dims or invalid filter tensor dims");let l=[t[0],r[0]];return e.computeShapeHelper(!1,t,l,n,o,i,a,d),l}static computeShapeHelper(t,r,n,o,i,a,d,l){if(t)for(let c=0;c<r.length-2;c++)n.push(1);else for(let c=0;c<r.length-2;c++)n.push(e.adjustPadAndReturnShape(r[c+2],o[c],i[c],a[c],d,c,c+r.length-2,l))}static adjustPadAndReturnShape(t,r,n,o,i,a,d,l){let c=n*(o-1)+1;if(l&&l!=="NOTSET")switch(l){case"VALID":return i[a]=0,i[d]=0,Math.floor((t-c)/r+1);case"SAME_LOWER":case"SAME_UPPER":if(n!==1)throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");{let u=((t+r-1)/r-1)*r+o-t;return i[a]=Math.floor(l==="SAME_LOWER"?(u+1)/2:u/2),i[d]=u-i[a],Math.floor((t+u-o)/r+1)}default:throw new Error("Unsupported AutoPad 
type")}else return Math.floor((t+i[a]+i[d]-c)/r+1)}},Gr=class{static getShapeOfGemmResult(t,r,n,o,i){if(t.length!==2||n.length!==2)throw new Error("shape need to be of size 2");let a,d,l;r?(a=t[1],d=t[0]):(a=t[0],d=t[1]);let c=-1;if(o?(l=n[0],c=1):(l=n[1],c=0),n[c]!==d)throw new Error("dimension mismatch");if(a<=0||l<=0||d<=0)throw new Error("invalid shape specified");if(i&&!rt.isValidBroadcast(i,[a,l]))throw new Error("gemm: invalid bias shape for broadcast");return[a,l,d]}},Qa=-34028234663852886e22,Za=34028234663852886e22});var At,io,he,Ee,V,we,ao,kt,Qe,F,so,E,M,Fr,oo,Ja,Wt,se=U(()=>{"use strict";J();ae();At=64,io=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(e){case 10:return t>1?`vec${t}<f16>`:"f16";case 1:return t>1?`vec${t}<f32>`:"f32";case 6:return t>1?`vec${t}<i32>`:"i32";case 12:return t>1?`vec${t}<u32>`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4<bool>"];case 22:return"i32";case 21:return"u32";default:throw new Error(`Unknown data type: ${e}`)}},he=(e,t=1)=>{let r=io(e,t);return typeof r=="string"?r:r[0]},Ee=(e,t=1)=>{let r=io(e,t);return typeof r=="string"?r:r[1]},V=(...e)=>{let t=[];return e.forEach(r=>{r.length!==0&&t.push({type:12,data:r},{type:12,data:k.computeStrides(r)})}),t},we=e=>e%4===0?4:e%2===0?2:1,ao=(e="f32",t,r="0")=>!t||t===1?`${e}(${r})`:`vec${t}<${e}>(${r})`,kt=(e,t,r)=>e==="f32"?r:t===1?`f32(${r})`:`vec${t}<f32>(${r})`,Qe=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,F=(e,t,r,n)=>e.startsWith("uniforms.")&&r>4?typeof t=="string"?n==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 
4]`:n==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:r>1?`${e}[${t}]`:e,so=(e,t,r,n,o)=>{let i=typeof r=="number",a=i?r:r.length,d=[...new Array(a).keys()],l=a<2?"u32":a<=4?`vec${a}<u32>`:`array<u32, ${a}>`,c=io(t,o),m=typeof c=="string"?c:c[1],u=typeof c=="string"?c:c[0],h={indices:l,value:m,storage:u,tensor:t},w=R=>typeof R=="string"?R:`${R}u`,g={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},y=i?"uniforms.":"",S=`${y}${e}_shape`,$=`${y}${e}_strides`,v="";for(let R=0;R<a-1;R++)v+=`
3813
+ let dim${R} = current / ${F($,R,a)};
3814
+ let rest${R} = current % ${F($,R,a)};
3815
+ indices[${R}] = dim${R};
3816
+ current = rest${R};
3817
3817
  `;v+=`indices[${a-1}] = current;`;let x=a<2?"":`
3818
3818
  fn o2i_${e}(offset: u32) -> ${h.indices} {
3819
3819
  var indices: ${h.indices};
3820
3820
  var current = offset;
3821
3821
  ${v}
3822
3822
  return indices;
3823
- }`,T=U=>(g.offsetToIndices=!0,a<2?U:`o2i_${e}(${U})`),C=[];if(a>=2)for(let U=a-1;U>=0;U--)C.push(`${F($,U,a)} * (indices[${U}])`);let A=a<2?"":`
3823
+ }`,T=R=>(g.offsetToIndices=!0,a<2?R:`o2i_${e}(${R})`),C=[];if(a>=2)for(let R=a-1;R>=0;R--)C.push(`${F($,R,a)} * (indices[${R}])`);let A=a<2?"":`
3824
3824
  fn i2o_${e}(indices: ${h.indices}) -> u32 {
3825
3825
  return ${C.join("+")};
3826
- }`,P=U=>(g.indicesToOffset=!0,a<2?U:`i2o_${e}(${U})`),B=(...U)=>a===0?"0u":`${h.indices}(${U.map(w).join(",")})`,N=(U,G)=>a<2?`${U}`:`${F(U,G,a)}`,W=(U,G,ye)=>a<2?`${U}=${ye};`:`${F(U,G,a)}=${ye};`,K={},Z=(U,G)=>{g.broadcastedIndicesToOffset=!0;let ye=`${G.name}broadcastedIndicesTo${e}Offset`;if(ye in K)return`${ye}(${U})`;let Re=[];for(let $e=a-1;$e>=0;$e--){let Ce=G.indicesGet("outputIndices",$e+G.rank-a);Re.push(`${N($,$e)} * (${Ce} % ${N(S,$e)})`)}return K[ye]=`fn ${ye}(outputIndices: ${G.type.indices}) -> u32 {
3826
+ }`,P=R=>(g.indicesToOffset=!0,a<2?R:`i2o_${e}(${R})`),D=(...R)=>a===0?"0u":`${h.indices}(${R.map(w).join(",")})`,W=(R,G)=>a<2?`${R}`:`${F(R,G,a)}`,N=(R,G,ye)=>a<2?`${R}=${ye};`:`${F(R,G,a)}=${ye};`,j={},Y=(R,G)=>{g.broadcastedIndicesToOffset=!0;let ye=`${G.name}broadcastedIndicesTo${e}Offset`;if(ye in j)return`${ye}(${R})`;let Re=[];for(let $e=a-1;$e>=0;$e--){let Ce=G.indicesGet("outputIndices",$e+G.rank-a);Re.push(`${W($,$e)} * (${Ce} % ${W(S,$e)})`)}return j[ye]=`fn ${ye}(outputIndices: ${G.type.indices}) -> u32 {
3827
3827
  return ${Re.length>0?Re.join("+"):"0u"};
3828
- }`,`${ye}(${U})`},ee=(U,G)=>(()=>{if(h.storage===h.value)return`${e}[${U}]=${G};`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`${e}[${U}]=vec2<u32>(u32(${G}), select(0u, 0xFFFFFFFFu, ${G} < 0));`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`${e}[${U}]=vec2<u32>(u32(${G}), 0u);`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`${e}[${U}]=dot(vec4<u32>(0x1, 0x100, 0x10000, 0x1000000), vec4<u32>(${G}));`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),se=U=>(()=>{if(h.storage===h.value)return`${e}[${U}]`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`i32(${e}[${U}].x)`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`u32(${e}[${U}].x)`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`vec4<bool>(bool(${e}[${U}] & 0xFFu), bool(${e}[${U}] & 0xFF00u), bool(${e}[${U}] & 0xFF0000u), bool(${e}[${U}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),de=a<2?"":`
3828
+ }`,`${ye}(${R})`},Z=(R,G)=>(()=>{if(h.storage===h.value)return`${e}[${R}]=${G};`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`${e}[${R}]=vec2<u32>(u32(${G}), select(0u, 0xFFFFFFFFu, ${G} < 0));`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`${e}[${R}]=vec2<u32>(u32(${G}), 0u);`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`${e}[${R}]=dot(vec4<u32>(0x1, 0x100, 0x10000, 0x1000000), vec4<u32>(${G}));`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),te=R=>(()=>{if(h.storage===h.value)return`${e}[${R}]`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`i32(${e}[${R}].x)`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`u32(${e}[${R}].x)`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`vec4<bool>(bool(${e}[${R}] & 0xFFu), bool(${e}[${R}] & 0xFF00u), bool(${e}[${R}] & 0xFF0000u), bool(${e}[${R}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),ue=a<2?"":`
3829
3829
  fn get_${e}ByIndices(indices: ${h.indices}) -> ${m} {
3830
- return ${se(`i2o_${e}(indices)`)};
3831
- }`,Y=a<2?"":(()=>{let U=d.map(ye=>`d${ye}: u32`).join(", "),G=d.map(ye=>`d${ye}`).join(", ");return`
3832
- fn get_${e}(${U}) -> ${m} {
3833
- return get_${e}ByIndices(${B(G)});
3834
- }`})(),le=(...U)=>{if(U.length!==a)throw new Error(`indices length must be ${a}`);let G=U.map(w).join(",");return a===0?se("0u"):a===1?se(G[0]):(g.get=!0,g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}(${G})`)},ce=U=>a<2?se(U):(g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}ByIndices(${U})`),q=a<2?"":`
3830
+ return ${te(`i2o_${e}(indices)`)};
3831
+ }`,K=a<2?"":(()=>{let R=d.map(ye=>`d${ye}: u32`).join(", "),G=d.map(ye=>`d${ye}`).join(", ");return`
3832
+ fn get_${e}(${R}) -> ${m} {
3833
+ return get_${e}ByIndices(${D(G)});
3834
+ }`})(),de=(...R)=>{if(R.length!==a)throw new Error(`indices length must be ${a}`);let G=R.map(w).join(",");return a===0?te("0u"):a===1?te(G[0]):(g.get=!0,g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}(${G})`)},ce=R=>a<2?te(R):(g.getByIndices=!0,g.indicesToOffset=!0,`get_${e}ByIndices(${R})`),q=a<2?"":`
3835
3835
  fn set_${e}ByIndices(indices: ${h.indices}, value: ${m}) {
3836
- ${ee(`i2o_${e}(indices)`,"value")}
3837
- }`,ue=a<2?"":(()=>{let U=d.map(ye=>`d${ye}: u32`).join(", "),G=d.map(ye=>`d${ye}`).join(", ");return`
3838
- fn set_${e}(${U}, value: ${m}) {
3839
- set_${e}ByIndices(${B(G)}, value);
3840
- }`})();return{impl:()=>{let U=[],G=!1;return g.offsetToIndices&&(U.push(x),G=!0),g.indicesToOffset&&(U.push(A),G=!0),g.broadcastedIndicesToOffset&&(Object.values(K).forEach(ye=>U.push(ye)),G=!0),g.set&&(U.push(ue),G=!0),g.setByIndices&&(U.push(q),G=!0),g.get&&(U.push(Y),G=!0),g.getByIndices&&(U.push(de),G=!0),!i&&G&&U.unshift(`const ${S} = ${h.indices}(${r.join(",")});`,`const ${$} = ${h.indices}(${k.computeStrides(r).join(",")});`),U.join(`
3841
- `)},type:h,offsetToIndices:T,indicesToOffset:P,broadcastedIndicesToOffset:Z,indices:B,indicesGet:N,indicesSet:W,set:(...U)=>{if(U.length!==a+1)throw new Error(`indices length must be ${a}`);let G=U[a];if(typeof G!="string")throw new Error("value must be string");let ye=U.slice(0,a).map(w).join(",");return a===0?ee("0u",G):a===1?ee(ye[0],G):(g.set=!0,g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}(${ye}, ${G})`)},setByOffset:ee,setByIndices:(U,G)=>a<2?ee(U,G):(g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}ByIndices(${U}, ${G});`),get:le,getByOffset:se,getByIndices:ce,usage:n,name:e,strides:$,shape:S,rank:a}},E=(e,t,r,n=1)=>ao(e,t,r,"input",n),M=(e,t,r,n=1)=>ao(e,t,r,"output",n),Fr=(e,t,r,n=1)=>ao(e,t,r,"internal",n),no=class{constructor(t,r){this.normalizedDispatchGroup=t;this.limits=r;this.internalVariables=[];this.variables=[];this.uniforms=[];this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(t){return`if (global_idx >= ${typeof t=="number"?`${t}u`:t}) { return; }`}mainStart(t=At){let r=typeof t=="number"?t:t[0],n=typeof t=="number"?1:t[1],o=typeof t=="number"?1:t[2];if(r>this.limits.maxComputeWorkgroupSizeX||n>this.limits.maxComputeWorkgroupSizeY||o>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);if(r*n*o>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let i=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,a=i?`@builtin(global_invocation_id) global_id : vec3<u32>,
3836
+ ${Z(`i2o_${e}(indices)`,"value")}
3837
+ }`,le=a<2?"":(()=>{let R=d.map(ye=>`d${ye}: u32`).join(", "),G=d.map(ye=>`d${ye}`).join(", ");return`
3838
+ fn set_${e}(${R}, value: ${m}) {
3839
+ set_${e}ByIndices(${D(G)}, value);
3840
+ }`})();return{impl:()=>{let R=[],G=!1;return g.offsetToIndices&&(R.push(x),G=!0),g.indicesToOffset&&(R.push(A),G=!0),g.broadcastedIndicesToOffset&&(Object.values(j).forEach(ye=>R.push(ye)),G=!0),g.set&&(R.push(le),G=!0),g.setByIndices&&(R.push(q),G=!0),g.get&&(R.push(K),G=!0),g.getByIndices&&(R.push(ue),G=!0),!i&&G&&R.unshift(`const ${S} = ${h.indices}(${r.join(",")});`,`const ${$} = ${h.indices}(${k.computeStrides(r).join(",")});`),R.join(`
3841
+ `)},type:h,offsetToIndices:T,indicesToOffset:P,broadcastedIndicesToOffset:Y,indices:D,indicesGet:W,indicesSet:N,set:(...R)=>{if(R.length!==a+1)throw new Error(`indices length must be ${a}`);let G=R[a];if(typeof G!="string")throw new Error("value must be string");let ye=R.slice(0,a).map(w).join(",");return a===0?Z("0u",G):a===1?Z(ye[0],G):(g.set=!0,g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}(${ye}, ${G})`)},setByOffset:Z,setByIndices:(R,G)=>a<2?Z(R,G):(g.setByIndices=!0,g.indicesToOffset=!0,`set_${e}ByIndices(${R}, ${G});`),get:de,getByOffset:te,getByIndices:ce,usage:n,name:e,strides:$,shape:S,rank:a}},E=(e,t,r,n=1)=>so(e,t,r,"input",n),M=(e,t,r,n=1)=>so(e,t,r,"output",n),Fr=(e,t,r,n=1)=>so(e,t,r,"internal",n),oo=class{constructor(t,r){this.normalizedDispatchGroup=t;this.limits=r;this.internalVariables=[];this.variables=[];this.uniforms=[];this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(t){return`if (global_idx >= ${typeof t=="number"?`${t}u`:t}) { return; }`}mainStart(t=At){let r=typeof t=="number"?t:t[0],n=typeof t=="number"?1:t[1],o=typeof t=="number"?1:t[2];if(r>this.limits.maxComputeWorkgroupSizeX||n>this.limits.maxComputeWorkgroupSizeY||o>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);if(r*n*o>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let i=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,a=i?`@builtin(global_invocation_id) global_id : vec3<u32>,
3842
3842
  @builtin(workgroup_id) workgroup_id : vec3<u32>,
3843
3843
  @builtin(local_invocation_index) local_idx : u32,
3844
3844
  @builtin(local_invocation_id) local_id : vec3<u32>`:`@builtin(global_invocation_id) global_id : vec3<u32>,
@@ -3856,8 +3856,8 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3856
3856
  struct Uniforms { ${t.join(", ")} };
3857
3857
  @group(0) @binding(${this.variableIndex}) var<uniform> uniforms: Uniforms;`}get additionalImplementations(){return this.uniformDeclaration()+this.variables.map(t=>t.impl()).join(`
3858
3858
  `)+this.internalVariables.map(t=>t.impl()).join(`
3859
- `)}get variablesInfo(){if(this.uniforms.length===0)return;let t=r=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(r)];return this.uniforms.map(r=>[t(r.type),r.length??1])}},Za=(e,t)=>new no(e,t),Nt=(e,t)=>{let r=e.length,n=[];for(let o=0;o<r;o++){let i=r-1-o,a=e[i]||1;(t[t.length-1-o]||1)>1&&a===1&&n.unshift(i)}return n}});var qp,Qa,jp,Kp,Yp,Pe,Ja,es,lt=V(()=>{"use strict";Q();ie();Ie();ae();qp=e=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.")},Qa=(e,t)=>t&&t.length!==e?[...new Array(e).keys()].reverse():t,jp=(e,t)=>k.sortBasedOnPerm(e,Qa(e.length,t)),Kp=(e,t,r,n)=>{let o=`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
3860
- var a: ${r.type.indices};`;for(let i=0;i<t;++i)o+=r.indicesSet("a",e[i],`i[${i}]`);return o+="return a;}"},Yp=(e,t)=>{let r=[],n=[];for(let o=0;o<e.length;++o)e[o]!==1&&r.push(e[o]),e[t[o]]!==1&&n.push(t[o]);return{newShape:r,newPerm:n}},Pe=(e,t)=>{let r=e.dataType,n=e.dims.length,o=Qa(n,t),i=jp(e.dims,o),{newShape:a,newPerm:d}=Yp(e.dims,o),l=k.areEqual(d,[2,3,1]),c=k.areEqual(d,[3,1,2]),m=a.length===2&&d[0]>d[1]||l||c,u=m?a:e.dims,h=i;m&&(u=l?[a[0],a[1]*a[2]]:c?[a[0]*a[1],a[2]]:a,h=[u[1],u[0]]);let w=E("a",r,u.length),g=M("output",r,h.length),y=16,S;return m?S=$=>`
3859
+ `)}get variablesInfo(){if(this.uniforms.length===0)return;let t=r=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(r)];return this.uniforms.map(r=>[t(r.type),r.length??1])}},Ja=(e,t)=>new oo(e,t),Wt=(e,t)=>{let r=e.length,n=[];for(let o=0;o<r;o++){let i=r-1-o,a=e[i]||1;(t[t.length-1-o]||1)>1&&a===1&&n.unshift(i)}return n}});var qp,es,jp,Kp,Yp,Pe,ts,rs,lt=U(()=>{"use strict";J();ae();Ie();se();qp=e=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.")},es=(e,t)=>t&&t.length!==e?[...new Array(e).keys()].reverse():t,jp=(e,t)=>k.sortBasedOnPerm(e,es(e.length,t)),Kp=(e,t,r,n)=>{let o=`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
3860
+ var a: ${r.type.indices};`;for(let i=0;i<t;++i)o+=r.indicesSet("a",e[i],`i[${i}]`);return o+="return a;}"},Yp=(e,t)=>{let r=[],n=[];for(let o=0;o<e.length;++o)e[o]!==1&&r.push(e[o]),e[t[o]]!==1&&n.push(t[o]);return{newShape:r,newPerm:n}},Pe=(e,t)=>{let r=e.dataType,n=e.dims.length,o=es(n,t),i=jp(e.dims,o),{newShape:a,newPerm:d}=Yp(e.dims,o),l=k.areEqual(d,[2,3,1]),c=k.areEqual(d,[3,1,2]),m=a.length===2&&d[0]>d[1]||l||c,u=m?a:e.dims,h=i;m&&(u=l?[a[0],a[1]*a[2]]:c?[a[0]*a[1],a[2]]:a,h=[u[1],u[0]]);let w=E("a",r,u.length),g=M("output",r,h.length),y=16,S;return m?S=$=>`
3861
3861
  ${$.registerUniform("output_size","u32").declareVariables(w,g)}
3862
3862
  var<workgroup> tile : array<array<${g.type.value}, ${y+1}>, ${y}>;
3863
3863
  ${$.mainStart([y,y,1])}
@@ -3888,7 +3888,7 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3888
3888
  let aIndices = perm(indices);
3889
3889
 
3890
3890
  ${g.setByOffset("global_idx",w.getByIndices("aIndices"))}
3891
- }`,{name:m?"TransposeShared":"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let $=k.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:m?{x:Math.ceil(h[1]/y),y:Math.ceil(h[0]/y)}:{x:Math.ceil($/64)},programUniforms:[{type:12,data:$},...R(u,h)]}},getShaderSource:S}},Ja=(e,t)=>{qp(e.inputs),e.compute(Pe(e.inputs[0],t.perm))},es=e=>J({perm:e.perm})});var Xp,Zp,Qp,Jp,em,tm,rm,nm,om,im,nt,ts,rs,ns,os,is,as,ss,us,ds,ls,cs=V(()=>{"use strict";Q();ie();ae();qr();lt();Xp={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},Zp={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + candidate"},Qp={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},Jp={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},em=(e,t)=>{let r=[];for(let n=t-e;n<t;++n)r.push(n);return r},tm=(e,t)=>{let r=[],n=e.length;for(let i=0;i<n;i++)t.indexOf(i)===-1&&r.push(e[i]);let o=t.map(i=>e[i]);return[r,o]},rm=(e,t)=>{let r=e.length+t.length,n=[],o=0;for(let i=0;i<r;i++)t.indexOf(i)===-1?n.push(e[o++]):n.push(1);return n},nm=(e,t)=>{for(let r=0;r<e.length;++r)if(e[e.length-r-1]!==t-1-r)return!1;return!0},om=(e,t)=>{let r=[];if(!nm(e,t)){for(let 
n=0;n<t;++n)e.indexOf(n)===-1&&r.push(n);e.forEach(n=>r.push(n))}return r},im=(e,t,r,n,o,i,a)=>{let d=r[0].dims,l=k.size(i),c=k.size(a),m=E("_A",r[0].dataType,d),u=M("output",o,i),h=32,w=`
3891
+ }`,{name:m?"TransposeShared":"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let $=k.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:m?{x:Math.ceil(h[1]/y),y:Math.ceil(h[0]/y)}:{x:Math.ceil($/64)},programUniforms:[{type:12,data:$},...V(u,h)]}},getShaderSource:S}},ts=(e,t)=>{qp(e.inputs),e.compute(Pe(e.inputs[0],t.perm))},rs=e=>ee({perm:e.perm})});var Xp,Qp,Zp,Jp,em,tm,rm,nm,om,im,nt,ns,os,is,as,ss,us,ds,ls,cs,ps,ms=U(()=>{"use strict";J();ae();se();qr();lt();Xp={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},Qp={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + candidate"},Zp={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},Jp={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},em=(e,t)=>{let r=[];for(let n=t-e;n<t;++n)r.push(n);return r},tm=(e,t)=>{let r=[],n=e.length;for(let i=0;i<n;i++)t.indexOf(i)===-1&&r.push(e[i]);let o=t.map(i=>e[i]);return[r,o]},rm=(e,t)=>{let r=e.length+t.length,n=[],o=0;for(let i=0;i<r;i++)t.indexOf(i)===-1?n.push(e[o++]):n.push(1);return n},nm=(e,t)=>{for(let r=0;r<e.length;++r)if(e[e.length-r-1]!==t-1-r)return!1;return!0},om=(e,t)=>{let r=[];if(!nm(e,t)){for(let 
n=0;n<t;++n)e.indexOf(n)===-1&&r.push(n);e.forEach(n=>r.push(n))}return r},im=(e,t,r,n,o,i,a)=>{let d=r[0].dims,l=k.size(i),c=k.size(a),m=E("_A",r[0].dataType,d),u=M("output",o,i),h=32,w=`
3892
3892
  var<workgroup> aBestValues : array<f32, ${h}>;
3893
3893
  `;return{name:e,shaderCache:t,getShaderSource:y=>`
3894
3894
  ${y.registerUniform("reduceSize","u32").declareVariables(m,u)}
@@ -3901,7 +3901,7 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3901
3901
  let outputIndex = global_idx / ${h};
3902
3902
  let offset = outputIndex * uniforms.reduceSize;
3903
3903
 
3904
- var bestValue = f32(${Qp[n]});
3904
+ var bestValue = f32(${Zp[n]});
3905
3905
  let Length = uniforms.reduceSize;
3906
3906
  for (var k = local_idx; k < Length; k = k + ${h}) {
3907
3907
  let candidate = f32(${m.getByOffset("offset + k")});
@@ -3916,7 +3916,7 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3916
3916
  let interval = DIV_CEIL(reduceSize, 2u);
3917
3917
  if (local_idx < currentSize) {
3918
3918
  let candidate = aBestValues[local_idx + interval];
3919
- bestValue = ${Zp[n]};
3919
+ bestValue = ${Qp[n]};
3920
3920
  aBestValues[local_idx] = bestValue;
3921
3921
  }
3922
3922
  reduceSize = interval;
@@ -3926,7 +3926,7 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3926
3926
  if (local_idx == 0u) {
3927
3927
  ${u.setByOffset("outputIndex",`${n==="mean"?`${u.type.storage}(bestValue / f32(uniforms.reduceSize))`:`${u.type.storage}(${Jp[n]})`}`)};
3928
3928
  }
3929
- }`,getRunData:()=>({outputs:[{dims:i,dataType:o}],dispatchGroup:{x:l},programUniforms:[{type:12,data:c}]})}},nt=(e,t,r,n)=>{let o=e.inputs.length===1?r:so(e.inputs,r),i=o.axes;i.length===0&&!o.noopWithEmptyAxes&&(i=e.inputs[0].dims.map((w,g)=>g));let a=k.normalizeAxes(i,e.inputs[0].dims.length),d=a,l=e.inputs[0],c=om(d,e.inputs[0].dims.length);c.length>0&&(l=e.compute(Pe(e.inputs[0],c),{inputs:[0],outputs:[-1]})[0],d=em(d.length,l.dims.length));let[m,u]=tm(l.dims,d),h=m;o.keepDims&&(h=rm(m,a)),e.compute(im(t,{hint:o.cacheKey,inputDependencies:["type"]},[l],n,e.inputs[0].dataType,h,u),{inputs:[l]})},ts=(e,t)=>{nt(e,"ReduceMeanShared",t,"mean")},rs=(e,t)=>{nt(e,"ReduceL1Shared",t,"l1")},ns=(e,t)=>{nt(e,"ReduceL2Shared",t,"l2")},os=(e,t)=>{nt(e,"ReduceLogSumExpShared",t,"logSumExp")},is=(e,t)=>{nt(e,"ReduceMaxShared",t,"max")},as=(e,t)=>{nt(e,"ReduceMinShared",t,"min")},ss=(e,t)=>{nt(e,"ReduceProdShared",t,"prod")},us=(e,t)=>{nt(e,"ReduceSumShared",t,"sum")},ds=(e,t)=>{nt(e,"ReduceSumSquareShared",t,"sumSquare")},ls=(e,t)=>{nt(e,"ReduceLogSumShared",t,"logSum")}});var ot,am,jr,so,it,sm,um,dm,lm,cm,pm,mm,fm,hm,gm,at,ps,ms,fs,hs,gs,ys,bs,ws,_s,vs,qr=V(()=>{"use strict";Q();ie();Ie();ae();cs();ot=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},am=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],jr=(e,t,r,n,o,i,a=!1,d=!1)=>{let l=[],c=r[0].dims,m=c.length,u=k.normalizeAxes(o,m),h=!d&&u.length===0;c.forEach((S,$)=>{h||u.indexOf($)>=0?a&&l.push(1):l.push(S)});let w=l.length,g=k.size(l);return{name:e,shaderCache:t,getShaderSource:S=>{let $=[],v=E("_A",r[0].dataType,m),x=M("output",i,w),T=n(v,x,u),C=T[2];for(let A=0,P=0;A<m;A++)h||u.indexOf(A)>=0?(a&&P++,C=`for(var j${A}: u32 = 0; j${A} < ${c[A]}; j${A}++) {
3929
+ }`,getRunData:()=>({outputs:[{dims:i,dataType:o}],dispatchGroup:{x:l},programUniforms:[{type:12,data:c}]})}},nt=(e,t,r,n)=>{let o=e.inputs.length===1?r:uo(e.inputs,r),i=o.axes;i.length===0&&!o.noopWithEmptyAxes&&(i=e.inputs[0].dims.map((w,g)=>g));let a=k.normalizeAxes(i,e.inputs[0].dims.length),d=a,l=e.inputs[0],c=om(d,e.inputs[0].dims.length);c.length>0&&(l=e.compute(Pe(e.inputs[0],c),{inputs:[0],outputs:[-1]})[0],d=em(d.length,l.dims.length));let[m,u]=tm(l.dims,d),h=m;o.keepDims&&(h=rm(m,a)),e.compute(im(t,{hint:o.cacheKey,inputDependencies:["type"]},[l],n,e.inputs[0].dataType,h,u),{inputs:[l]})},ns=(e,t)=>{nt(e,"ReduceMeanShared",t,"mean")},os=(e,t)=>{nt(e,"ReduceL1Shared",t,"l1")},is=(e,t)=>{nt(e,"ReduceL2Shared",t,"l2")},as=(e,t)=>{nt(e,"ReduceLogSumExpShared",t,"logSumExp")},ss=(e,t)=>{nt(e,"ReduceMaxShared",t,"max")},us=(e,t)=>{nt(e,"ReduceMinShared",t,"min")},ds=(e,t)=>{nt(e,"ReduceProdShared",t,"prod")},ls=(e,t)=>{nt(e,"ReduceSumShared",t,"sum")},cs=(e,t)=>{nt(e,"ReduceSumSquareShared",t,"sumSquare")},ps=(e,t)=>{nt(e,"ReduceLogSumShared",t,"logSum")}});var ot,am,jr,uo,it,sm,um,dm,lm,cm,pm,mm,fm,hm,gm,at,fs,hs,gs,ys,bs,ws,_s,vs,$s,xs,qr=U(()=>{"use strict";J();ae();Ie();se();ms();ot=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},am=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],jr=(e,t,r,n,o,i,a=!1,d=!1)=>{let l=[],c=r[0].dims,m=c.length,u=k.normalizeAxes(o,m),h=!d&&u.length===0;c.forEach((S,$)=>{h||u.indexOf($)>=0?a&&l.push(1):l.push(S)});let w=l.length,g=k.size(l);return{name:e,shaderCache:t,getShaderSource:S=>{let $=[],v=E("_A",r[0].dataType,m),x=M("output",i,w),T=n(v,x,u),C=T[2];for(let A=0,P=0;A<m;A++)h||u.indexOf(A)>=0?(a&&P++,C=`for(var j${A}: u32 = 0; j${A} < ${c[A]}; j${A}++) {
3930
3930
  ${T[2].includes("last_index")?`let last_index = j${A};`:""}
3931
3931
  ${v.indicesSet("input_indices",A,`j${A}`)}
3932
3932
  ${C}
@@ -3947,160 +3947,195 @@ var Un=Object.defineProperty;var vp=Object.getOwnPropertyDescriptor;var $p=Objec
3947
3947
  ${T[3]}
3948
3948
  ${T.length===4?x.setByOffset("global_idx","value"):T.slice(4).join(`
3949
3949
  `)}
3950
- }`},getRunData:()=>({outputs:[{dims:l,dataType:i}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...R(c,l)]})}},so=(e,t)=>{let r=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(n=>r.push(Number(n))),J({axes:r,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},it=(e,t,r,n)=>{let o=e.inputs,i=o.length===1?r:so(o,r);e.compute(jr(t,{hint:i.cacheKey,inputDependencies:["rank"]},[o[0]],i.noopWithEmptyAxes&&i.axes.length===0?am:n,i.axes,o[0].dataType,i.keepDims,i.noopWithEmptyAxes),{inputs:[0]})},sm=(e,t)=>{ot(e.inputs),it(e,"ReduceLogSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,"value = log(value);"])},um=(e,t)=>{ot(e.inputs),it(e,"ReduceL1",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += abs(${n.getByIndices("input_indices")});`,""])},dm=(e,t)=>{ot(e.inputs),it(e,"ReduceL2",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},lm=(e,t)=>{ot(e.inputs),it(e,"ReduceLogSumExp",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += exp(${n.getByIndices("input_indices")});`,"value = log(value);"])},cm=(e,t)=>{ot(e.inputs),it(e,"ReduceMax",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(n.indicesSet("input_indices",d,0));return[`${a.join(`
3950
+ }`},getRunData:()=>({outputs:[{dims:l,dataType:i}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...V(c,l)]})}},uo=(e,t)=>{let r=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(n=>r.push(Number(n))),ee({axes:r,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},it=(e,t,r,n)=>{let o=e.inputs,i=o.length===1?r:uo(o,r);e.compute(jr(t,{hint:i.cacheKey,inputDependencies:["rank"]},[o[0]],i.noopWithEmptyAxes&&i.axes.length===0?am:n,i.axes,o[0].dataType,i.keepDims,i.noopWithEmptyAxes),{inputs:[0]})},sm=(e,t)=>{ot(e.inputs),it(e,"ReduceLogSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,"value = log(value);"])},um=(e,t)=>{ot(e.inputs),it(e,"ReduceL1",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += abs(${n.getByIndices("input_indices")});`,""])},dm=(e,t)=>{ot(e.inputs),it(e,"ReduceL2",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},lm=(e,t)=>{ot(e.inputs),it(e,"ReduceLogSumExp",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += exp(${n.getByIndices("input_indices")});`,"value = log(value);"])},cm=(e,t)=>{ot(e.inputs),it(e,"ReduceMax",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(n.indicesSet("input_indices",d,0));return[`${a.join(`
3951
3951
  `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = max(value, ${n.getByIndices("input_indices")});`,""]})},pm=(e,t)=>{ot(e.inputs),it(e,"ReduceMean",t,(n,o,i)=>{let a=1;for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&(a*=e.inputs[0].dims[d]);return["var sum = f32(0);","",`sum += f32(${n.getByIndices("input_indices")});`,`let value = ${o.type.value}(sum / ${a});`]})},mm=(e,t)=>{ot(e.inputs),it(e,"ReduceMin",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
3952
- `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = min(value, ${n.getByIndices("input_indices")});`,""]})},fm=(e,t)=>{ot(e.inputs),it(e,"ReduceProd",t,(n,o)=>[`var value = ${o.type.storage}(1);`,"",`value *= ${n.getByIndices("input_indices")};`,""])},hm=(e,t)=>{ot(e.inputs),it(e,"ReduceSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,""])},gm=(e,t)=>{ot(e.inputs),it(e,"ReduceSumSquare",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += t * t;`,""])},at=(e,t,r)=>{if(t.length===0)return r;let n=1,o=1;for(let i=0;i<t.length;i++)t.indexOf(i)===-1?n*=e[i]:o*=e[i];return o<32&&n>1024},ps=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?pm(e,t):ts(e,t)},ms=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?um(e,t):rs(e,t)},fs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?dm(e,t):ns(e,t)},hs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?lm(e,t):os(e,t)},gs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?cm(e,t):is(e,t)},ys=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?mm(e,t):as(e,t)},bs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?fm(e,t):ss(e,t)},ws=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?hm(e,t):us(e,t)},_s=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?gm(e,t):ds(e,t)},vs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?sm(e,t):ls(e,t)}});var $s,xs,Ss,uo,Ts=V(()=>{"use strict";Q();Ie();qr();$s=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},xs=(e,t)=>{$s(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
3952
+ `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = min(value, ${n.getByIndices("input_indices")});`,""]})},fm=(e,t)=>{ot(e.inputs),it(e,"ReduceProd",t,(n,o)=>[`var value = ${o.type.storage}(1);`,"",`value *= ${n.getByIndices("input_indices")};`,""])},hm=(e,t)=>{ot(e.inputs),it(e,"ReduceSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,""])},gm=(e,t)=>{ot(e.inputs),it(e,"ReduceSumSquare",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += t * t;`,""])},at=(e,t,r)=>{if(t.length===0)return r;let n=1,o=1;for(let i=0;i<t.length;i++)t.indexOf(i)===-1?n*=e[i]:o*=e[i];return o<32&&n>1024},fs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?pm(e,t):ns(e,t)},hs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?um(e,t):os(e,t)},gs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?dm(e,t):is(e,t)},ys=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?lm(e,t):as(e,t)},bs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?cm(e,t):ss(e,t)},ws=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?mm(e,t):us(e,t)},_s=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?fm(e,t):ds(e,t)},vs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?hm(e,t):ls(e,t)},$s=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?gm(e,t):cs(e,t)},xs=(e,t)=>{at(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?sm(e,t):ps(e,t)}});var Ss,Ts,Is,lo,Cs=U(()=>{"use strict";J();Ie();qr();Ss=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},Ts=(e,t)=>{Ss(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
3953
3953
  `)}`,`var value = ${n.getByIndices("input_indices")};
3954
3954
  var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?"<=":"<"} value) {
3955
3955
  value = ${n.getByIndices("input_indices")};
3956
3956
  best_index = i32(last_index);
3957
- }`,"",o.setByOffset("global_idx","best_index")]};e.compute(jr("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},Ss=(e,t)=>{$s(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
3957
+ }`,"",o.setByOffset("global_idx","best_index")]};e.compute(jr("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},Is=(e,t)=>{Ss(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
3958
3958
  `)}`,`var value = ${n.getByIndices("input_indices")};
3959
3959
  var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?">=":">"} value) {
3960
3960
  value = ${n.getByIndices("input_indices")};
3961
3961
  best_index = i32(last_index);
3962
- }`,"",o.setByOffset("global_idx","best_index")]};e.compute(jr("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},uo=e=>J(e)});var ym,bm,wm,_m,Wt,vm,Is,Kr=V(()=>{"use strict";Q();ie();Hr();ae();ym=(e,t)=>{let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4],d=e[5];if(a&&d)throw new Error("Attention cannot have both past and attention_bias");if(r.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let l=r.dims[0],c=r.dims[1],m=r.dims[2];if(o.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(n.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(n.dims[0]!==m)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(o.dims[0]!==n.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let u=o.dims[0]/3,h=u,w=h;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let x of t.qkvHiddenSizes)if(x%t.numHeads!==0)throw new Error("qkv_hidden_sizes should be divisible by num_heads");u=t.qkvHiddenSizes[0],h=t.qkvHiddenSizes[1],w=t.qkvHiddenSizes[2]}let g=c;if(u!==h)throw new Error("qkv_hidden_sizes first element should be same as the second");if(o.dims[0]!==u+h+w)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let y=0;if(a){if(h!==w)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(a.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(a.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(a.dims[1]!==l)throw new Error('Input "past" second dimension must be batch_size');if(a.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(a.dims[4]!==h/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / 
num_heads');t.pastPresentShareBuffer||(y=a.dims[3])}let S=g+y,$=-1,v=0;if(i)throw new Error("Mask not supported");if(a)throw new Error("past is not supported");if(d){if(d.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(d.dims[0]!==l||d.dims[1]!==t.numHeads||d.dims[2]!==c||d.dims[3]!==S)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:l,sequenceLength:c,pastSequenceLength:y,kvSequenceLength:g,totalSequenceLength:S,maxSequenceLength:$,inputHiddenSize:m,hiddenSize:u,vHiddenSize:w,headSize:Math.floor(u/t.numHeads),vHeadSize:Math.floor(w/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:v,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},bm=(e,t,r)=>{let n=we(r),o=64,i=r/n;i<o&&(o=32);let a=Math.ceil(r/n/o),d=[{type:1,data:1/r},{type:12,data:i},{type:12,data:a}],l=he(e.dataType,n),c=Ee(1,n),m=["type"],u=h=>{let w=M("x",e.dataType,e.dims,n),g=Ee(e.dataType),y=[{name:"d_inv",type:"f32"},{name:"d_comp",type:"u32"},{name:"elements_per_thread",type:"u32"}];return`
3963
- var<workgroup> thread_max: array<f32, ${o}>;
3964
- var<workgroup> thread_sum: array<f32, ${o}>;
3965
- ${h.registerUniforms(y).declareVariables(w)}
3966
- ${h.mainStart([o,1,1])}
3962
+ }`,"",o.setByOffset("global_idx","best_index")]};e.compute(jr("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},lo=e=>ee(e)});var ym,co,bm,wm,_m,Nt,vm,As,Kr=U(()=>{"use strict";J();ae();Hr();se();ym=(e,t)=>{let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4],d=e[5];if(a&&d)throw new Error("Attention cannot have both past and attention_bias");if(r.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let l=r.dims[0],c=r.dims[1],m=r.dims[2];if(o.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(n.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(n.dims[0]!==m)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(o.dims[0]!==n.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let u=o.dims[0]/3,h=u,w=h;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let x of t.qkvHiddenSizes)if(x%t.numHeads!==0)throw new Error("qkv_hidden_sizes should be divisible by num_heads");u=t.qkvHiddenSizes[0],h=t.qkvHiddenSizes[1],w=t.qkvHiddenSizes[2]}let g=c;if(u!==h)throw new Error("qkv_hidden_sizes first element should be same as the second");if(o.dims[0]!==u+h+w)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let y=0;if(a){if(h!==w)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(a.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(a.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(a.dims[1]!==l)throw new Error('Input "past" second dimension must be batch_size');if(a.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(a.dims[4]!==h/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / 
num_heads');t.pastPresentShareBuffer||(y=a.dims[3])}let S=g+y,$=-1,v=0;if(i)throw new Error("Mask not supported");if(a)throw new Error("past is not supported");if(d){if(d.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(d.dims[0]!==l||d.dims[1]!==t.numHeads||d.dims[2]!==c||d.dims[3]!==S)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:l,sequenceLength:c,pastSequenceLength:y,kvSequenceLength:g,totalSequenceLength:S,maxSequenceLength:$,inputHiddenSize:m,hiddenSize:u,vHiddenSize:w,headSize:Math.floor(u/t.numHeads),vHeadSize:Math.floor(w/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:v,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},co=(e,t,r)=>t&&e?`
3963
+ let total_sequence_length_input = u32(${t.getByOffset("0")});
3964
+ let present_sequence_length = max(total_sequence_length_input, uniforms.past_sequence_length);
3965
+ let is_subsequent_prompt: bool = sequence_length > 1 && sequence_length != total_sequence_length_input;
3966
+ let is_first_prompt: bool = is_subsequent_prompt == false && sequence_length == total_sequence_length_input;
3967
+ total_sequence_length = u32(${e?.getByOffset("batchIdx")}) + 1;
3968
+ var past_sequence_length: u32 = 0;
3969
+ if (is_first_prompt == false) {
3970
+ past_sequence_length = total_sequence_length - sequence_length;
3971
+ }
3972
+ `:`
3973
+ ${r?"let past_sequence_length = uniforms.past_sequence_length":""};
3974
+ let present_sequence_length = total_sequence_length;
3975
+ `,bm=(e,t,r,n,o,i,a,d)=>{let l=we(a?1:i),c=64,m=i/l;m<c&&(c=32);let u=Math.ceil(i/l/c),h=[{type:12,data:t},{type:12,data:r},{type:12,data:n},{type:12,data:o},{type:12,data:m},{type:12,data:u}],w=he(e.dataType,l),g=Ee(1,l),y=["type"];a&&y.push("type"),d&&y.push("type");let S=$=>{let v=M("x",e.dataType,e.dims,l),x=[v],T=a?E("seq_lens",a.dataType,a.dims):void 0;T&&x.push(T);let C=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;C&&x.push(C);let A=Ee(e.dataType),P=[{name:"batch_size",type:"u32"},{name:"num_heads",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"sequence_length",type:"u32"},{name:"total_sequence_length",type:"u32"},{name:"elements_per_thread",type:"u32"}];return`
3976
+ var<workgroup> thread_max: array<f32, ${c}>;
3977
+ var<workgroup> thread_sum: array<f32, ${c}>;
3978
+ ${$.registerUniforms(P).declareVariables(...x)}
3979
+ ${$.mainStart([c,1,1])}
3980
+ let batchIdx = workgroup_id.z / uniforms.num_heads;
3981
+ let headIdx = workgroup_id.z % uniforms.num_heads;
3982
+ let sequence_length = uniforms.sequence_length;
3983
+ var total_sequence_length = uniforms.total_sequence_length;
3984
+ ${co(T,C,!1)}
3967
3985
  let local_offset = local_idx * uniforms.elements_per_thread;
3968
- let offset = (global_idx / ${o}) * uniforms.d_comp + local_offset;
3969
-
3970
- var thread_max_vector = ${c}(-3.402823e+38f);
3971
- for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {
3972
- thread_max_vector = max(${c}(x[offset + i]), thread_max_vector);
3986
+ let offset = (global_idx / ${c}) * uniforms.total_sequence_length + local_offset;
3987
+ let seq_causal_length = ${a?"u32(past_sequence_length + workgroup_id.y + 1)":"total_sequence_length"};
3988
+ var thread_max_vector = ${g}(-3.402823e+38f);
3989
+ for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
3990
+ thread_max_vector = max(${g}(x[offset + i]), thread_max_vector);
3973
3991
  }
3974
- thread_max[local_idx] = ${(()=>{switch(n){case 1:return"thread_max_vector";case 2:return"max(thread_max_vector.x, thread_max_vector.y)";case 4:return"max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, thread_max_vector.w))";default:throw new Error(`Unsupported components: ${n}`)}})()};
3992
+ thread_max[local_idx] = ${(()=>{switch(l){case 1:return"thread_max_vector";case 2:return"max(thread_max_vector.x, thread_max_vector.y)";case 4:return"max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, thread_max_vector.w))";default:throw new Error(`Unsupported components: ${l}`)}})()};
3975
3993
  workgroupBarrier();
3976
3994
 
3977
3995
  var max_value = f32(-3.402823e+38f);
3978
- for (var i = 0u; i < ${o}; i++) {
3996
+ for (var i = 0u; i < ${c}; i++) {
3979
3997
  max_value = max(thread_max[i], max_value);
3980
3998
  }
3981
3999
 
3982
- var sum_vector = ${c}(0);
3983
- for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {
3984
- sum_vector += exp(${c}(x[offset + i]) - max_value);
4000
+ var sum_vector = ${g}(0);
4001
+ for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
4002
+ sum_vector += exp(${g}(x[offset + i]) - max_value);
3985
4003
  }
3986
- thread_sum[local_idx] = ${(()=>{switch(n){case 1:return"sum_vector";case 2:return"sum_vector.x + sum_vector.y";case 4:return"sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w";default:throw new Error(`Unsupported components: ${n}`)}})()};
4004
+ thread_sum[local_idx] = ${(()=>{switch(l){case 1:return"sum_vector";case 2:return"sum_vector.x + sum_vector.y";case 4:return"sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w";default:throw new Error(`Unsupported components: ${l}`)}})()};
3987
4005
  workgroupBarrier();
3988
4006
 
3989
4007
  var sum: f32 = 0;
3990
- for (var i = 0u; i < ${o}; i++) {
4008
+ for (var i = 0u; i < ${c}; i++) {
3991
4009
  sum += thread_sum[i];
3992
4010
  }
3993
4011
 
3994
4012
  if (sum == 0) {
3995
- for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {
3996
- x[offset + i] = ${w.type.value}(${g}(uniforms.d_inv));
4013
+ for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
4014
+ x[offset + i] = ${v.type.value}(${A}(1.0) / ${A}(seq_causal_length));
3997
4015
  }
3998
4016
  } else {
3999
- for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {
4000
- var f32input = ${c}(x[offset + i]);
4001
- x[offset + i] = ${w.type.value}(exp(f32input - max_value) / sum);
4017
+ for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
4018
+ var f32input = ${g}(x[offset + i]);
4019
+ x[offset + i] = ${v.type.value}(exp(f32input - max_value) / sum);
4002
4020
  }
4003
4021
  }
4004
- }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${o};${l};${n}`,inputDependencies:m},getShaderSource:u,getRunData:()=>({outputs:[],dispatchGroup:{x:t},programUniforms:d})}},wm=(e,t,r,n,o,i,a,d)=>{let l=d+i.kvSequenceLength,c=[i.batchSize,i.numHeads,i.sequenceLength,l],m=i.kvNumHeads===void 0&&e>1&&n,u=m?[i.batchSize,i.numHeads,l,i.headSize]:void 0,h=a.scale===0?1/Math.sqrt(i.headSize):a.scale,w=we(i.headSize),g=i.headSize/w,y=12,S={x:Math.ceil(l/y),y:Math.ceil(i.sequenceLength/y),z:i.batchSize*i.numHeads},$=[{type:12,data:i.sequenceLength},{type:12,data:g},{type:12,data:l},{type:12,data:i.numHeads},{type:1,data:h},{type:12,data:d},{type:12,data:i.kvSequenceLength}],v=m&&n&&k.size(n.dims)>0,x=["type","type"];v&&x.push("type"),o&&x.push("type");let T=[{dims:c,dataType:t.dataType,gpuDataType:0}];m&&T.push({dims:u,dataType:t.dataType,gpuDataType:0});let C=A=>{let P=E("q",t.dataType,t.dims,w),B=E("key",r.dataType,r.dims,w),N=[P,B];if(v){let se=E("past_key",n.dataType,n.dims,w);N.push(se)}o&&N.push(E("attention_bias",o.dataType,o.dims));let W=M("output",t.dataType,c),K=[W];m&&K.push(M("present_key",t.dataType,u,w));let Z=Ee(1,w),ee=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"}];return`
4005
- const TILE_SIZE = ${y}u;
4022
+ ${a?`
4023
+ for (var total_seq_id: u32 = seq_causal_length; total_seq_id + local_offset < uniforms.total_sequence_length; total_seq_id++) {
4024
+ x[offset + total_seq_id] = ${v.type.value}(${A}(0));
4025
+ }`:""};
4026
+ }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${c};${w};${l}`,inputDependencies:y},getShaderSource:S,getRunData:()=>({outputs:[],dispatchGroup:{x:Math.ceil(i/c),y:o,z:t*r},programUniforms:h})}},wm=(e,t,r,n,o,i,a,d,l)=>{let c=a+i.kvSequenceLength,m=[i.batchSize,i.numHeads,i.sequenceLength,c],u=e>1&&n,h=i.kvNumHeads?i.kvNumHeads:i.numHeads,w=u?[i.batchSize,h,c,i.headSize]:void 0,g=i.nReps?i.nReps:1,y=i.scale===0?1/Math.sqrt(i.headSize):i.scale,S=we(i.headSize),$=i.headSize/S,v=12,x={x:Math.ceil(c/v),y:Math.ceil(i.sequenceLength/v),z:i.batchSize*i.numHeads},T=[{type:12,data:i.sequenceLength},{type:12,data:$},{type:12,data:c},{type:12,data:i.numHeads},{type:12,data:i.headSize},{type:1,data:y},{type:12,data:a},{type:12,data:i.kvSequenceLength},{type:12,data:g}],C=u&&n&&k.size(n.dims)>0,A=["type","type"];C&&A.push("type"),o&&A.push("type"),d&&A.push("type"),l&&A.push("type");let P=[{dims:m,dataType:t.dataType,gpuDataType:0}];u&&P.push({dims:w,dataType:t.dataType,gpuDataType:0});let D=W=>{let N=E("q",t.dataType,t.dims,S),j=E("key",r.dataType,r.dims,S),Y=[N,j];if(C){let q=E("past_key",n.dataType,n.dims,S);Y.push(q)}o&&Y.push(E("attention_bias",o.dataType,o.dims));let Z=d?E("seq_lens",d.dataType,d.dims):void 0;Z&&Y.push(Z);let te=l?E("total_sequence_length_input",l.dataType,l.dims):void 0;te&&Y.push(te);let ue=M("output",t.dataType,m),K=[ue];u&&K.push(M("present_key",t.dataType,w,S));let de=Ee(1,S),ce=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4027
+ const TILE_SIZE = ${v}u;
4006
4028
 
4007
- var<workgroup> tileQ: array<${P.type.storage}, ${y*y}>;
4008
- var<workgroup> tileK: array<${P.type.storage}, ${y*y}>;
4009
- ${A.registerUniforms(ee).declareVariables(...N,...K)}
4010
- ${A.mainStart([y,y,1])}
4029
+ var<workgroup> tileQ: array<${N.type.storage}, ${v*v}>;
4030
+ var<workgroup> tileK: array<${N.type.storage}, ${v*v}>;
4031
+ ${W.registerUniforms(ce).declareVariables(...Y,...K)}
4032
+ ${W.mainStart([v,v,1])}
4011
4033
  // x holds the N and y holds the M
4012
- let headIdx = workgroup_id.z;
4034
+ let headIdx = workgroup_id.z % uniforms.num_heads;
4035
+ let kvHeadIdx = ${g===1?"headIdx":"headIdx / uniforms.n_reps"};
4036
+ let kv_num_heads = ${g===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"};
4037
+ let batchIdx = workgroup_id.z / uniforms.num_heads;
4013
4038
  let m = workgroup_id.y * TILE_SIZE;
4014
4039
  let n = workgroup_id.x * TILE_SIZE;
4015
- let qOffset = uniforms.M * uniforms.K * headIdx + m * uniforms.K;
4016
- ${(()=>v&&m?`
4017
- let kOffset = uniforms.kv_sequence_length * uniforms.K * headIdx;
4018
- let pastKeyOffset = uniforms.past_sequence_length * uniforms.K * headIdx;`:`
4019
- let kOffset = uniforms.N * uniforms.K * headIdx + n * uniforms.K;`)()}
4020
- ${m?"let presentKeyOffset = headIdx * uniforms.N * uniforms.K;":""}
4021
- var value = ${Z}(0);
4040
+ let sequence_length = uniforms.M;
4041
+ var total_sequence_length = uniforms.N;
4042
+ ${co(Z,te,!0)}
4043
+ let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx;
4044
+ let qOffset = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K;
4045
+ ${C&&u?"let pastKeyOffset = absKvHeadIdx * uniforms.past_sequence_length * uniforms.K;":""};
4046
+ let kOffset = absKvHeadIdx * uniforms.kv_sequence_length * uniforms.K;
4047
+ ${u?"let presentKeyOffset = absKvHeadIdx * uniforms.N * uniforms.K;":""}
4048
+ var value = ${de}(0);
4022
4049
  for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {
4023
4050
  if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) {
4024
4051
  tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x];
4025
4052
  }
4026
4053
  if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) {
4027
4054
  var idx = TILE_SIZE * local_id.y + local_id.x;
4028
- ${(()=>v&&m?`
4029
- if (n + local_id.y < uniforms.past_sequence_length) {
4055
+ ${(()=>C&&u?`
4056
+ if (n + local_id.y < past_sequence_length) {
4030
4057
  tileK[idx] = past_key[pastKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x];
4031
- } else {
4032
- tileK[idx] =
4033
- key[kOffset + (n + local_id.y - uniforms.past_sequence_length) * uniforms.K + w + local_id.x];
4034
- }`:"tileK[idx] = key[kOffset + local_id.y * uniforms.K + w + local_id.x];")()}
4035
- ${m?"present_key[presentKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x] = tileK[idx];":""}
4058
+ } else if (n + local_id.y - past_sequence_length < uniforms.kv_sequence_length) {
4059
+ tileK[idx] = key[kOffset + (n + local_id.y - past_sequence_length) * uniforms.K + w + local_id.x];
4060
+ }`:`
4061
+ if (n + local_id.y < uniforms.kv_sequence_length) {
4062
+ tileK[idx] = key[kOffset + (n + local_id.y) * uniforms.K + w + local_id.x];
4063
+ }`)()}
4064
+ ${u?`if (n + local_id.y < present_sequence_length) {
4065
+ present_key[presentKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x] = tileK[idx];
4066
+ }`:""}
4036
4067
  }
4037
4068
  workgroupBarrier();
4038
4069
 
4039
4070
  for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) {
4040
- value += ${Z}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]);
4071
+ value += ${de}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]);
4041
4072
  }
4042
4073
 
4043
4074
  workgroupBarrier();
4044
4075
  }
4045
4076
 
4046
- let headOffset = headIdx * uniforms.M * uniforms.N;
4047
- if (global_id.y < uniforms.M && global_id.x < uniforms.N) {
4077
+ if (global_id.y < uniforms.M && global_id.x < total_sequence_length) {
4078
+ let headOffset = workgroup_id.z * uniforms.M * uniforms.N;
4048
4079
  let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x;
4049
- var sum: f32 = ${(()=>{switch(w){case 1:return"value";case 2:return"value.x + value.y";case 4:return"value.x + value.y + value.z + value.w";default:throw new Error(`Unsupported components: ${w}`)}})()};
4050
- output[outputIdx] = ${W.type.value} (sum * uniforms.alpha) + ${o?"attention_bias[outputIdx]":"0.0"};
4051
- }
4052
- }`};return{name:"AttentionProbs",shaderCache:{hint:`${w};${o!==void 0};${n!==void 0};${e}`,inputDependencies:x},getRunData:()=>({outputs:T,dispatchGroup:S,programUniforms:$}),getShaderSource:C}},_m=(e,t,r,n,o,i)=>{let a=i+o.kvSequenceLength,d=o.nReps?o.nReps:1,l=o.vHiddenSize*d,c=o.kvNumHeads==null&&e>1&&n,m=c?[o.batchSize,o.numHeads,a,o.headSize]:void 0,u=[o.batchSize,o.sequenceLength,l],h=12,w={x:Math.ceil(o.vHeadSize/h),y:Math.ceil(o.sequenceLength/h),z:o.batchSize*o.numHeads},g=[{type:12,data:o.sequenceLength},{type:12,data:a},{type:12,data:o.vHeadSize},{type:12,data:o.numHeads},{type:12,data:l},{type:12,data:i},{type:12,data:o.kvSequenceLength}],y=c&&n&&k.size(n.dims)>0,S=["type","type"];y&&S.push("type");let $=[{dims:u,dataType:t.dataType,gpuDataType:0}];c&&$.push({dims:m,dataType:t.dataType,gpuDataType:0});let v=x=>{let T=E("probs",t.dataType,t.dims),C=E("v",r.dataType,r.dims),A=[T,C];y&&A.push(E("past_value",n.dataType,n.dims));let B=[M("output",t.dataType,u)];c&&B.push(M("present_value",t.dataType,m));let N=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"}];return`
4053
- const TILE_SIZE = ${h}u;
4054
- var<workgroup> tileQ: array<${T.type.value}, ${h*h}>;
4055
- var<workgroup> tileK: array<${T.type.value}, ${h*h}>;
4056
- ${x.registerUniforms(N).declareVariables(...A,...B)}
4057
- ${x.mainStart([h,h,1])}
4058
- let headIdx = workgroup_id.z;
4080
+ var sum: f32 = ${(()=>{switch(S){case 1:return"value";case 2:return"value.x + value.y";case 4:return"value.x + value.y + value.z + value.w";default:throw new Error(`Unsupported components: ${S}`)}})()};
4081
+ output[outputIdx] = ${ue.type.value} (sum * uniforms.alpha) + ${o?"attention_bias[outputIdx]":"0.0"};
4082
+ }
4083
+ }`};return{name:"AttentionProbs",shaderCache:{hint:`${S};${o!==void 0};${n!==void 0};${e}`,inputDependencies:A},getRunData:()=>({outputs:P,dispatchGroup:x,programUniforms:T}),getShaderSource:D}},_m=(e,t,r,n,o,i,a=void 0,d=void 0)=>{let l=i+o.kvSequenceLength,c=o.nReps?o.nReps:1,m=o.vHiddenSize*c,u=e>1&&n,h=o.kvNumHeads?o.kvNumHeads:o.numHeads,w=u?[o.batchSize,h,l,o.headSize]:void 0,g=[o.batchSize,o.sequenceLength,m],y=12,S={x:Math.ceil(o.vHeadSize/y),y:Math.ceil(o.sequenceLength/y),z:o.batchSize*o.numHeads},$=[{type:12,data:o.sequenceLength},{type:12,data:l},{type:12,data:o.vHeadSize},{type:12,data:o.numHeads},{type:12,data:o.headSize},{type:12,data:m},{type:12,data:i},{type:12,data:o.kvSequenceLength},{type:12,data:c}],v=u&&n&&k.size(n.dims)>0,x=["type","type"];v&&x.push("type"),a&&x.push("type"),d&&x.push("type");let T=[{dims:g,dataType:t.dataType,gpuDataType:0}];u&&T.push({dims:w,dataType:t.dataType,gpuDataType:0});let C=A=>{let P=E("probs",t.dataType,t.dims),D=E("v",r.dataType,r.dims),W=[P,D];v&&W.push(E("past_value",n.dataType,n.dims));let N=a?E("seq_lens",a.dataType,a.dims):void 0;a&&W.push(N);let j=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;d&&W.push(j);let Z=[M("output",t.dataType,g)];u&&Z.push(M("present_value",t.dataType,w));let te=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4084
+ const TILE_SIZE = ${y}u;
4085
+ var<workgroup> tileQ: array<${P.type.value}, ${y*y}>;
4086
+ var<workgroup> tileV: array<${P.type.value}, ${y*y}>;
4087
+ ${A.registerUniforms(te).declareVariables(...W,...Z)}
4088
+ ${A.mainStart([y,y,1])}
4089
+ let headIdx = workgroup_id.z % uniforms.num_heads;
4090
+ let batchIdx = workgroup_id.z / uniforms.num_heads;
4091
+ let kvHeadIdx = ${c===1?"headIdx":"headIdx / uniforms.n_reps"};
4092
+ let kv_num_heads = ${c===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"};
4059
4093
  let m = global_id.y;
4060
4094
  let n = global_id.x;
4061
-
4062
- let offsetA = headIdx * (uniforms.M * uniforms.K) + m * uniforms.K;
4063
- ${(()=>y&&c?`
4064
- let pastValueOffset = headIdx * uniforms.N * uniforms.past_sequence_length + n;
4065
- let vOffset = headIdx * uniforms.N * uniforms.kv_sequence_length + n;
4066
- `:`
4067
- let offsetB = headIdx * uniforms.N * uniforms.K + n;
4068
- `)()}
4069
- ${c?"let presentValueOffset = headIdx * uniforms.N * uniforms.K + n;":""}
4070
- var value = ${T.type.storage}(0);
4095
+ let sequence_length = uniforms.M;
4096
+ var total_sequence_length = uniforms.K;
4097
+ ${co(N,j,!0)}
4098
+ let offsetA = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K;
4099
+ let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; // kvHeadIdx is relative to the batch
4100
+ ${v&&u?"let pastValueOffset = absKvHeadIdx * uniforms.N * uniforms.past_sequence_length + n;":""};
4101
+ let vOffset = absKvHeadIdx * uniforms.N * uniforms.kv_sequence_length + n;
4102
+ ${u?"let presentValueOffset = absKvHeadIdx * uniforms.N * uniforms.K + n;":""}
4103
+ var value = ${P.type.storage}(0);
4071
4104
  for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {
4072
4105
  if (m < uniforms.M && w + local_id.x < uniforms.K) {
4073
4106
  tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x];
4074
4107
  }
4075
4108
  if (n < uniforms.N && w + local_id.y < uniforms.K) {
4076
4109
  var idx = TILE_SIZE * local_id.y + local_id.x;
4077
- ${(()=>y&&c?`
4078
- if (w + local_id.y < uniforms.past_sequence_length) {
4079
- tileK[idx] = past_value[pastValueOffset + (w + local_id.y) * uniforms.N];
4080
- } else {
4081
- tileK[idx] = v[vOffset + (w + local_id.y - uniforms.past_sequence_length) * uniforms.N];
4110
+ ${(()=>v&&u?`
4111
+ if (w + local_id.y < past_sequence_length) {
4112
+ tileV[idx] = past_value[pastValueOffset + (w + local_id.y) * uniforms.N];
4113
+ } else if (w + local_id.y - past_sequence_length < uniforms.kv_sequence_length) {
4114
+ tileV[idx] = v[vOffset + (w + local_id.y - past_sequence_length) * uniforms.N];
4082
4115
  }
4083
4116
  `:`
4084
- tileK[idx] = v[offsetB + (w + local_id.y) * uniforms.N];
4085
- `)()}
4086
- ${c?"present_value[presentValueOffset + (w + local_id.y) * uniforms.N] = tileK[idx];":""}
4117
+ if (w + local_id.y < uniforms.kv_sequence_length) {
4118
+ tileV[idx] = v[vOffset + (w + local_id.y) * uniforms.N];
4119
+ }`)()}
4120
+ ${u?`
4121
+ if (w + local_id.y < present_sequence_length) {
4122
+ present_value[presentValueOffset + (w + local_id.y) * uniforms.N] = tileV[idx];
4123
+ }`:""}
4087
4124
  }
4088
4125
  workgroupBarrier();
4089
- for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) {
4090
- value += tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * k + local_id.x];
4126
+ for (var k: u32 = 0u; k < TILE_SIZE && w+k < total_sequence_length; k++) {
4127
+ value += tileQ[TILE_SIZE * local_id.y + k] * tileV[TILE_SIZE * k + local_id.x];
4091
4128
  }
4092
4129
  workgroupBarrier();
4093
4130
  }
4094
4131
 
4095
4132
  // we need to transpose output from BNSH_v to BSND_v
4096
- let batchIdx = workgroup_id.z / uniforms.num_heads;
4097
- let currentBatchHeadNumber = workgroup_id.z % uniforms.num_heads;
4098
4133
  if (m < uniforms.M && n < uniforms.N) {
4099
4134
  let outputIdx = batchIdx * uniforms.M * uniforms.v_hidden_size + m * uniforms.v_hidden_size
4100
- + currentBatchHeadNumber * uniforms.N + n;
4135
+ + headIdx * uniforms.N + n;
4101
4136
  output[outputIdx] = value;
4102
4137
  }
4103
- }`};return{name:"AttentionScore",shaderCache:{hint:`${n!==void 0};${e}`,inputDependencies:S},getRunData:()=>({outputs:$,dispatchGroup:w,programUniforms:g}),getShaderSource:v}},Wt=(e,t,r,n,o,i,a,d,l,c,m)=>{let u=Math.min(e.outputCount,1+(a?1:0)+(d?1:0)),h=c.kvNumHeads!==void 0||u>1?c.pastSequenceLength:0,w=h+c.kvSequenceLength,g=l&&k.size(l.dims)>0?l:void 0,y=[t,r];c.kvNumHeads===void 0&&u>1&&a&&k.size(a.dims)>0&&y.push(a),g&&y.push(g);let S=e.compute(wm(u,t,r,a,g,c,m,h),{inputs:y,outputs:c.kvNumHeads===void 0&&u>1?[-1,1]:[-1]})[0];e.compute(bm(S,c.batchSize*c.numHeads*c.sequenceLength,w),{inputs:[S],outputs:[]});let $=[S,n];c.kvNumHeads===void 0&&u>1&&d&&k.size(d.dims)>0&&$.push(d),e.compute(_m(u,S,n,d,c,h),{inputs:$,outputs:c.kvNumHeads===void 0&&u>1?[0,2]:[0]})},vm=(e,t)=>{let r=[t.batchSize,t.numHeads,t.sequenceLength,t.headSize],n=t.sequenceLength,o=t.inputHiddenSize,i=t.headSize,a=12,d={x:Math.ceil(t.headSize/a),y:Math.ceil(t.sequenceLength/a),z:t.batchSize*t.numHeads},l=[e.inputs[0],e.inputs[1],e.inputs[2]],c=[{type:12,data:n},{type:12,data:o},{type:12,data:i},{type:12,data:t.numHeads},{type:12,data:t.headSize},{type:12,data:t.hiddenSize},{type:12,data:t.hiddenSize+t.hiddenSize+t.vHiddenSize}],m=u=>{let h=M("output_q",l[0].dataType,r),w=M("output_k",l[0].dataType,r),g=M("output_v",l[0].dataType,r),y=E("input",l[0].dataType,l[0].dims),S=E("weight",l[1].dataType,l[1].dims),$=E("bias",l[2].dataType,l[2].dims),v=y.type.storage,x=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"hidden_size",type:"u32"},{name:"ldb",type:"u32"}];return`
4138
+ }`};return{name:"AttentionScore",shaderCache:{hint:`${n!==void 0};${e}`,inputDependencies:x},getRunData:()=>({outputs:T,dispatchGroup:S,programUniforms:$}),getShaderSource:C}},Nt=(e,t,r,n,o,i,a,d,l,c,m=void 0,u=void 0)=>{let h=Math.min(e.outputCount,1+(a?1:0)+(d?1:0)),w=h>1?c.pastSequenceLength:0,g=w+c.kvSequenceLength,y=l&&k.size(l.dims)>0?l:void 0,S=[t,r];h>1&&a&&k.size(a.dims)>0&&S.push(a),y&&S.push(y),m&&S.push(m),u&&S.push(u);let $=e.compute(wm(h,t,r,a,y,c,w,m,u),{inputs:S,outputs:h>1?[-1,1]:[-1]})[0];e.compute(bm($,c.batchSize,c.numHeads,w,c.sequenceLength,g,m,u),{inputs:m&&u?[$,m,u]:[$],outputs:[]});let v=[$,n];h>1&&d&&k.size(d.dims)>0&&v.push(d),m&&v.push(m),u&&v.push(u),e.compute(_m(h,$,n,d,c,w,m,u),{inputs:v,outputs:h>1?[0,2]:[0]})},vm=(e,t)=>{let r=[t.batchSize,t.numHeads,t.sequenceLength,t.headSize],n=t.sequenceLength,o=t.inputHiddenSize,i=t.headSize,a=12,d={x:Math.ceil(t.headSize/a),y:Math.ceil(t.sequenceLength/a),z:t.batchSize*t.numHeads},l=[e.inputs[0],e.inputs[1],e.inputs[2]],c=[{type:12,data:n},{type:12,data:o},{type:12,data:i},{type:12,data:t.numHeads},{type:12,data:t.headSize},{type:12,data:t.hiddenSize},{type:12,data:t.hiddenSize+t.hiddenSize+t.vHiddenSize}],m=u=>{let h=M("output_q",l[0].dataType,r),w=M("output_k",l[0].dataType,r),g=M("output_v",l[0].dataType,r),y=E("input",l[0].dataType,l[0].dims),S=E("weight",l[1].dataType,l[1].dims),$=E("bias",l[2].dataType,l[2].dims),v=y.type.storage,x=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"hidden_size",type:"u32"},{name:"ldb",type:"u32"}];return`
4104
4139
  const TILE_SIZE = ${a}u;
4105
4140
  var<workgroup> tileInput: array<${v}, ${a*a}>;
4106
4141
  var<workgroup> tileWeightQ: array<${v}, ${a*a}>;
@@ -4155,7 +4190,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4155
4190
  output_k[outputIdx] = valueK;
4156
4191
  output_v[outputIdx] = valueV;
4157
4192
  }
4158
- }`};return e.compute({name:"AttentionPrepare",shaderCache:{inputDependencies:["type","type","type"]},getRunData:()=>({outputs:[{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0}],dispatchGroup:d,programUniforms:c}),getShaderSource:m},{inputs:l,outputs:[-1,-1,-1]})},Is=(e,t)=>{let r=ym(e.inputs,t),[n,o,i]=vm(e,r);return Wt(e,n,o,i,e.inputs[4],void 0,void 0,void 0,e.inputs[5],r,t)}});var $m,xm,Sm,Cs,As=V(()=>{"use strict";Ke();Q();ie();Ie();ae();$m=(e,t)=>{if(!e||e.length!==5)throw new Error("BatchNormalization requires 5 inputs");let r=(n,o,i)=>{let a=o.length;if(a!==n.length)throw new Error(`${i}: num dimensions != ${a}`);o.forEach((d,l)=>{if(d!==n[l])throw new Error(`${i}: dim[${l}] do not match`)})};if(e[0].dims.length>1){let n=t.format==="NHWC"?t.spatial?e[0].dims.slice(-1):e[0].dims.slice(-1).concat(e[0].dims.slice(1,e[0].dims.length-1)):e[0].dims.slice(1,t.spatial?2:void 0);r(e[1].dims,n,"Invalid input scale"),r(e[2].dims,n,"Invalid input B"),r(e[3].dims,n,"Invalid input mean"),r(e[4].dims,n,"Invalid input var")}else r(e[1].dims,[1],"Invalid input scale"),r(e[2].dims,[1],"Invalid input B"),r(e[3].dims,[1],"Invalid input mean"),r(e[4].dims,[1],"Invalid input var")},xm=(e,t)=>{let{epsilon:r,spatial:n,format:o}=t,i=e[0].dims,a=n?we(i[i.length-1]):1,d=o==="NHWC"&&i.length>1?a:1,l=k.size(i)/a,c=n,m=c?i.length:i,u=E("x",e[0].dataType,e[0].dims,a),h=E("scale",e[1].dataType,e[1].dims,d),w=E("bias",e[2].dataType,e[2].dims,d),g=E("inputMean",e[3].dataType,e[3].dims,d),y=E("inputVar",e[4].dataType,e[4].dims,d),S=M("y",e[0].dataType,m,a),$=()=>{let x="";if(n)x=`let cOffset = ${i.length===1?"0u":o==="NHWC"?`outputIndices[${i.length-1}] / ${a}`:"outputIndices[1]"};`;else if(o==="NCHW")x=`
4193
+ }`};return e.compute({name:"AttentionPrepare",shaderCache:{inputDependencies:["type","type","type"]},getRunData:()=>({outputs:[{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0}],dispatchGroup:d,programUniforms:c}),getShaderSource:m},{inputs:l,outputs:[-1,-1,-1]})},As=(e,t)=>{let r=ym(e.inputs,t),[n,o,i]=vm(e,r);return Nt(e,n,o,i,e.inputs[4],void 0,void 0,void 0,e.inputs[5],r)}});var $m,xm,Sm,ks,Es=U(()=>{"use strict";Ke();J();ae();Ie();se();$m=(e,t)=>{if(!e||e.length!==5)throw new Error("BatchNormalization requires 5 inputs");let r=(n,o,i)=>{let a=o.length;if(a!==n.length)throw new Error(`${i}: num dimensions != ${a}`);o.forEach((d,l)=>{if(d!==n[l])throw new Error(`${i}: dim[${l}] do not match`)})};if(e[0].dims.length>1){let n=t.format==="NHWC"?t.spatial?e[0].dims.slice(-1):e[0].dims.slice(-1).concat(e[0].dims.slice(1,e[0].dims.length-1)):e[0].dims.slice(1,t.spatial?2:void 0);r(e[1].dims,n,"Invalid input scale"),r(e[2].dims,n,"Invalid input B"),r(e[3].dims,n,"Invalid input mean"),r(e[4].dims,n,"Invalid input var")}else r(e[1].dims,[1],"Invalid input scale"),r(e[2].dims,[1],"Invalid input B"),r(e[3].dims,[1],"Invalid input mean"),r(e[4].dims,[1],"Invalid input var")},xm=(e,t)=>{let{epsilon:r,spatial:n,format:o}=t,i=e[0].dims,a=n?we(i[i.length-1]):1,d=o==="NHWC"&&i.length>1?a:1,l=k.size(i)/a,c=n,m=c?i.length:i,u=E("x",e[0].dataType,e[0].dims,a),h=E("scale",e[1].dataType,e[1].dims,d),w=E("bias",e[2].dataType,e[2].dims,d),g=E("inputMean",e[3].dataType,e[3].dims,d),y=E("inputVar",e[4].dataType,e[4].dims,d),S=M("y",e[0].dataType,m,a),$=()=>{let x="";if(n)x=`let cOffset = ${i.length===1?"0u":o==="NHWC"?`outputIndices[${i.length-1}] / ${a}`:"outputIndices[1]"};`;else if(o==="NCHW")x=`
4159
4194
  ${S.indicesSet("outputIndices","0","0")}
4160
4195
  let cOffset = ${S.indicesToOffset("outputIndices")};`;else{x=`var cIndices = ${h.type.indices}(0);
4161
4196
  cIndices[0] = outputIndices[${i.length-1}];`;for(let T=1;T<h.rank;T++)x+=`cIndices[${T}] = outputIndices[${T}];`;x+=`let cOffset = ${h.indicesToOffset("cIndices")};`}return x},v=x=>`
@@ -4172,7 +4207,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4172
4207
  let x = ${u.getByOffset("global_idx")};
4173
4208
  let value = (x - inputMean) * inverseSqrt(inputVar + epsilon) * scale + bias;
4174
4209
  ${S.setByOffset("global_idx","value")}
4175
- }`;return{name:"BatchNormalization",shaderCache:{hint:`${t.epsilon}_${t.format}_${n}_${a}`,inputDependencies:c?["rank","type","type","type","type"]:void 0},getShaderSource:v,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c?[{type:12,data:l},...R(i)]:[{type:12,data:l}]})}},Sm=e=>J(e),Cs=(e,t)=>{let{inputs:r,outputCount:n}=e,o=Sm({...t,outputCount:n});if(_e.webgpu.validateInputContent&&$m(r,o),t.trainingMode)throw new Error("BatchNormalization trainingMode is not supported yet.");e.compute(xm(r,o))}});var Tm,Im,ks,Es=V(()=>{"use strict";ie();ae();Tm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![320,640,1280].includes(e[0].dims[2]))throw new Error("number of channels should be 320, 640 or 1280");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Im=e=>{let t=e[0].dims,r=e[0].dims[2],n=k.size(t)/4,o=e[0].dataType,i=E("input",o,t,4),a=E("bias",o,[r],4),d=E("residual",o,t,4),l=M("output",o,t,4);return{name:"BiasAdd",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)}}),getShaderSource:m=>`
4210
+ }`;return{name:"BatchNormalization",shaderCache:{hint:`${t.epsilon}_${t.format}_${n}_${a}`,inputDependencies:c?["rank","type","type","type","type"]:void 0},getShaderSource:v,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c?[{type:12,data:l},...V(i)]:[{type:12,data:l}]})}},Sm=e=>ee(e),ks=(e,t)=>{let{inputs:r,outputCount:n}=e,o=Sm({...t,outputCount:n});if(_e.webgpu.validateInputContent&&$m(r,o),t.trainingMode)throw new Error("BatchNormalization trainingMode is not supported yet.");e.compute(xm(r,o))}});var Tm,Im,Ps,zs=U(()=>{"use strict";ae();se();Tm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![320,640,1280].includes(e[0].dims[2]))throw new Error("number of channels should be 320, 640 or 1280");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Im=e=>{let t=e[0].dims,r=e[0].dims[2],n=k.size(t)/4,o=e[0].dataType,i=E("input",o,t,4),a=E("bias",o,[r],4),d=E("residual",o,t,4),l=M("output",o,t,4);return{name:"BiasAdd",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)}}),getShaderSource:m=>`
4176
4211
  const channels = ${r}u / 4;
4177
4212
  ${m.declareVariables(i,a,d,l)}
4178
4213
 
@@ -4181,7 +4216,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4181
4216
  let value = ${i.getByOffset("global_idx")}
4182
4217
  + ${a.getByOffset("global_idx % channels")} + ${d.getByOffset("global_idx")};
4183
4218
  ${l.setByOffset("global_idx","value")}
4184
- }`}},ks=e=>{Tm(e.inputs),e.compute(Im(e.inputs))}});var Cm,ge,Ps,zs,Os,Bs,Ds,Ms,Rs,Us,Vs,Am,Ns,Ws,Ls,Hs,Qt,Gs,Yr,Fs,qs,js,Ks,Ys,Xs,Zs,Qs,Js,eu,tu,ru,nu,ou,iu,au,su,uu,lo,co,du,lu,cu,km,Em,pu,Xr=V(()=>{"use strict";Q();ie();Ie();ae();Cm=(e,t,r,n,o,i,a)=>{let d=Math.ceil(t/4),l="";typeof o=="string"?l=`${o}(a)`:l=o("a");let c=E("inputData",r,[d],4),m=M("outputData",n,[d],4),u=[{name:"vec_size",type:"u32"}];return a&&u.push(...a),`
4219
+ }`}},Ps=e=>{Tm(e.inputs),e.compute(Im(e.inputs))}});var Cm,ge,Os,Ds,Bs,Ms,Rs,Us,Vs,Ws,Ns,Am,Ls,Hs,Gs,Fs,Zt,qs,Yr,js,Ks,Ys,Xs,Qs,Zs,Js,eu,tu,ru,nu,ou,iu,au,su,uu,du,lu,po,mo,cu,pu,mu,km,Em,fu,Xr=U(()=>{"use strict";J();ae();Ie();se();Cm=(e,t,r,n,o,i,a)=>{let d=Math.ceil(t/4),l="";typeof o=="string"?l=`${o}(a)`:l=o("a");let c=E("inputData",r,[d],4),m=M("outputData",n,[d],4),u=[{name:"vec_size",type:"u32"}];return a&&u.push(...a),`
4185
4220
  ${e.registerUniforms(u).declareVariables(c,m)}
4186
4221
 
4187
4222
  ${i??""}
@@ -4191,7 +4226,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4191
4226
 
4192
4227
  let a = ${c.getByOffset("global_idx")};
4193
4228
  ${m.setByOffset("global_idx",l)}
4194
- }`},ge=(e,t,r,n,o,i=e.dataType,a,d)=>{let l=[{type:12,data:Math.ceil(k.size(e.dims)/4)}];return a&&l.push(...a),{name:t,shaderCache:{hint:o,inputDependencies:["type"]},getShaderSource:c=>Cm(c,k.size(e.dims),e.dataType,i,r,n,d),getRunData:c=>({outputs:[{dims:e.dims,dataType:i}],dispatchGroup:{x:Math.ceil(k.size(c[0].dims)/64/4)},programUniforms:l})}},Ps=e=>{e.compute(ge(e.inputs[0],"Abs","abs"))},zs=e=>{e.compute(ge(e.inputs[0],"Acos","acos"))},Os=e=>{e.compute(ge(e.inputs[0],"Acosh","acosh"))},Bs=e=>{e.compute(ge(e.inputs[0],"Asin","asin"))},Ds=e=>{e.compute(ge(e.inputs[0],"Asinh","asinh"))},Ms=e=>{e.compute(ge(e.inputs[0],"Atan","atan"))},Rs=e=>{e.compute(ge(e.inputs[0],"Atanh","atanh"))},Us=e=>J(e),Vs=(e,t)=>{let r;switch(t.to){case 10:r="vec4<f16>";break;case 1:r="vec4<f32>";break;case 12:r="vec4<u32>";break;case 6:r="vec4<i32>";break;case 9:r="vec4<bool>";break;default:throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${t.to}`)}e.compute(ge(e.inputs[0],"Cast",r,void 0,t.cacheKey,t.to))},Am=e=>{let t,r,n=e.length>=2&&e[1].data!==0,o=e.length>=3&&e[2].data!==0;switch(e[0].dataType){case 1:t=n?e[1].getFloat32Array()[0]:-34028234663852886e22,r=o?e[2].getFloat32Array()[0]:34028234663852886e22;break;case 10:t=n?e[1].getUint16Array()[0]:64511,r=o?e[2].getUint16Array()[0]:31743;break;default:throw new Error("Unsupport data type")}return J({min:t,max:r})},Ns=(e,t)=>{let r=t||Am(e.inputs),n=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Clip",o=>`clamp(${o}, vec4<${n}>(uniforms.min), vec4<${n}>(uniforms.max))`,void 0,r.cacheKey,void 0,[{type:e.inputs[0].dataType,data:r.min},{type:e.inputs[0].dataType,data:r.max}],[{name:"min",type:n},{name:"max",type:n}]),{inputs:[0]})},Ws=e=>{e.compute(ge(e.inputs[0],"Ceil","ceil"))},Ls=e=>{e.compute(ge(e.inputs[0],"Cos","cos"))},Hs=e=>{e.compute(ge(e.inputs[0],"Cosh","cosh"))},Qt=e=>J(e),Gs=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Elu",n=>`elu_vf32(${n})`,`
4229
+ }`},ge=(e,t,r,n,o,i=e.dataType,a,d)=>{let l=[{type:12,data:Math.ceil(k.size(e.dims)/4)}];return a&&l.push(...a),{name:t,shaderCache:{hint:o,inputDependencies:["type"]},getShaderSource:c=>Cm(c,k.size(e.dims),e.dataType,i,r,n,d),getRunData:c=>({outputs:[{dims:e.dims,dataType:i}],dispatchGroup:{x:Math.ceil(k.size(c[0].dims)/64/4)},programUniforms:l})}},Os=e=>{e.compute(ge(e.inputs[0],"Abs","abs"))},Ds=e=>{e.compute(ge(e.inputs[0],"Acos","acos"))},Bs=e=>{e.compute(ge(e.inputs[0],"Acosh","acosh"))},Ms=e=>{e.compute(ge(e.inputs[0],"Asin","asin"))},Rs=e=>{e.compute(ge(e.inputs[0],"Asinh","asinh"))},Us=e=>{e.compute(ge(e.inputs[0],"Atan","atan"))},Vs=e=>{e.compute(ge(e.inputs[0],"Atanh","atanh"))},Ws=e=>ee(e),Ns=(e,t)=>{let r;switch(t.to){case 10:r="vec4<f16>";break;case 1:r="vec4<f32>";break;case 12:r="vec4<u32>";break;case 6:r="vec4<i32>";break;case 9:r="vec4<bool>";break;default:throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${t.to}`)}e.compute(ge(e.inputs[0],"Cast",r,void 0,t.cacheKey,t.to))},Am=e=>{let t,r,n=e.length>=2&&e[1].data!==0,o=e.length>=3&&e[2].data!==0;switch(e[0].dataType){case 1:t=n?e[1].getFloat32Array()[0]:-34028234663852886e22,r=o?e[2].getFloat32Array()[0]:34028234663852886e22;break;case 10:t=n?e[1].getUint16Array()[0]:64511,r=o?e[2].getUint16Array()[0]:31743;break;default:throw new Error("Unsupport data type")}return ee({min:t,max:r})},Ls=(e,t)=>{let r=t||Am(e.inputs),n=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Clip",o=>`clamp(${o}, vec4<${n}>(uniforms.min), vec4<${n}>(uniforms.max))`,void 0,r.cacheKey,void 0,[{type:e.inputs[0].dataType,data:r.min},{type:e.inputs[0].dataType,data:r.max}],[{name:"min",type:n},{name:"max",type:n}]),{inputs:[0]})},Hs=e=>{e.compute(ge(e.inputs[0],"Ceil","ceil"))},Gs=e=>{e.compute(ge(e.inputs[0],"Cos","cos"))},Fs=e=>{e.compute(ge(e.inputs[0],"Cosh","cosh"))},Zt=e=>ee(e),qs=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Elu",n=>`elu_vf32(${n})`,`
4195
4230
  const elu_alpha_ = ${r}(${t.alpha});
4196
4231
 
4197
4232
  fn elu_f32(a: ${r}) -> ${r} {
@@ -4212,15 +4247,15 @@ fn erf_vf32(v: vec4<${e}>) -> vec4<${e}> {
4212
4247
  let absv = abs(v);
4213
4248
  let x = 1.0 / (1.0 + r0 * absv);
4214
4249
  return sign(v) * (1.0 - ((((r5 * x + r4) * x + r3) * x + r2) * x + r1) * x * exp(-absv * absv));
4215
- }`,Fs=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Erf",r=>`erf_vf32(${r})`,Yr(t)))},qs=e=>{e.compute(ge(e.inputs[0],"Exp","exp"))},js=e=>{e.compute(ge(e.inputs[0],"Floor","floor"))},Ks=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Gelu",r=>`0.5 * ${r} * (1.0 + erf_vf32(${r} * 0.7071067811865475))`,Yr(t)))},Ys=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"LeakyRelu",n=>`select(leaky_relu_alpha_ * ${n}, ${n}, ${n} >= vec4<${r}>(0.0))`,`const leaky_relu_alpha_ = ${r}(${t.alpha});`,t.cacheKey))},Xs=e=>{e.compute(ge(e.inputs[0],"Not",t=>`!${t}`))},Zs=e=>{e.compute(ge(e.inputs[0],"Neg",t=>`-${t}`))},Qs=e=>{e.compute(ge(e.inputs[0],"Reciprocal",t=>`1.0/${t}`))},Js=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Relu",r=>`select(vec4<${t}>(0.0), ${r}, ${r} > vec4<${t}>(0.0))`))},eu=e=>{e.compute(ge(e.inputs[0],"Sigmoid",t=>`(1.0 / (1.0 + exp(-${t})))`))},tu=e=>J(e),ru=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"HardSigmoid",n=>`max(vec4<${r}>(0.0), min(vec4<${r}>(1.0), ${t.alpha} * ${n} + vec4<${r}>(${t.beta})))`,void 0,t.cacheKey))},nu=e=>{e.compute(ge(e.inputs[0],"Sin","sin"))},ou=e=>{e.compute(ge(e.inputs[0],"Sinh","sinh"))},iu=e=>{e.compute(ge(e.inputs[0],"Sqrt","sqrt"))},au=e=>{e.compute(ge(e.inputs[0],"Tan","tan"))},su=e=>`sign(${e}) * (1 - exp(-2 * abs(${e}))) / (1 + exp(-2 * abs(${e})))`,uu=e=>{e.compute(ge(e.inputs[0],"Tanh",su))},lo=(e="f32")=>`
4250
+ }`,js=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Erf",r=>`erf_vf32(${r})`,Yr(t)))},Ks=e=>{e.compute(ge(e.inputs[0],"Exp","exp"))},Ys=e=>{e.compute(ge(e.inputs[0],"Floor","floor"))},Xs=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Gelu",r=>`0.5 * ${r} * (1.0 + erf_vf32(${r} * 0.7071067811865475))`,Yr(t)))},Qs=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"LeakyRelu",n=>`select(leaky_relu_alpha_ * ${n}, ${n}, ${n} >= vec4<${r}>(0.0))`,`const leaky_relu_alpha_ = ${r}(${t.alpha});`,t.cacheKey))},Zs=e=>{e.compute(ge(e.inputs[0],"Not",t=>`!${t}`))},Js=e=>{e.compute(ge(e.inputs[0],"Neg",t=>`-${t}`))},eu=e=>{e.compute(ge(e.inputs[0],"Reciprocal",t=>`1.0/${t}`))},tu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"Relu",r=>`select(vec4<${t}>(0.0), ${r}, ${r} > vec4<${t}>(0.0))`))},ru=e=>{e.compute(ge(e.inputs[0],"Sigmoid",t=>`(1.0 / (1.0 + exp(-${t})))`))},nu=e=>ee(e),ou=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"HardSigmoid",n=>`max(vec4<${r}>(0.0), min(vec4<${r}>(1.0), ${t.alpha} * ${n} + vec4<${r}>(${t.beta})))`,void 0,t.cacheKey))},iu=e=>{e.compute(ge(e.inputs[0],"Sin","sin"))},au=e=>{e.compute(ge(e.inputs[0],"Sinh","sinh"))},su=e=>{e.compute(ge(e.inputs[0],"Sqrt","sqrt"))},uu=e=>{e.compute(ge(e.inputs[0],"Tan","tan"))},du=e=>`sign(${e}) * (1 - exp(-2 * abs(${e}))) / (1 + exp(-2 * abs(${e})))`,lu=e=>{e.compute(ge(e.inputs[0],"Tanh",du))},po=(e="f32")=>`
4216
4251
  const fast_gelu_a: ${e} = 0.5;
4217
4252
  const fast_gelu_b: ${e} = 0.7978845608028654;
4218
4253
  const fast_gelu_c: ${e} = 0.035677408136300125;
4219
4254
 
4220
4255
  fn tanh_v(v: vec4<${e}>) -> vec4<${e}> {
4221
- return ${su("v")};
4256
+ return ${du("v")};
4222
4257
  }
4223
- `,co=e=>`(fast_gelu_a + fast_gelu_a * tanh_v(${e} * (fast_gelu_c * ${e} * ${e} + fast_gelu_b))) * ${e}`,du=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"FastGelu",co,lo(t),void 0,e.inputs[0].dataType))},lu=(e,t)=>{let r=Ee(e.inputs[0].dataType);return e.compute(ge(e.inputs[0],"ThresholdedRelu",n=>`select(vec4<${r}>(0.0), ${n}, ${n} > thresholded_relu_alpha_)`,`const thresholded_relu_alpha_ = vec4<${r}>(${t.alpha});`,t.cacheKey)),0},cu=e=>{e.compute(ge(e.inputs[0],"Log","log"))},km=(e,t)=>`
4258
+ `,mo=e=>`(fast_gelu_a + fast_gelu_a * tanh_v(${e} * (fast_gelu_c * ${e} * ${e} + fast_gelu_b))) * ${e}`,cu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"FastGelu",mo,po(t),void 0,e.inputs[0].dataType))},pu=(e,t)=>{let r=Ee(e.inputs[0].dataType);return e.compute(ge(e.inputs[0],"ThresholdedRelu",n=>`select(vec4<${r}>(0.0), ${n}, ${n} > thresholded_relu_alpha_)`,`const thresholded_relu_alpha_ = vec4<${r}>(${t.alpha});`,t.cacheKey)),0},mu=e=>{e.compute(ge(e.inputs[0],"Log","log"))},km=(e,t)=>`
4224
4259
  const alpha = vec4<${e}>(${t});
4225
4260
  const one = ${e}(1.0);
4226
4261
  const zero = ${e}(0.0);
@@ -4237,7 +4272,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4237
4272
  }
4238
4273
  return x * x1;
4239
4274
  }
4240
- `,Em=e=>`quick_gelu_impl(${e})`,pu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"QuickGelu",Em,km(r,t.alpha),t.cacheKey,e.inputs[0].dataType))}});var Pm,zm,fu,hu=V(()=>{"use strict";ie();ae();Xr();Pm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![2560,5120,10240].includes(e[0].dims[2]))throw new Error("hidden state should be 2560, 5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},zm=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let r=E("input",e[0].dataType,e[0].dims,4),n=E("bias",e[0].dataType,[e[0].dims[2]],4),o=M("output",e[0].dataType,t,4),i=k.size(t)/4,a=he(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)}}),getShaderSource:l=>`
4275
+ `,Em=e=>`quick_gelu_impl(${e})`,fu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(ge(e.inputs[0],"QuickGelu",Em,km(r,t.alpha),t.cacheKey,e.inputs[0].dataType))}});var Pm,zm,gu,yu=U(()=>{"use strict";ae();se();Xr();Pm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![2560,5120,10240].includes(e[0].dims[2]))throw new Error("hidden state should be 2560, 5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},zm=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let r=E("input",e[0].dataType,e[0].dims,4),n=E("bias",e[0].dataType,[e[0].dims[2]],4),o=M("output",e[0].dataType,t,4),i=k.size(t)/4,a=he(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)}}),getShaderSource:l=>`
4241
4276
  const M_SQRT2 = sqrt(2.0);
4242
4277
  const halfChannels = ${e[0].dims[2]/4/2}u;
4243
4278
 
@@ -4255,7 +4290,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4255
4290
  let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1);
4256
4291
 
4257
4292
  ${o.setByOffset("global_idx","valueLeft * geluRight")}
4258
- }`}},fu=e=>{Pm(e.inputs),e.compute(zm(e.inputs))}});var Om,Bm,st,gu,yu,bu,wu,_u,vu,$u,xu,Su,Tu,Iu=V(()=>{"use strict";Q();ie();ae();Om=(e,t,r,n,o,i,a,d,l,c,m,u)=>{let h,w;typeof d=="string"?h=w=(v,x)=>`${d}((${v}),(${x}))`:typeof d=="function"?h=w=d:(h=d.scalar,w=d.vector);let g=M("outputData",m,n.length,4),y=E("aData",l,t.length,4),S=E("bData",c,r.length,4),$;if(o)if(i){let v=k.size(t)===1,x=k.size(r)===1,T=t.length>0&&t[t.length-1]%4===0,C=r.length>0&&r[r.length-1]%4===0;v||x?$=g.setByOffset("global_idx",w(v?`${y.type.value}(${y.getByOffset("0")}.x)`:y.getByOffset("global_idx"),x?`${S.type.value}(${S.getByOffset("0")}.x)`:S.getByOffset("global_idx"))):$=`
4293
+ }`}},gu=e=>{Pm(e.inputs),e.compute(zm(e.inputs))}});var Om,Dm,st,bu,wu,_u,vu,$u,xu,Su,Tu,Iu,Cu,Au=U(()=>{"use strict";J();ae();se();Om=(e,t,r,n,o,i,a,d,l,c,m,u)=>{let h,w;typeof d=="string"?h=w=(v,x)=>`${d}((${v}),(${x}))`:typeof d=="function"?h=w=d:(h=d.scalar,w=d.vector);let g=M("outputData",m,n.length,4),y=E("aData",l,t.length,4),S=E("bData",c,r.length,4),$;if(o)if(i){let v=k.size(t)===1,x=k.size(r)===1,T=t.length>0&&t[t.length-1]%4===0,C=r.length>0&&r[r.length-1]%4===0;v||x?$=g.setByOffset("global_idx",w(v?`${y.type.value}(${y.getByOffset("0")}.x)`:y.getByOffset("global_idx"),x?`${S.type.value}(${S.getByOffset("0")}.x)`:S.getByOffset("global_idx"))):$=`
4259
4294
  let outputIndices = ${g.offsetToIndices("global_idx * 4u")};
4260
4295
  let offsetA = ${y.broadcastedIndicesToOffset("outputIndices",g)};
4261
4296
  let offsetB = ${S.broadcastedIndicesToOffset("outputIndices",g)};
@@ -4288,7 +4323,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4288
4323
  ${e.mainStart()}
4289
4324
  ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
4290
4325
  ${$}
4291
- }`},Bm=(e,t,r,n,o,i,a=r.dataType)=>{let d=!k.areEqual(r.dims,n.dims),l=r.dims,c=k.size(r.dims),m=!1,u=!1,h=[d];if(d){let w=rt.calcShape(r.dims,n.dims,!1);if(!w)throw new Error("Can't perform binary op on the given tensors");l=w,c=k.size(l);let g=k.size(r.dims)===1,y=k.size(n.dims)===1,S=r.dims.length>0&&r.dims[r.dims.length-1]%4===0,$=n.dims.length>0&&n.dims[n.dims.length-1]%4===0;h.push(g),h.push(y),h.push(S),h.push($);let v=1;for(let x=1;x<l.length;x++){let T=r.dims[r.dims.length-x]??1,C=n.dims[n.dims.length-x]??1;if(T===C)v*=T;else break}v%4===0?(u=!0,m=!0):(g||y||S||$)&&(m=!0)}else m=!0;return h.push(m),{name:e,shaderCache:{hint:t+h.map(w=>w.toString()).join("_"),inputDependencies:["rank","rank"]},getShaderSource:w=>Om(w,r.dims,n.dims,l,m,d,u,o,r.dataType,n.dataType,a,i),getRunData:()=>({outputs:[{dims:l,dataType:a}],dispatchGroup:{x:Math.ceil(c/64/4)},programUniforms:[{type:12,data:Math.ceil(k.size(l)/4)},...R(r.dims,n.dims,l)]})}},st=(e,t,r,n,o,i)=>{e.compute(Bm(t,o??"",e.inputs[0],e.inputs[1],r,n,i))},gu=e=>{st(e,"Add",(t,r)=>`${t}+${r}`)},yu=e=>{st(e,"Div",(t,r)=>`${t}/${r}`)},bu=e=>{st(e,"Equal",{scalar:(t,r)=>`u32(${t}==${r})`,vector:(t,r)=>`vec4<u32>(${t}==${r})`},void 0,void 0,9)},wu=e=>{st(e,"Mul",(t,r)=>`${t}*${r}`)},_u=e=>{let t=E("input",e.inputs[0].dataType,e.inputs[0].dims).type.value;st(e,"Pow",{scalar:(n,o)=>`pow_custom(${n},${o})`,vector:(n,o)=>`pow_vector_custom(${n},${o})`},`
4326
+ }`},Dm=(e,t,r,n,o,i,a=r.dataType)=>{let d=!k.areEqual(r.dims,n.dims),l=r.dims,c=k.size(r.dims),m=!1,u=!1,h=[d];if(d){let w=rt.calcShape(r.dims,n.dims,!1);if(!w)throw new Error("Can't perform binary op on the given tensors");l=w,c=k.size(l);let g=k.size(r.dims)===1,y=k.size(n.dims)===1,S=r.dims.length>0&&r.dims[r.dims.length-1]%4===0,$=n.dims.length>0&&n.dims[n.dims.length-1]%4===0;h.push(g),h.push(y),h.push(S),h.push($);let v=1;for(let x=1;x<l.length;x++){let T=r.dims[r.dims.length-x]??1,C=n.dims[n.dims.length-x]??1;if(T===C)v*=T;else break}v%4===0?(u=!0,m=!0):(g||y||S||$)&&(m=!0)}else m=!0;return h.push(m),{name:e,shaderCache:{hint:t+h.map(w=>w.toString()).join("_"),inputDependencies:["rank","rank"]},getShaderSource:w=>Om(w,r.dims,n.dims,l,m,d,u,o,r.dataType,n.dataType,a,i),getRunData:()=>({outputs:[{dims:l,dataType:a}],dispatchGroup:{x:Math.ceil(c/64/4)},programUniforms:[{type:12,data:Math.ceil(k.size(l)/4)},...V(r.dims,n.dims,l)]})}},st=(e,t,r,n,o,i)=>{e.compute(Dm(t,o??"",e.inputs[0],e.inputs[1],r,n,i))},bu=e=>{st(e,"Add",(t,r)=>`${t}+${r}`)},wu=e=>{st(e,"Div",(t,r)=>`${t}/${r}`)},_u=e=>{st(e,"Equal",{scalar:(t,r)=>`u32(${t}==${r})`,vector:(t,r)=>`vec4<u32>(${t}==${r})`},void 0,void 0,9)},vu=e=>{st(e,"Mul",(t,r)=>`${t}*${r}`)},$u=e=>{let t=E("input",e.inputs[0].dataType,e.inputs[0].dims).type.value;st(e,"Pow",{scalar:(n,o)=>`pow_custom(${n},${o})`,vector:(n,o)=>`pow_vector_custom(${n},${o})`},`
4292
4327
  fn pow_custom(a : ${t}, b : ${t}) -> ${t} {
4293
4328
  if (b == ${t}(0.0)) {
4294
4329
  return ${t}(1.0);
@@ -4301,7 +4336,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4301
4336
  // TODO: implement vectorized pow
4302
4337
  return vec4<${t}>(pow_custom(a.x, b.x), pow_custom(a.y, b.y), pow_custom(a.z, b.z), pow_custom(a.w, b.w));
4303
4338
  }
4304
- `)},vu=e=>{st(e,"Sub",(t,r)=>`${t}-${r}`)},$u=e=>{st(e,"Greater",{scalar:(t,r)=>`u32(${t}>${r})`,vector:(t,r)=>`vec4<u32>(${t}>${r})`},void 0,void 0,9)},xu=e=>{st(e,"Less",{scalar:(t,r)=>`u32(${t}<${r})`,vector:(t,r)=>`vec4<u32>(${t}<${r})`},void 0,void 0,9)},Su=e=>{st(e,"GreaterOrEqual",{scalar:(t,r)=>`u32(${t}>=${r})`,vector:(t,r)=>`vec4<u32>(${t}>=${r})`},void 0,void 0,9)},Tu=e=>{st(e,"LessOrEqual",{scalar:(t,r)=>`u32(${t}<=${r})`,vector:(t,r)=>`vec4<u32>(${t}<=${r})`},void 0,void 0,9)}});var Mm,Rm,Um,Vm,Cu,Au,ku=V(()=>{"use strict";Q();ie();Ie();ae();Mm=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");let r=0,n=e[r],o=n.dataType,i=n.dims.length;e.forEach((a,d)=>{if(d!==r){if(a.dataType!==o)throw new Error("input tensors should be one type");if(a.dims.length!==i)throw new Error("input tensors should have the same shape");a.dims.forEach((l,c)=>{if(c!==t&&l!==n.dims[c])throw new Error("non concat dimensions must match")})}})},Rm=(e,t)=>`
4339
+ `)},xu=e=>{st(e,"Sub",(t,r)=>`${t}-${r}`)},Su=e=>{st(e,"Greater",{scalar:(t,r)=>`u32(${t}>${r})`,vector:(t,r)=>`vec4<u32>(${t}>${r})`},void 0,void 0,9)},Tu=e=>{st(e,"Less",{scalar:(t,r)=>`u32(${t}<${r})`,vector:(t,r)=>`vec4<u32>(${t}<${r})`},void 0,void 0,9)},Iu=e=>{st(e,"GreaterOrEqual",{scalar:(t,r)=>`u32(${t}>=${r})`,vector:(t,r)=>`vec4<u32>(${t}>=${r})`},void 0,void 0,9)},Cu=e=>{st(e,"LessOrEqual",{scalar:(t,r)=>`u32(${t}<=${r})`,vector:(t,r)=>`vec4<u32>(${t}<=${r})`},void 0,void 0,9)}});var Mm,Rm,Um,Vm,ku,Eu,Pu=U(()=>{"use strict";J();ae();Ie();se();Mm=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");let r=0,n=e[r],o=n.dataType,i=n.dims.length;e.forEach((a,d)=>{if(d!==r){if(a.dataType!==o)throw new Error("input tensors should be one type");if(a.dims.length!==i)throw new Error("input tensors should have the same shape");a.dims.forEach((l,c)=>{if(c!==t&&l!==n.dims[c])throw new Error("non concat dimensions must match")})}})},Rm=(e,t)=>`
4305
4340
  fn calculateInputIndex(index: u32) -> u32 {
4306
4341
  let sizeInConcatAxis = array<u32, ${e}u>(${t});
4307
4342
  for (var i: u32 = 0u; i < ${e}; i += 1u ) {
@@ -4311,7 +4346,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4311
4346
  }
4312
4347
  return ${e}u;
4313
4348
  }`,Um=(e,t)=>{let r=e.length,n=[];for(let o=0;o<r;++o){let i=t.setByOffset("global_idx",e[o].getByIndices("indices"));r===1?n.push(i):o===0?n.push(`if (inputIndex == ${o}u) { ${i} }`):o===r-1?n.push(`else { ${i} }`):n.push(`else if (inputIndex == ${o}) { ${i} }`)}return n.join(`
4314
- `)},Vm=(e,t,r,n)=>{let o=k.size(r),i=new Array(e.length),a=new Array(e.length),d=0,l=[],c=[],m=[{type:12,data:o}];for(let y=0;y<e.length;++y)d+=e[y].dims[t],i[y]=d,c.push(e[y].dims.length),a[y]=E(`input${y}`,n,c[y]),l.push("rank"),m.push({type:12,data:i[y]});for(let y=0;y<e.length;++y)m.push(...R(e[y].dims));m.push(...R(r));let u=M("output",n,r.length),h=u.indicesGet("indices",t),w=Array.from(Array(i.length).keys()).map(y=>`uniforms.sizeInConcatAxis${y}`).join(","),g=y=>`
4349
+ `)},Vm=(e,t,r,n)=>{let o=k.size(r),i=new Array(e.length),a=new Array(e.length),d=0,l=[],c=[],m=[{type:12,data:o}];for(let y=0;y<e.length;++y)d+=e[y].dims[t],i[y]=d,c.push(e[y].dims.length),a[y]=E(`input${y}`,n,c[y]),l.push("rank"),m.push({type:12,data:i[y]});for(let y=0;y<e.length;++y)m.push(...V(e[y].dims));m.push(...V(r));let u=M("output",n,r.length),h=u.indicesGet("indices",t),w=Array.from(Array(i.length).keys()).map(y=>`uniforms.sizeInConcatAxis${y}`).join(","),g=y=>`
4315
4350
 
4316
4351
  ${(()=>{y.registerUniform("outputSize","u32");for(let S=0;S<e.length;S++)y.registerUniform(`sizeInConcatAxis${S}`,"u32");return y.declareVariables(...a,u)})()}
4317
4352
 
@@ -4329,11 +4364,11 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4329
4364
  }
4330
4365
 
4331
4366
  ${Um(a,u)}
4332
- }`;return{name:"Concat",shaderCache:{hint:`${t}`,inputDependencies:l},getRunData:()=>({outputs:[{dims:r,dataType:n}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:m}),getShaderSource:g}},Cu=(e,t)=>{let r=e.inputs,n=r[0].dims,o=k.normalizeAxis(t.axis,n.length);Mm(r,o);let i=n.slice();i[o]=r.reduce((d,l)=>d+(l.dims.length>o?l.dims[o]:0),0);let a=r.filter(d=>k.size(d.dims)>0);e.compute(Vm(a,o,i,r[0].dataType),{inputs:a})},Au=e=>J({axis:e.axis})});var He,Ge,Fe,Zr,ct=V(()=>{"use strict";Q();ie();He=(e,t,r="f32")=>{switch(e.activation){case"Relu":return`value = max(value, ${t}(0.0));`;case"Sigmoid":return`value = (${t}(1.0) / (${t}(1.0) + exp(-value)));`;case"Clip":return`value = clamp(value, ${t}(${r}(uniforms.clip_min)), ${t}(${r}(uniforms.clip_max)));`;case"HardSigmoid":return`value = max(${t}(0.0), min(${t}(1.0), ${r}(uniforms.alpha) * value + ${r}(uniforms.beta)));`;case"LeakyRelu":return`value = select(${r}(uniforms.alpha) * value, value, value >= ${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value));
4367
+ }`;return{name:"Concat",shaderCache:{hint:`${t}`,inputDependencies:l},getRunData:()=>({outputs:[{dims:r,dataType:n}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:m}),getShaderSource:g}},ku=(e,t)=>{let r=e.inputs,n=r[0].dims,o=k.normalizeAxis(t.axis,n.length);Mm(r,o);let i=n.slice();i[o]=r.reduce((d,l)=>d+(l.dims.length>o?l.dims[o]:0),0);let a=r.filter(d=>k.size(d.dims)>0);e.compute(Vm(a,o,i,r[0].dataType),{inputs:a})},Eu=e=>ee({axis:e.axis})});var He,Ge,Fe,Qr,ct=U(()=>{"use strict";J();ae();He=(e,t,r="f32")=>{switch(e.activation){case"Relu":return`value = max(value, ${t}(0.0));`;case"Sigmoid":return`value = (${t}(1.0) / (${t}(1.0) + exp(-value)));`;case"Clip":return`value = clamp(value, ${t}(${r}(uniforms.clip_min)), ${t}(${r}(uniforms.clip_max)));`;case"HardSigmoid":return`value = max(${t}(0.0), min(${t}(1.0), ${r}(uniforms.alpha) * value + ${r}(uniforms.beta)));`;case"LeakyRelu":return`value = select(${r}(uniforms.alpha) * value, value, value >= ${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value));
4333
4368
  value = sign(value) * (1.0 - e2x) / (1.0 + e2x);
4334
- `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},Ge=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},Fe=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},Zr=e=>{let t=e?.activation||"";if(t==="HardSigmoid"){let[r,n]=e?.activation_params||[.2,.5];return{activation:t,alpha:r,beta:n}}else if(t==="Clip"){let[r,n]=e?.activation_params||[Ya,Xa];return{activation:t,clipMax:n,clipMin:r}}else if(t==="LeakyRelu"){let[r]=e?.activation_params||[.01];return{activation:t,alpha:r}}return{activation:t}}});var Oe,Qr,Jt=V(()=>{"use strict";Oe=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Qr=e=>`
4369
+ `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},Ge=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},Fe=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},Qr=e=>{let t=e?.activation||"";if(t==="HardSigmoid"){let[r,n]=e?.activation_params||[.2,.5];return{activation:t,alpha:r,beta:n}}else if(t==="Clip"){let[r,n]=e?.activation_params||[Qa,Za];return{activation:t,clipMax:n,clipMin:r}}else if(t==="LeakyRelu"){let[r]=e?.activation_params||[.01];return{activation:t,alpha:r}}return{activation:t}}});var Oe,Zr,Jt=U(()=>{"use strict";Oe=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Zr=e=>`
4335
4370
  ${e?"value = value + getBiasByOutputCoords(coords);":""}
4336
- `});var Jr,po=V(()=>{"use strict";Jr=e=>`
4371
+ `});var Jr,fo=U(()=>{"use strict";Jr=e=>`
4337
4372
  fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
4338
4373
  return dot(coords, vec4<i32>(
4339
4374
  shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1));
@@ -4342,7 +4377,7 @@ fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
4342
4377
  return dot(coords, vec4<i32>(
4343
4378
  i32(${e}.x), i32(${e}.y), i32(${e}.z), 1));
4344
4379
  }
4345
- `});var Nm,Wm,er,Eu,Lm,tr,Hm,en,rr=V(()=>{"use strict";Q();ie();ae();ct();Jt();Nm=(e,t)=>e?`
4380
+ `});var Wm,Nm,er,zu,Lm,tr,Hm,en,rr=U(()=>{"use strict";J();ae();se();ct();Jt();Wm=(e,t)=>e?`
4346
4381
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4347
4382
  kStart + inputRow,
4348
4383
  globalRowStart / innerElementSize + inputCol${t?", batchIndices":""});
@@ -4350,7 +4385,7 @@ fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
4350
4385
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4351
4386
  globalRow + innerRow,
4352
4387
  kStart / innerElementSize + inputCol${t?", batchIndices":""});
4353
- `,Wm=(e,t)=>e?`
4388
+ `,Nm=(e,t)=>e?`
4354
4389
  let ACached0 = mm_Asub[k * innerElementSize][localRow];
4355
4390
  let ACached1 = mm_Asub[k * innerElementSize + 1][localRow];
4356
4391
  let ACached2 = mm_Asub[k * innerElementSize + 2][localRow];
@@ -4404,7 +4439,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4404
4439
  for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
4405
4440
  let inputRow = tileRow + innerRow;
4406
4441
  let inputCol = tileCol;
4407
- ${Nm(o,n)}
4442
+ ${Wm(o,n)}
4408
4443
  }
4409
4444
 
4410
4445
  // Load one tile of B into local memory.
@@ -4423,7 +4458,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4423
4458
  let BCached2 = mm_Bsub[k * innerElementSize + 2][tileCol];
4424
4459
  ${h===3?"":"let BCached3 = mm_Bsub[k * innerElementSize + 3][tileCol];"}
4425
4460
 
4426
- ${Wm(o,h)}
4461
+ ${Nm(o,h)}
4427
4462
  }
4428
4463
 
4429
4464
  workgroupBarrier();
@@ -4432,7 +4467,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4432
4467
  for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
4433
4468
  mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]);
4434
4469
  }
4435
- }`},Eu=(e,t)=>e?`
4470
+ }`},zu=(e,t)=>e?`
4436
4471
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4437
4472
  kStart + inputRow,
4438
4473
  globalRowStart + inputCol${t?", batchIndices":""});
@@ -4451,7 +4486,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4451
4486
  // Load one tile of A into local memory.
4452
4487
  for (var inputRow = localRow; inputRow < ${h}; inputRow = inputRow + ${t[1]}) {
4453
4488
  for (var inputCol = localCol; inputCol < ${u}; inputCol = inputCol + ${t[0]}) {
4454
- ${Eu(o,n)}
4489
+ ${zu(o,n)}
4455
4490
  }
4456
4491
  }
4457
4492
  // Load one tile of B into local memory.
@@ -4506,7 +4541,7 @@ for (var t = 0; t < num_tiles; t = t + 1) {
4506
4541
  for (var innerCol = 0; innerCol < ${g}; innerCol = innerCol + 1) {
4507
4542
  let inputRow = tileRowA + innerRow;
4508
4543
  let inputCol = tileColA + innerCol;
4509
- ${Eu(o,n)}
4544
+ ${zu(o,n)}
4510
4545
  }
4511
4546
  }
4512
4547
 
@@ -4566,7 +4601,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4566
4601
  var acc : array<array<${r}, colPerThread>, rowPerThread>;
4567
4602
  ${S}
4568
4603
  }
4569
- `},Hm=(e,t,r,n,o,i=!1)=>{let[a,d,l]=o,[c,m,u,h]=n,w=Nt(a,l),g=Nt(d,l),y=he(n[0].type.tensor),S=()=>{let x=m.rank,T=c.rank,C=`var aIndices: ${m.type.indices};`;for(let A=x-2-1,P=T-1;A>=0;A--,P--)C+=`
4604
+ `},Hm=(e,t,r,n,o,i=!1)=>{let[a,d,l]=o,[c,m,u,h]=n,w=Wt(a,l),g=Wt(d,l),y=he(n[0].type.tensor),S=()=>{let x=m.rank,T=c.rank,C=`var aIndices: ${m.type.indices};`;for(let A=x-2-1,P=T-1;A>=0;A--,P--)C+=`
4570
4605
  aIndices[${A}] = ${T>1?`batchIndices[${P}]`:"batchIndices"};`;return w.forEach(A=>{C+=`
4571
4606
  aIndices[${A}] = 0;`}),C+=`
4572
4607
  aIndices[${x-2}] = u32(row);
@@ -4607,11 +4642,11 @@ bIndices[${x-2}] = u32(row);
4607
4642
  ${h.setByIndices("vec3<u32>(coords)","value")}
4608
4643
  }
4609
4644
  }
4610
- `},en=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a.slice(0,-2),c=d.slice(0,-2),m=n?n.slice(0,-2):r.slice(0,-2),u=k.size(m),h=a[a.length-2],w=a[a.length-1],g=d[d.length-1],y=w%4===0&&g%4===0,S=h<=8?[4,1,1]:[4,4,1],$=[8,8,1],v=[Math.ceil(g/$[0]/S[0]),Math.ceil(h/$[1]/S[1]),Math.ceil(u/$[2]/S[2])],x=y?4:1,T=[...l,h,w/x],C=T.length,A=[...c,w,g/x],P=A.length,B=[u,h,g/x],N=[{type:6,data:h},{type:6,data:g},{type:6,data:w}];Ge(t,N),N.push(...R(m,T,A));let W=["rank","rank"],K=e.length>2;K&&(N.push(...R(e[2].dims)),W.push("rank")),N.push(...R(B));let Z=ee=>{let se=m.length,de=Fr("batchDims",e[0].dataType,se,1),Y=he(e[0].dataType),le=E("a",e[0].dataType,C,x),ce=E("b",e[1].dataType,P,x),q=M("result",e[0].dataType,B.length,x),ue=[le,ce];if(K){let G=o?x:1;ue.push(E("bias",e[2].dataType,e[2].dims.length,G))}let re=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];Fe(t,re);let ne=he(q.type.tensor),oe=He(t,q.type.value,ne),U=Hm(x,K,oe,[de,le,ce,q],[l,c,m],o);return`
4611
- ${ee.registerUniforms(re).registerInternalVariables(de).declareVariables(...ue,q)}
4612
- ${U}
4613
- ${y?er(S,$,Y,de):tr(S,$,Y,de)}
4614
- `};return{name:"MatMul",shaderCache:{hint:`${S};${t.activation};${y};${o}`,inputDependencies:W},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:v[0],y:v[1],z:v[2]},programUniforms:N}),getShaderSource:Z}}});var Gm,Pu,zu=V(()=>{"use strict";Q();Xe();ae();ct();Jt();po();rr();Gm=(e,t,r,n,o=!1,i,a=4,d=4,l=4,c="f32")=>{let m=W=>{switch(W){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${c}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${W} is not supported.`)}},u=W=>{switch(W){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${W} is not supported.`)}},h=e?`
4645
+ `},en=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a.slice(0,-2),c=d.slice(0,-2),m=n?n.slice(0,-2):r.slice(0,-2),u=k.size(m),h=a[a.length-2],w=a[a.length-1],g=d[d.length-1],y=w%4===0&&g%4===0,S=h<=8?[4,1,1]:[4,4,1],$=[8,8,1],v=[Math.ceil(g/$[0]/S[0]),Math.ceil(h/$[1]/S[1]),Math.ceil(u/$[2]/S[2])],x=y?4:1,T=[...l,h,w/x],C=T.length,A=[...c,w,g/x],P=A.length,D=[u,h,g/x],W=[{type:6,data:h},{type:6,data:g},{type:6,data:w}];Ge(t,W),W.push(...V(m,T,A));let N=["rank","rank"],j=e.length>2;j&&(W.push(...V(e[2].dims)),N.push("rank")),W.push(...V(D));let Y=Z=>{let te=m.length,ue=Fr("batchDims",e[0].dataType,te,1),K=he(e[0].dataType),de=E("a",e[0].dataType,C,x),ce=E("b",e[1].dataType,P,x),q=M("result",e[0].dataType,D.length,x),le=[de,ce];if(j){let G=o?x:1;le.push(E("bias",e[2].dataType,e[2].dims.length,G))}let re=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];Fe(t,re);let ne=he(q.type.tensor),oe=He(t,q.type.value,ne),R=Hm(x,j,oe,[ue,de,ce,q],[l,c,m],o);return`
4646
+ ${Z.registerUniforms(re).registerInternalVariables(ue).declareVariables(...le,q)}
4647
+ ${R}
4648
+ ${y?er(S,$,K,ue):tr(S,$,K,ue)}
4649
+ `};return{name:"MatMul",shaderCache:{hint:`${S};${t.activation};${y};${o}`,inputDependencies:N},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:v[0],y:v[1],z:v[2]},programUniforms:W}),getShaderSource:Y}}});var Gm,Ou,Du=U(()=>{"use strict";J();Xe();se();ct();Jt();fo();rr();Gm=(e,t,r,n,o=!1,i,a=4,d=4,l=4,c="f32")=>{let m=N=>{switch(N){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${c}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${N} is not supported.`)}},u=N=>{switch(N){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${N} is not supported.`)}},h=e?`
4615
4650
  let coord = vec4<i32>(batch, xRow, xCol, xCh);
4616
4651
  `:`
4617
4652
  let coord = vec4<i32>(batch, xCh, xRow, xCol);
@@ -4660,7 +4695,7 @@ bIndices[${x-2}] = u32(row);
4660
4695
  if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) {
4661
4696
  ${v}
4662
4697
  }
4663
- return ${Oe(a,c)}(0.0);`,T=`${u(d)}`,C=Oe(l,c),A=e?Oe(a,c):Oe(d,c),P=e?Oe(d,c):Oe(a,c),B=He(i,C,c);return`
4698
+ return ${Oe(a,c)}(0.0);`,T=`${u(d)}`,C=Oe(l,c),A=e?Oe(a,c):Oe(d,c),P=e?Oe(d,c):Oe(a,c),D=He(i,C,c);return`
4664
4699
  fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${A} {
4665
4700
  ${e?x:T}
4666
4701
  }
@@ -4676,18 +4711,18 @@ bIndices[${x-2}] = u32(row);
4676
4711
  var value = valueIn;
4677
4712
  let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"};
4678
4713
  ${w}
4679
- ${Qr(o)}
4680
- ${B}
4714
+ ${Zr(o)}
4715
+ ${D}
4681
4716
  setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
4682
4717
  }
4683
- }`},Pu=(e,t,r,n,o,i,a,d,l)=>{let c=t.format==="NHWC",m=c?e[0].dims[3]:e[0].dims[1],u=r[0],h=c?r[2]:r[3],w=c?r[1]:r[2],g=c?r[3]:r[1],y=c&&(m%4===0||m%3===0)&&g%4===0,S=c?g:h*w,$=c?h*w:g,v=[8,8,1],x=n<=8?[4,1,1]:[4,4,1],T=[Math.ceil(S/v[0]/x[0]),Math.ceil($/v[1]/x[1]),Math.ceil(u/v[2]/x[2])];me("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${T}`);let C=y?c&&m%4!==0?3:4:1,A=v[1]*x[1],P=v[0]*x[0],B=Math.max(v[0]*C,v[1]),N=n%A===0,W=o%P===0,K=i%B===0,Z=y?[C,4,4]:[1,1,1],ee=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];Ge(t,ee),ee.push(...R(e[0].dims,e[1].dims));let se=["rank","rank"];a&&(ee.push(...R(e[2].dims)),se.push("rank")),ee.push(...R(r));let de=Y=>{let le=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];Fe(t,le);let ce=y?4:1,q=he(e[0].dataType),ue=`
4718
+ }`},Ou=(e,t,r,n,o,i,a,d,l)=>{let c=t.format==="NHWC",m=c?e[0].dims[3]:e[0].dims[1],u=r[0],h=c?r[2]:r[3],w=c?r[1]:r[2],g=c?r[3]:r[1],y=c&&(m%4===0||m%3===0)&&g%4===0,S=c?g:h*w,$=c?h*w:g,v=[8,8,1],x=n<=8?[4,1,1]:[4,4,1],T=[Math.ceil(S/v[0]/x[0]),Math.ceil($/v[1]/x[1]),Math.ceil(u/v[2]/x[2])];pe("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${T}`);let C=y?c&&m%4!==0?3:4:1,A=v[1]*x[1],P=v[0]*x[0],D=Math.max(v[0]*C,v[1]),W=n%A===0,N=o%P===0,j=i%D===0,Y=y?[C,4,4]:[1,1,1],Z=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];Ge(t,Z),Z.push(...V(e[0].dims,e[1].dims));let te=["rank","rank"];a&&(Z.push(...V(e[2].dims)),te.push("rank")),Z.push(...V(r));let ue=K=>{let de=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];Fe(t,de);let ce=y?4:1,q=he(e[0].dataType),le=`
4684
4719
  fn setOutputAtIndex(flatIndex : i32, value : ${y?`vec4<${q}>`:q}) {
4685
4720
  result[flatIndex] = ${y?`vec4<${q}>`:q}(value);
4686
4721
  }
4687
4722
  fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${y?`vec4<${q}>`:q}) {
4688
4723
  let flatIndex = getOutputIndexFromCoords(vec4<i32>(d0, d1, d2, d3));
4689
4724
  setOutputAtIndex(flatIndex ${y?"/ 4":""}, value);
4690
- }`,re=E("x",e[0].dataType,e[0].dims.length,C===3?1:C),ne=E("w",e[1].dataType,e[1].dims.length,ce),oe=[re,ne],U=M("result",e[0].dataType,r.length,ce);if(a){let G=E("bias",e[2].dataType,e[2].dims.length,ce);oe.push(G),ue+=`
4725
+ }`,re=E("x",e[0].dataType,e[0].dims.length,C===3?1:C),ne=E("w",e[1].dataType,e[1].dims.length,ce),oe=[re,ne],R=M("result",e[0].dataType,r.length,ce);if(a){let G=E("bias",e[2].dataType,e[2].dims.length,ce);oe.push(G),le+=`
4691
4726
  fn getBiasByOutputCoords(coords : vec4<i32>) -> ${y?`vec4<${q}>`:q} {
4692
4727
  return bias[coords.${c?"w":"y"}${y?"/ 4":""}];
4693
4728
  }`}return`
@@ -4695,14 +4730,14 @@ bIndices[${x-2}] = u32(row);
4695
4730
  //struct Uniforms { xShape : vec4<i32>, wShape : vec4<i32>, outShape : vec4<i32>,
4696
4731
  // outShapeStrides: vec3<i32>, filterDims : vec2<i32>, pad : vec2<i32>, stride : vec2<i32>,
4697
4732
  // dilation : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32 };
4698
- ${Y.registerUniforms(le).declareVariables(...oe,U)}
4699
- ${ue}
4700
- ${Gm(c,N,W,K,a,t,Z[0],Z[1],Z[2],q)}
4701
- ${y?er(x,v,q,void 0,!c,B):tr(x,v,q,void 0,!c,B,!1,void 0,d)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${C};${y};${N};${W};${K};${A};${P};${B}`,inputDependencies:se},getRunData:()=>({outputs:[{dims:l?l(r):r,dataType:e[0].dataType}],dispatchGroup:{x:T[0],y:T[1],z:T[2]},programUniforms:ee}),getShaderSource:de}}});var Fm,Ou,tn,qm,Bu,jm,Du,Mu,Ru=V(()=>{"use strict";Q();Xe();ie();ae();ct();Jt();Fm=e=>{let t=1;for(let r=0;r<e.length;r++)t*=e[r];return t},Ou=e=>typeof e=="number"?[e,e,e]:e,tn=(e,t)=>t<=1?e:e+(e-1)*(t-1),qm=(e,t,r,n=1)=>{let o=tn(t,n);return Math.floor((e[0]*(r-1)-r+o)/2)},Bu=(e,t,r,n,o)=>{o==null&&(o=qm(e,t[0],n[0]));let i=[0,0,0,r];for(let a=0;a<3;a++)e[a]+2*o>=t[a]&&(i[a]=Math.trunc((e[a]-t[a]+2*o)/n[a]+1));return i},jm=(e,t,r,n,o,i,a,d,l,c)=>{let m,u,h,w;if(e==="VALID"&&(e=0),typeof e=="number"){m={top:e,bottom:e,left:e,right:e,front:e,back:e};let g=Bu([t,r,n,1],[d,l,c],1,[o,i,a],e);u=g[0],h=g[1],w=g[2]}else if(Array.isArray(e)){if(!e.every((y,S,$)=>y===$[0]))throw Error(`Unsupported padding parameter: ${e}`);m={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let g=Bu([t,r,n,1],[d,l,c],1,[o,i,a],e[0]);u=g[0],h=g[1],w=g[2]}else if(e==="SAME_UPPER"){u=Math.ceil(t/o),h=Math.ceil(r/i),w=Math.ceil(n/a);let g=(u-1)*o+d-t,y=(h-1)*i+l-r,S=(w-1)*a+c-n,$=Math.floor(g/2),v=g-$,x=Math.floor(y/2),T=y-x,C=Math.floor(S/2),A=S-C;m={top:x,bottom:T,left:C,right:A,front:$,back:v}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:m,outDepth:u,outHeight:h,outWidth:w}},Du=(e,t,r,n,o,i=!1,a="channelsLast")=>{let d,l,c,m,u;if(a==="channelsLast")[d,l,c,m,u]=e;else if(a==="channelsFirst")[d,u,l,c,m]=e;else throw new Error(`Unknown dataFormat ${a}`);let[h,,w,g,y]=t,[S,$,v]=Ou(r),[x,T,C]=Ou(n),A=tn(w,x),P=tn(g,T),B=tn(y,C),{padInfo:N,outDepth:W,outHeight:K,outWidth:Z}=jm(o,l,c,m,S,$,v,A,P,B),ee=i?h*u:h,se=[0,0,0,0,0];return 
a==="channelsFirst"?se=[d,ee,W,K,Z]:a==="channelsLast"&&(se=[d,W,K,Z,ee]),{batchSize:d,dataFormat:a,inDepth:l,inHeight:c,inWidth:m,inChannels:u,outDepth:W,outHeight:K,outWidth:Z,outChannels:ee,padInfo:N,strideDepth:S,strideHeight:$,strideWidth:v,filterDepth:w,filterHeight:g,filterWidth:y,effectiveFilterDepth:A,effectiveFilterHeight:P,effectiveFilterWidth:B,dilationDepth:x,dilationHeight:T,dilationWidth:C,inShape:e,outShape:se,filterShape:t}},Mu=(e,t,r,n,o,i)=>{let a=i==="channelsLast",d=a?e[0].dims[3]:e[0].dims[1],l=!1,c=[64,1,1],m={x:r.map((v,x)=>x)},u=[Math.ceil(Fm(m.x.map(v=>r[v]))/c[0]),1,1];me("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${u}`);let h=l?a&&d%4!==0?3:4:1,w=k.size(r),g=[{type:12,data:w},{type:12,data:n},{type:12,data:o},{type:12,data:t.strides},{type:12,data:t.dilations}];Ge(t,g),g.push(...R(e[0].dims,e[1].dims));let y=["rank","rank"],S=e.length===3;S&&(g.push(...R(e[2].dims)),y.push("rank")),g.push(...R(r));let $=v=>{let x=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:n.length},{name:"pads",type:"u32",length:o.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];Fe(t,x);let T=l?4:1,C=he(e[0].dataType),A=E("x",e[0].dataType,e[0].dims.length,h===3?1:h),P=E("W",e[1].dataType,e[1].dims.length,T),B=[A,P],N=M("result",e[0].dataType,r.length,T),W="";if(S){let ee=E("bias",e[2].dataType,e[2].dims.length,T);B.push(ee),W+=`
4733
+ ${K.registerUniforms(de).declareVariables(...oe,R)}
4734
+ ${le}
4735
+ ${Gm(c,W,N,j,a,t,Y[0],Y[1],Y[2],q)}
4736
+ ${y?er(x,v,q,void 0,!c,D):tr(x,v,q,void 0,!c,D,!1,void 0,d)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${C};${y};${W};${N};${j};${A};${P};${D}`,inputDependencies:te},getRunData:()=>({outputs:[{dims:l?l(r):r,dataType:e[0].dataType}],dispatchGroup:{x:T[0],y:T[1],z:T[2]},programUniforms:Z}),getShaderSource:ue}}});var Fm,Bu,tn,qm,Mu,jm,Ru,Uu,Vu=U(()=>{"use strict";J();Xe();ae();se();ct();Jt();Fm=e=>{let t=1;for(let r=0;r<e.length;r++)t*=e[r];return t},Bu=e=>typeof e=="number"?[e,e,e]:e,tn=(e,t)=>t<=1?e:e+(e-1)*(t-1),qm=(e,t,r,n=1)=>{let o=tn(t,n);return Math.floor((e[0]*(r-1)-r+o)/2)},Mu=(e,t,r,n,o)=>{o==null&&(o=qm(e,t[0],n[0]));let i=[0,0,0,r];for(let a=0;a<3;a++)e[a]+2*o>=t[a]&&(i[a]=Math.trunc((e[a]-t[a]+2*o)/n[a]+1));return i},jm=(e,t,r,n,o,i,a,d,l,c)=>{let m,u,h,w;if(e==="VALID"&&(e=0),typeof e=="number"){m={top:e,bottom:e,left:e,right:e,front:e,back:e};let g=Mu([t,r,n,1],[d,l,c],1,[o,i,a],e);u=g[0],h=g[1],w=g[2]}else if(Array.isArray(e)){if(!e.every((y,S,$)=>y===$[0]))throw Error(`Unsupported padding parameter: ${e}`);m={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let g=Mu([t,r,n,1],[d,l,c],1,[o,i,a],e[0]);u=g[0],h=g[1],w=g[2]}else if(e==="SAME_UPPER"){u=Math.ceil(t/o),h=Math.ceil(r/i),w=Math.ceil(n/a);let g=(u-1)*o+d-t,y=(h-1)*i+l-r,S=(w-1)*a+c-n,$=Math.floor(g/2),v=g-$,x=Math.floor(y/2),T=y-x,C=Math.floor(S/2),A=S-C;m={top:x,bottom:T,left:C,right:A,front:$,back:v}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:m,outDepth:u,outHeight:h,outWidth:w}},Ru=(e,t,r,n,o,i=!1,a="channelsLast")=>{let d,l,c,m,u;if(a==="channelsLast")[d,l,c,m,u]=e;else if(a==="channelsFirst")[d,u,l,c,m]=e;else throw new Error(`Unknown dataFormat ${a}`);let[h,,w,g,y]=t,[S,$,v]=Bu(r),[x,T,C]=Bu(n),A=tn(w,x),P=tn(g,T),D=tn(y,C),{padInfo:W,outDepth:N,outHeight:j,outWidth:Y}=jm(o,l,c,m,S,$,v,A,P,D),Z=i?h*u:h,te=[0,0,0,0,0];return 
a==="channelsFirst"?te=[d,Z,N,j,Y]:a==="channelsLast"&&(te=[d,N,j,Y,Z]),{batchSize:d,dataFormat:a,inDepth:l,inHeight:c,inWidth:m,inChannels:u,outDepth:N,outHeight:j,outWidth:Y,outChannels:Z,padInfo:W,strideDepth:S,strideHeight:$,strideWidth:v,filterDepth:w,filterHeight:g,filterWidth:y,effectiveFilterDepth:A,effectiveFilterHeight:P,effectiveFilterWidth:D,dilationDepth:x,dilationHeight:T,dilationWidth:C,inShape:e,outShape:te,filterShape:t}},Uu=(e,t,r,n,o,i)=>{let a=i==="channelsLast",d=a?e[0].dims[3]:e[0].dims[1],l=!1,c=[64,1,1],m={x:r.map((v,x)=>x)},u=[Math.ceil(Fm(m.x.map(v=>r[v]))/c[0]),1,1];pe("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${u}`);let h=l?a&&d%4!==0?3:4:1,w=k.size(r),g=[{type:12,data:w},{type:12,data:n},{type:12,data:o},{type:12,data:t.strides},{type:12,data:t.dilations}];Ge(t,g),g.push(...V(e[0].dims,e[1].dims));let y=["rank","rank"],S=e.length===3;S&&(g.push(...V(e[2].dims)),y.push("rank")),g.push(...V(r));let $=v=>{let x=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:n.length},{name:"pads",type:"u32",length:o.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];Fe(t,x);let T=l?4:1,C=he(e[0].dataType),A=E("x",e[0].dataType,e[0].dims.length,h===3?1:h),P=E("W",e[1].dataType,e[1].dims.length,T),D=[A,P],W=M("result",e[0].dataType,r.length,T),N="";if(S){let Z=E("bias",e[2].dataType,e[2].dims.length,T);D.push(Z),N+=`
4702
4737
  fn getBiasByOutputCoords(coords : array<u32, 5>) -> ${l?`vec4<${C}>`:C} {
4703
4738
  return bias[${a?F("coords",4,5):F("coords",1,5)}${l?"/ 4":""}];
4704
- }`}let K=Oe(h,C),Z=He(t,K,C);return`
4705
- ${W}
4739
+ }`}let j=Oe(h,C),Y=He(t,j,C);return`
4740
+ ${N}
4706
4741
  fn getX(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 {
4707
4742
  let aIndices = array<u32, 5>(d0, d1, d2, d3, d4);
4708
4743
  return ${A.getByIndices("aIndices")};
@@ -4711,10 +4746,10 @@ bIndices[${x-2}] = u32(row);
4711
4746
  let aIndices = array<u32, 5>(d0, d1, d2, d3, d4);
4712
4747
  return ${P.getByIndices("aIndices")};
4713
4748
  }
4714
- ${v.registerUniforms(x).declareVariables(...B,N)}
4749
+ ${v.registerUniforms(x).declareVariables(...D,W)}
4715
4750
  ${v.mainStart()}
4716
4751
  ${v.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
4717
- let coords = ${N.offsetToIndices("global_idx")};
4752
+ let coords = ${W.offsetToIndices("global_idx")};
4718
4753
  let batch = ${F("coords",0,A.rank)};
4719
4754
  let d2 = ${a?F("coords",A.rank-1,A.rank):F("coords",1,A.rank)};
4720
4755
  let xFRCCorner = vec3<u32>(${a?F("coords",1,A.rank):F("coords",2,A.rank)},
@@ -4804,9 +4839,9 @@ bIndices[${x-2}] = u32(row);
4804
4839
  }
4805
4840
  }
4806
4841
  ${S?"value = value + getBiasByOutputCoords(coords)":""};
4807
- ${Z}
4842
+ ${Y}
4808
4843
  result[global_idx] = f32(value);
4809
- }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${a};${h};${S}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:u[0],y:u[1],z:u[2]},programUniforms:g}),getShaderSource:$}}});var Uu,Vu,Nu=V(()=>{"use strict";Q();ie();ae();ct();Uu=(e,t,r,n)=>{let o=e.length>2,i=o?"value += b[output_channel];":"",a=e[0].dims,d=e[1].dims,l=t.format==="NHWC",c=l?r[3]:r[1],m=c/t.group,u=l&&m>=4?we(c):1,h=k.size(r)/u,w=[{type:12,data:h},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:m}];Ge(t,w),w.push(...R(a,[d[0],d[1],d[2],d[3]/u]));let g=o?["rank","rank","rank"]:["rank","rank"];w.push(...R([r[0],r[1],r[2],r[3]/u]));let y=S=>{let $=M("output",e[0].dataType,r.length,u),v=he($.type.tensor),x=He(t,$.type.value,v),T=E("x",e[0].dataType,a.length),C=E("w",e[1].dataType,d.length,u),A=[T,C];o&&A.push(E("b",e[2].dataType,e[2].dims,u));let P=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];Fe(t,P);let B=l?`
4844
+ }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${a};${h};${S}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:u[0],y:u[1],z:u[2]},programUniforms:g}),getShaderSource:$}}});var Wu,Nu,Lu=U(()=>{"use strict";J();ae();se();ct();Wu=(e,t,r,n)=>{let o=e.length>2,i=o?"value += b[output_channel];":"",a=e[0].dims,d=e[1].dims,l=t.format==="NHWC",c=l?r[3]:r[1],m=c/t.group,u=l&&m>=4?we(c):1,h=k.size(r)/u,w=[{type:12,data:h},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:m}];Ge(t,w),w.push(...V(a,[d[0],d[1],d[2],d[3]/u]));let g=o?["rank","rank","rank"]:["rank","rank"];w.push(...V([r[0],r[1],r[2],r[3]/u]));let y=S=>{let $=M("output",e[0].dataType,r.length,u),v=he($.type.tensor),x=He(t,$.type.value,v),T=E("x",e[0].dataType,a.length),C=E("w",e[1].dataType,d.length,u),A=[T,C];o&&A.push(E("b",e[2].dataType,e[2].dims,u));let P=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];Fe(t,P);let D=l?`
4810
4845
  for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[0]; wHeight++) {
4811
4846
  let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0];
4812
4847
 
@@ -4864,11 +4899,11 @@ bIndices[${x-2}] = u32(row);
4864
4899
  var in_channel_offset = group_id * uniforms.w_shape[${l?2:1}];
4865
4900
 
4866
4901
  var value: ${$.type.value} = ${$.type.value}(0);
4867
- ${B}
4902
+ ${D}
4868
4903
  ${i}
4869
4904
  ${x}
4870
4905
  ${$.setByOffset("global_idx","value")}
4871
- }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${u}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:w}),getShaderSource:y}},Vu=(e,t,r,n)=>{let o=e.length>2,i=we(r[3]),a=we(r[2]),d=k.size(r)/i/a,l=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/i],c=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/i],m=[r[0],r[1],r[2],r[3]/i],u=[{type:12,data:d},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];Ge(t,u),u.push(...R(l,c,m));let h=(a-1)*t.strides[1]+c[1],w=g=>{let y=M("output",e[0].dataType,m.length,i),S=he(y.type.tensor),$=He(t,y.type.value,S),v=E("x",e[0].dataType,l.length,i),x=E("w",e[1].dataType,c.length,i),T=[v,x];o&&T.push(E("b",e[2].dataType,e[2].dims,i));let C=o?"value += b[output_channel];":"",A=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return Fe(t,A),`
4906
+ }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${u}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:w}),getShaderSource:y}},Nu=(e,t,r,n)=>{let o=e.length>2,i=we(r[3]),a=we(r[2]),d=k.size(r)/i/a,l=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/i],c=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/i],m=[r[0],r[1],r[2],r[3]/i],u=[{type:12,data:d},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];Ge(t,u),u.push(...V(l,c,m));let h=(a-1)*t.strides[1]+c[1],w=g=>{let y=M("output",e[0].dataType,m.length,i),S=he(y.type.tensor),$=He(t,y.type.value,S),v=E("x",e[0].dataType,l.length,i),x=E("w",e[1].dataType,c.length,i),T=[v,x];o&&T.push(E("b",e[2].dataType,e[2].dims,i));let C=o?"value += b[output_channel];":"",A=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return Fe(t,A),`
4872
4907
  ${g.registerUniforms(A).declareVariables(...T,y)}
4873
4908
  ${g.mainStart()}
4874
4909
  ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -4913,14 +4948,14 @@ bIndices[${x-2}] = u32(row);
4913
4948
  ${$}
4914
4949
  ${y.set("batch","row","col + i","output_channel","value")};
4915
4950
  }
4916
- }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${i};${a};${h};${c[0]};${c[1]}`,inputDependencies:o?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:u}),getShaderSource:w}}});var mo,Km,Wu,fo=V(()=>{"use strict";Q();ie();rr();ae();ct();mo=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a[a.length-2],c=d[d.length-1],m=a[a.length-1],u=we(c),h=we(m),w=we(l),g=k.size(r)/u/w,y=e.length>2,S=n?n.slice(0,-2):r.slice(0,-2),v=[k.size(S),l,c],x=[{type:12,data:g},{type:12,data:l},{type:12,data:c},{type:12,data:m}];Ge(t,x),x.push(...R(S,a,d)),y&&x.push(...R(e[2].dims)),x.push(...R(v));let T=C=>{let A=Fr("batch_dims",e[0].dataType,S.length),P=E("a",e[0].dataType,a.length,h),B=E("b",e[1].dataType,d.length,u),N=M("output",e[0].dataType,v.length,u),W=he(N.type.tensor),K=He(t,N.type.value,W),Z=[P,B],ee="";if(y){let re=o?u:1;Z.push(E("bias",e[2].dataType,e[2].dims.length,re)),ee=`${o?`value += bias[col / ${re}];`:`value += ${N.type.value}(bias[row + i]);`}`}let se=a.slice(0,-2),de=d.slice(0,-2),Y=Nt(se,S),le=Nt(de,S),ce=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];Fe(t,ce);let q=(re,ne)=>{let oe=re.rank,U=re.name;if(oe===2)return`var ${U}_indices = ${re.type.indices}(0u, 0u);`;let G=A.rank,ye=`var ${U}_indices: ${re.type.indices};`;for(let Re=oe-2-1,$e=G-1;Re>=0;Re--,$e--)ye+=`
4917
- ${U}_indices[${Re}] = ${G>1?`batch_indices[${$e}]`:"batch_indices"};`;return ne.forEach(Re=>{ye+=`
4918
- ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
4919
- ${U}_indices[${oe-1}] = 0u;`,ye},ue=()=>{let re=`var a_data: ${P.type.value};`;for(let ne=0;ne<h;ne++)re+=`
4951
+ }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${i};${a};${h};${c[0]};${c[1]}`,inputDependencies:o?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:u}),getShaderSource:w}}});var ho,Km,Hu,go=U(()=>{"use strict";J();ae();rr();se();ct();ho=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a[a.length-2],c=d[d.length-1],m=a[a.length-1],u=we(c),h=we(m),w=we(l),g=k.size(r)/u/w,y=e.length>2,S=n?n.slice(0,-2):r.slice(0,-2),v=[k.size(S),l,c],x=[{type:12,data:g},{type:12,data:l},{type:12,data:c},{type:12,data:m}];Ge(t,x),x.push(...V(S,a,d)),y&&x.push(...V(e[2].dims)),x.push(...V(v));let T=C=>{let A=Fr("batch_dims",e[0].dataType,S.length),P=E("a",e[0].dataType,a.length,h),D=E("b",e[1].dataType,d.length,u),W=M("output",e[0].dataType,v.length,u),N=he(W.type.tensor),j=He(t,W.type.value,N),Y=[P,D],Z="";if(y){let re=o?u:1;Y.push(E("bias",e[2].dataType,e[2].dims.length,re)),Z=`${o?`value += bias[col / ${re}];`:`value += ${W.type.value}(bias[row + i]);`}`}let te=a.slice(0,-2),ue=d.slice(0,-2),K=Wt(te,S),de=Wt(ue,S),ce=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];Fe(t,ce);let q=(re,ne)=>{let oe=re.rank,R=re.name;if(oe===2)return`var ${R}_indices = ${re.type.indices}(0u, 0u);`;let G=A.rank,ye=`var ${R}_indices: ${re.type.indices};`;for(let Re=oe-2-1,$e=G-1;Re>=0;Re--,$e--)ye+=`
4952
+ ${R}_indices[${Re}] = ${G>1?`batch_indices[${$e}]`:"batch_indices"};`;return ne.forEach(Re=>{ye+=`
4953
+ ${R}_indices[${Re}] = 0;`}),ye+=`${R}_indices[${oe-2}] = 0u;
4954
+ ${R}_indices[${oe-1}] = 0u;`,ye},le=()=>{let re=`var a_data: ${P.type.value};`;for(let ne=0;ne<h;ne++)re+=`
4920
4955
  let b_data${ne} = b[(b_offset + (k + ${ne}) * uniforms.N + col) / ${u}];`;for(let ne=0;ne<w;ne++){re+=`a_data = a[(a_offset + (row + ${ne}) * uniforms.K + k) / ${h}];`;for(let oe=0;oe<h;oe++)re+=`
4921
- values[${ne}] = fma(${B.type.value}(a_data${h===1?"":`[${oe}]`}), b_data${oe}, values[${ne}]);
4956
+ values[${ne}] = fma(${D.type.value}(a_data${h===1?"":`[${oe}]`}), b_data${oe}, values[${ne}]);
4922
4957
  `}return re};return`
4923
- ${C.registerUniforms(ce).registerInternalVariables(A).declareVariables(...Z,N)}
4958
+ ${C.registerUniforms(ce).registerInternalVariables(A).declareVariables(...Y,W)}
4924
4959
  ${C.mainStart()}
4925
4960
  ${C.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
4926
4961
  let col = (global_idx % (uniforms.N / ${u})) * ${u};
@@ -4930,24 +4965,24 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
4930
4965
  let batch = index1 / stride1;
4931
4966
 
4932
4967
  ${r.length===2?"":`let batch_indices = ${A.offsetToIndices("batch")};`}
4933
- ${q(P,Y)}
4968
+ ${q(P,K)}
4934
4969
  let a_offset = ${P.indicesToOffset("a_indices")};
4935
- ${q(B,le)}
4936
- let b_offset = ${B.indicesToOffset("b_indices")};
4937
- var values: array<${N.type.value}, ${w}>;
4970
+ ${q(D,de)}
4971
+ let b_offset = ${D.indicesToOffset("b_indices")};
4972
+ var values: array<${W.type.value}, ${w}>;
4938
4973
  for (var k: u32 = 0u; k < uniforms.K; k = k + ${h}) {
4939
- ${ue()}
4974
+ ${le()}
4940
4975
  }
4941
4976
  for (var i = 0u; i < ${w}u; i++) {
4942
4977
  var value = values[i];
4943
- ${ee}
4944
- ${K}
4945
- let cur_indices = ${N.type.indices}(batch, row + i, col);
4946
- let offset = ${N.indicesToOffset("cur_indices")};
4947
- ${N.setByOffset(`offset / ${u}`,"value")};
4978
+ ${Z}
4979
+ ${j}
4980
+ let cur_indices = ${W.type.indices}(batch, row + i, col);
4981
+ let offset = ${W.indicesToOffset("cur_indices")};
4982
+ ${W.setByOffset(`offset / ${u}`,"value")};
4948
4983
  }
4949
4984
  }
4950
- `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${u};${h};${w};${o}`,inputDependencies:y?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:x}),getShaderSource:T}},Km=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},Wu=e=>{Km(e.inputs);let t=rt.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let r=t[t.length-1],n=e.inputs[0].dims[e.inputs[0].dims.length-1];r<8&&n<8?e.compute(mo(e.inputs,{activation:""},t)):e.compute(en(e.inputs,{activation:""},t))}});var Ym,ho,Xm,go,yo,Lu,Zm,Qm,bo,Hu=V(()=>{"use strict";ie();zu();Ru();rr();Nu();ct();fo();lt();Ym=(e,t,r,n,o,i)=>{let a=e[0],d=e.slice(i?1:2,i?3:4),l=d.length,c=t[0],u=t.slice(2).map((g,y)=>g+(g-1)*(r[y]-1)),w=d.map((g,y)=>g+n[y]+n[y+l]).map((g,y)=>Math.floor((g-u[y]+o[y])/o[y]));return w.splice(0,0,a),w.splice(i?3:1,0,c),w},ho=[2,3,1,0],Xm=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[1]*t.group;if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let o=e[0].dims.length-2;if(t.dilations.length!==o)throw new Error(`dilations should be ${o}D`);if(t.strides.length!==o)throw new Error(`strides should be ${o}D`);if(t.pads.length!==o*2)throw new Error(`pads should be ${o*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},go=(e,t)=>{let 
r=e.kernelShape.slice();r.length<t[1].dims.length-2&&r.push(...Array(t[1].dims.length-2-r.length).fill(0));for(let i=2;i<t[1].dims.length;++i)r[i-2]===0&&(r[i-2]=t[1].dims[i]);let n=e.pads.slice();Ct.adjustPadsBasedOnAutoPad(t[0].dims,e.strides,e.dilations,r,n,e.format==="NHWC",e.autoPad);let o=Object.assign({},e);return Object.assign(o,{kernelShape:r,pads:n}),o},yo=e=>{let t=Zr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],o=e.dilations,i=e.group,a=e.kernel_shape,d=e.pads,l=e.strides,c=e.w_is_const();return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,pads:d,strides:l,wIsConst:c,...t,cacheKey:`${e.format};${t.activation};`}},Lu=(e,t,r,n)=>{let o=r.format==="NHWC",i=Ym(t[0].dims,t[1].dims,r.dilations,r.pads,r.strides,o);if(r.group!==1){let A=[t[0]];if(o){let B=e.kernelCustomData.wT??e.compute(Pe(t[1],ho),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=B),A.push(B)}else A.push(t[1]);t.length===3&&A.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&o&&t[1].dims[0]===r.group&&t[1].dims[1]===1&&r.dilations[0]===1&&r.dilations[1]===1?e.compute(Vu(A,r,i,n),{inputs:A}):e.compute(Uu(A,r,i,n),{inputs:A});return}let a=t.length===3,d=t[0].dims[o?1:2],l=t[0].dims[o?2:3],c=t[0].dims[o?3:1],m=t[1].dims[2],u=t[1].dims[3],h=i[o?1:2],w=i[o?2:3],g=i[o?3:1],y=o&&m===d&&u===l&&r.pads[0]===0&&r.pads[1]===0;if(y||m===1&&u===1&&r.dilations[0]===1&&r.dilations[1]===1&&r.strides[0]===1&&r.strides[1]===1&&r.pads[0]===0&&r.pads[1]===0){let A=i[0],P,B,N,W=[];if(o){let ee=e.kernelCustomData.wT??e.compute(Pe(t[1],ho),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];if(r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=ee),y){let se=d*l*c;P=t[0].reshape([1,A,se]),B=ee.reshape([1,se,g]),N=[1,A,g]}else P=t[0].reshape([A,d*l,c]),B=ee.reshape([1,c,g]),N=[A,h*w,g];W.push(P),W.push(B)}else P=t[0].reshape([A,c,d*l]),B=t[1].reshape([1,g,c]),N=[A,g,h*w],W.push(B),W.push(P);a&&W.push(t[2]);let 
K=N[2],Z=W[0].dims[W[0].dims.length-1];K<8&&Z<8?e.compute(mo(W,r,i,N,o,n),{inputs:W}):e.compute(en(W,r,i,N,o,n),{inputs:W});return}let S=!0,$=e.kernelCustomData.wT??e.compute(Pe(t[1],ho),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=$);let v=[t[0],$];a&&v.push(t[2]);let x=o?h*w:g,T=o?g:h*w,C=m*u*c;e.compute(Pu(v,r,i,x,T,C,a,S,n),{inputs:v})},Zm=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=[0,t.pads[0],0,t.pads[1]],i=[1].concat(t.strides),a=[1].concat(t.dilations),d=[1].concat(t.kernelShape),l=go({...t,pads:o,strides:i,dilations:a,kernelShape:d},n);Lu(e,n,l,c=>r?[c[0],c[2],c[3]]:[c[0],c[1],c[3]])},Qm=(e,t,r)=>{let n=r.format==="NHWC"?"channelsLast":"channelsFirst",o=go(r,t),i=r.autoPad==="NOTSET"?r.pads:r.autoPad,a=Du(t[0].dims,t[1].dims,r.strides,r.dilations,i,!1,n);e.compute(Mu(t,o,a.outShape,[a.filterDepth,a.filterHeight,a.filterWidth],[a.padInfo.front,a.padInfo.top,a.padInfo.left],n))},bo=(e,t)=>{if(Xm(e.inputs,t),e.inputs[0].dims.length===3)Zm(e,t);else if(e.inputs[0].dims.length===5)Qm(e,e.inputs,t);else{let r=go(t,e.inputs);Lu(e,e.inputs,r)}}});var Jm,Gu,Fu=V(()=>{"use strict";Q();Xe();ae();ct();Jt();po();rr();Jm=(e,t=!1,r,n,o=4)=>{let i=$=>{switch($){case 1:return"return w[getIndexFromCoords4D(coord, vec4<i32>(uniforms.w_shape))];";case 4:return`
4985
+ `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${u};${h};${w};${o}`,inputDependencies:y?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:x}),getShaderSource:T}},Km=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},Hu=e=>{Km(e.inputs);let t=rt.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let r=t[t.length-1],n=e.inputs[0].dims[e.inputs[0].dims.length-1];r<8&&n<8?e.compute(ho(e.inputs,{activation:""},t)):e.compute(en(e.inputs,{activation:""},t))}});var Ym,yo,Xm,bo,wo,Gu,Qm,Zm,_o,Fu=U(()=>{"use strict";ae();Du();Vu();rr();Lu();ct();go();lt();Ym=(e,t,r,n,o,i)=>{let a=e[0],d=e.slice(i?1:2,i?3:4),l=d.length,c=t[0],u=t.slice(2).map((g,y)=>g+(g-1)*(r[y]-1)),w=d.map((g,y)=>g+n[y]+n[y+l]).map((g,y)=>Math.floor((g-u[y]+o[y])/o[y]));return w.splice(0,0,a),w.splice(i?3:1,0,c),w},yo=[2,3,1,0],Xm=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[1]*t.group;if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let o=e[0].dims.length-2;if(t.dilations.length!==o)throw new Error(`dilations should be ${o}D`);if(t.strides.length!==o)throw new Error(`strides should be ${o}D`);if(t.pads.length!==o*2)throw new Error(`pads should be ${o*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},bo=(e,t)=>{let 
r=e.kernelShape.slice();r.length<t[1].dims.length-2&&r.push(...Array(t[1].dims.length-2-r.length).fill(0));for(let i=2;i<t[1].dims.length;++i)r[i-2]===0&&(r[i-2]=t[1].dims[i]);let n=e.pads.slice();Ct.adjustPadsBasedOnAutoPad(t[0].dims,e.strides,e.dilations,r,n,e.format==="NHWC",e.autoPad);let o=Object.assign({},e);return Object.assign(o,{kernelShape:r,pads:n}),o},wo=e=>{let t=Qr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],o=e.dilations,i=e.group,a=e.kernel_shape,d=e.pads,l=e.strides,c=e.w_is_const();return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,pads:d,strides:l,wIsConst:c,...t,cacheKey:`${e.format};${t.activation};`}},Gu=(e,t,r,n)=>{let o=r.format==="NHWC",i=Ym(t[0].dims,t[1].dims,r.dilations,r.pads,r.strides,o);if(r.group!==1){let A=[t[0]];if(o){let D=e.kernelCustomData.wT??e.compute(Pe(t[1],yo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=D),A.push(D)}else A.push(t[1]);t.length===3&&A.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&o&&t[1].dims[0]===r.group&&t[1].dims[1]===1&&r.dilations[0]===1&&r.dilations[1]===1?e.compute(Nu(A,r,i,n),{inputs:A}):e.compute(Wu(A,r,i,n),{inputs:A});return}let a=t.length===3,d=t[0].dims[o?1:2],l=t[0].dims[o?2:3],c=t[0].dims[o?3:1],m=t[1].dims[2],u=t[1].dims[3],h=i[o?1:2],w=i[o?2:3],g=i[o?3:1],y=o&&m===d&&u===l&&r.pads[0]===0&&r.pads[1]===0;if(y||m===1&&u===1&&r.dilations[0]===1&&r.dilations[1]===1&&r.strides[0]===1&&r.strides[1]===1&&r.pads[0]===0&&r.pads[1]===0){let A=i[0],P,D,W,N=[];if(o){let Z=e.kernelCustomData.wT??e.compute(Pe(t[1],yo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];if(r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=Z),y){let te=d*l*c;P=t[0].reshape([1,A,te]),D=Z.reshape([1,te,g]),W=[1,A,g]}else P=t[0].reshape([A,d*l,c]),D=Z.reshape([1,c,g]),W=[A,h*w,g];N.push(P),N.push(D)}else P=t[0].reshape([A,c,d*l]),D=t[1].reshape([1,g,c]),W=[A,g,h*w],N.push(D),N.push(P);a&&N.push(t[2]);let 
j=W[2],Y=N[0].dims[N[0].dims.length-1];j<8&&Y<8?e.compute(ho(N,r,i,W,o,n),{inputs:N}):e.compute(en(N,r,i,W,o,n),{inputs:N});return}let S=!0,$=e.kernelCustomData.wT??e.compute(Pe(t[1],yo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=$);let v=[t[0],$];a&&v.push(t[2]);let x=o?h*w:g,T=o?g:h*w,C=m*u*c;e.compute(Ou(v,r,i,x,T,C,a,S,n),{inputs:v})},Qm=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=[0,t.pads[0],0,t.pads[1]],i=[1].concat(t.strides),a=[1].concat(t.dilations),d=[1].concat(t.kernelShape),l=bo({...t,pads:o,strides:i,dilations:a,kernelShape:d},n);Gu(e,n,l,c=>r?[c[0],c[2],c[3]]:[c[0],c[1],c[3]])},Zm=(e,t,r)=>{let n=r.format==="NHWC"?"channelsLast":"channelsFirst",o=bo(r,t),i=r.autoPad==="NOTSET"?r.pads:r.autoPad,a=Ru(t[0].dims,t[1].dims,r.strides,r.dilations,i,!1,n);e.compute(Uu(t,o,a.outShape,[a.filterDepth,a.filterHeight,a.filterWidth],[a.padInfo.front,a.padInfo.top,a.padInfo.left],n))},_o=(e,t)=>{if(Xm(e.inputs,t),e.inputs[0].dims.length===3)Qm(e,t);else if(e.inputs[0].dims.length===5)Zm(e,e.inputs,t);else{let r=bo(t,e.inputs);Gu(e,e.inputs,r)}}});var Jm,qu,ju=U(()=>{"use strict";J();Xe();se();ct();Jt();fo();rr();Jm=(e,t=!1,r,n,o=4)=>{let i=$=>{switch($){case 1:return"return w[getIndexFromCoords4D(coord, vec4<i32>(uniforms.w_shape))];";case 4:return`
4951
4986
  let coord1 = vec4<i32>(coordX, coordY, col + 1, rowInner);
4952
4987
  let coord2 = vec4<i32>(coordX, coordY, col + 2, rowInner);
4953
4988
  let coord3 = vec4<i32>(coordX, coordY, col + 3, rowInner);
@@ -5028,19 +5063,19 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5028
5063
  var value = valueInput;
5029
5064
  let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"};
5030
5065
  ${d}
5031
- ${Qr(t)}
5066
+ ${Zr(t)}
5032
5067
  ${y}
5033
5068
  result[getIndexFromCoords4D(coords, vec4<i32>(uniforms.result_shape))/${o}] = value;
5034
5069
  }
5035
- }`},Gu=(e,t,r,n,o,i,a,d)=>{let l=t.format==="NHWC",c=l?e[0].dims[3]:e[0].dims[1],m=r[0],u=l?r[2]:r[3],h=l?r[1]:r[2],w=l?r[3]:r[1],g=l&&c%4===0&&c%3&&w%4===0,y=l?w:u*h,S=l?u*h:w,$=[8,8,1],v=n<=8?[4,1,1]:[4,4,1],x=[Math.ceil(y/$[0]/v[0]),Math.ceil(S/$[1]/v[1]),Math.ceil(m/$[2]/v[2])];me("verbose",()=>`[conv_backprop_mm_webgpu] dispatch = ${x}`);let T=g?4:1,C=Math.max($[0]*T,$[1]),A=g?4:1,P=[t.kernelShape[l?1:2],t.kernelShape[l?2:3]],B=[P[0]+(t.dilations[0]<=1?0:(P[0]-1)*(t.dilations[0]-1)),P[1]+(t.dilations[1]<=1?0:(P[1]-1)*(t.dilations[1]-1))],N=[B[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),B[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],W=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:t.strides},{type:6,data:t.dilations},{type:6,data:P},{type:6,data:N}];Ge(t,W),W.push(...R(e[0].dims,e[1].dims));let K=["rank","rank"];a&&(W.push(...R(e[2].dims)),K.push("rank")),W.push(...R(r));let Z=ee=>{let se=E("x",e[0].dataType,e[0].dims.length,A),de=E("w",e[1].dataType,e[1].dims.length,1),Y=M("result",e[0].dataType,r.length,A),le=[se,de],ce="";if(a){let re=E("bias",e[2].dataType,e[2].dims.length,A);le.push(re),ce+=`
5070
+ }`},qu=(e,t,r,n,o,i,a,d)=>{let l=t.format==="NHWC",c=l?e[0].dims[3]:e[0].dims[1],m=r[0],u=l?r[2]:r[3],h=l?r[1]:r[2],w=l?r[3]:r[1],g=l&&c%4===0&&c%3&&w%4===0,y=l?w:u*h,S=l?u*h:w,$=[8,8,1],v=n<=8?[4,1,1]:[4,4,1],x=[Math.ceil(y/$[0]/v[0]),Math.ceil(S/$[1]/v[1]),Math.ceil(m/$[2]/v[2])];pe("verbose",()=>`[conv_backprop_mm_webgpu] dispatch = ${x}`);let T=g?4:1,C=Math.max($[0]*T,$[1]),A=g?4:1,P=[t.kernelShape[l?1:2],t.kernelShape[l?2:3]],D=[P[0]+(t.dilations[0]<=1?0:(P[0]-1)*(t.dilations[0]-1)),P[1]+(t.dilations[1]<=1?0:(P[1]-1)*(t.dilations[1]-1))],W=[D[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),D[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],N=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:t.strides},{type:6,data:t.dilations},{type:6,data:P},{type:6,data:W}];Ge(t,N),N.push(...V(e[0].dims,e[1].dims));let j=["rank","rank"];a&&(N.push(...V(e[2].dims)),j.push("rank")),N.push(...V(r));let Y=Z=>{let te=E("x",e[0].dataType,e[0].dims.length,A),ue=E("w",e[1].dataType,e[1].dims.length,1),K=M("result",e[0].dataType,r.length,A),de=[te,ue],ce="";if(a){let re=E("bias",e[2].dataType,e[2].dims.length,A);de.push(re),ce+=`
5036
5071
  fn getBiasByOutputCoords(coords : vec4<i32>) -> ${re.type.value} {
5037
5072
  return bias[coords.${l?"w":"y"}${g?"/ 4":""}];
5038
- }`}let q=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"strides",type:"i32",length:2},{name:"dilations",type:"i32",length:2},{name:"filter_dims",type:"i32",length:P.length},{name:"pads",type:"i32",length:N.length}];Fe(t,q);let ue=he(e[0].dataType,1);if(ue!=="f16"&&ue!=="f32")throw new Error(`elemType ${ue} is not supported.`);return`
5073
+ }`}let q=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"strides",type:"i32",length:2},{name:"dilations",type:"i32",length:2},{name:"filter_dims",type:"i32",length:P.length},{name:"pads",type:"i32",length:W.length}];Fe(t,q);let le=he(e[0].dataType,1);if(le!=="f16"&&le!=="f32")throw new Error(`elemType ${le} is not supported.`);return`
5039
5074
  ${Jr("uniforms.result_strides")}
5040
- ${ee.registerUniforms(q).declareVariables(...le,Y)};
5075
+ ${Z.registerUniforms(q).declareVariables(...de,K)};
5041
5076
  ${ce}
5042
- ${Jm(l,a,t,se.type.value,T)}
5043
- ${g?er(v,$,ue,void 0,!l,C):tr(v,$,ue,void 0,!l,C,!1,void 0,d)}`};return{name:"Conv2DTransposeMatMul",shaderCache:{hint:`${t.cacheKey};${v};${$};${g}`,inputDependencies:K},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:x[0],y:x[1],z:x[2]},programUniforms:W}),getShaderSource:Z}}});var ef,wo,qu=V(()=>{"use strict";Q();Xe();ie();ae();ef=(e,t,r,n,o,i=!1,a,d,l=!1)=>{let c=l?1:2,m=l?2:3,u=l?3:1,h=i?2:1,w=`
5077
+ ${Jm(l,a,t,te.type.value,T)}
5078
+ ${g?er(v,$,le,void 0,!l,C):tr(v,$,le,void 0,!l,C,!1,void 0,d)}`};return{name:"Conv2DTransposeMatMul",shaderCache:{hint:`${t.cacheKey};${v};${$};${g}`,inputDependencies:j},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:x[0],y:x[1],z:x[2]},programUniforms:N}),getShaderSource:Y}}});var ef,vo,Ku=U(()=>{"use strict";J();Xe();ae();se();ef=(e,t,r,n,o,i=!1,a,d,l=!1)=>{let c=l?1:2,m=l?2:3,u=l?3:1,h=i?2:1,w=`
5044
5079
  fn setOutputAtIndex(flatIndex : u32, value : ${i?`vec4<${a}>`:a}) {
5045
5080
  result[flatIndex] = ${i?`vec4<${a}>`:a}(value);
5046
5081
  }`;n&&(w+=`
@@ -5203,7 +5238,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5203
5238
 
5204
5239
  ${e.mainStart()}
5205
5240
  ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")};
5206
- ${i?x:T}}`},wo=(e,t,r)=>{let n=e.length>2,o=t.outputShape,i=k.size(o),a=[Math.ceil(i/64),1,1];me("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${a}`);let d=t.format==="NHWC",l=["rank","rank"],c=[t.strides[0],t.strides[1]],m=[t.kernelShape[d?1:2],t.kernelShape[d?2:3]],u=[t.dilations[0],t.dilations[1]],h=[m[0]+(t.dilations[0]<=1?0:(t.kernelShape[d?1:2]-1)*(t.dilations[0]-1)),m[1]+(t.dilations[1]<=1?0:(t.kernelShape[d?2:3]-1)*(t.dilations[1]-1))],w=[h[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),h[1]-1-Math.floor(t.pads[1]+t.pads[3])/2],g=!1,y=t.group,S=e[1].dims,$=S[0]/y,v=S[1],x=[{type:12,data:i},{type:12,data:c},{type:12,data:m},{type:12,data:u},{type:12,data:h},{type:6,data:w},{type:12,data:$},{type:12,data:v},...R(e[0].dims,e[1].dims)];n&&(x.push(...R(e[2].dims)),l.push("rank")),x.push(...R(o));let T=a[1]===1&&a[2]===1,C=A=>{let P=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:c.length},{name:"filter_dims",type:"u32",length:m.length},{name:"dilations",type:"u32",length:m.length},{name:"effective_filter_dims",type:"u32",length:h.length},{name:"pads",type:"i32",length:w.length},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],B=he(e[0].dataType);return`${ef(A,e,o,n,T,g,B,P,d)}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};`,inputDependencies:l},getRunData:()=>({dispatchGroup:{x:a[0],y:a[1],z:a[2]},outputs:[{dims:r?r(o):o,dataType:e[0].dataType}],programUniforms:x}),getShaderSource:C}}});var tf,rf,nf,ju,Ku,of,af,sf,uf,Yu,Xu=V(()=>{"use strict";Fu();qu();ct();lt();tf=(e,t,r,n,o,i)=>(e-1)*t+r+(n-1)*o+1-i,rf=(e,t,r,n,o)=>{let i=Math.floor(e/2);t==="SAME_UPPER"?(r[n]=i,r[o]=e-i):t==="SAME_LOWER"&&(r[n]=e-i,r[o]=i)},nf=(e,t,r,n,o,i,a,d,l,c)=>{let m=e.length-2,u=c.length===0;l.length<m&&l.push(...Array(m-l.length).fill(0));let h=e[0],w=t[d?3:1]*o;for(let g=0,y=e.length-m-(d?1:0);g<m;++g,++y){let 
S=e[y],$=u?S*a[g]:c[g],v=tf(S,a[g],i[g],t[y],r[g],$);rf(v,n,i,g,g+m),u&&c.push(a[g]*(S-1)+l[g]+(t[y]-1)*r[g]+1-i[g]-i[g+m])}c.splice(0,0,h),c.splice(d?3:1,0,w)},ju=(e,t)=>{let r=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((u,h)=>u*h,1)===0){r.length=0;for(let u=2;u<t[1].dims.length;++u)r.push(t[1].dims[u])}let n=e.format==="NHWC";r.splice(0,0,t[1].dims[0]),r.splice(n?3:1,0,t[1].dims[1]);let o=e.pads.slice(),i=e.outputShape.slice(),a=e.outputPadding.slice(),d=t[0].dims,l=e.dilations.slice();if(l.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;l=new Array(u).fill(1)}let c=e.strides.slice();if(c.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;c=new Array(u).fill(1)}nf(d,r,l,e.autoPad,e.group,o,c,n,a,i);let m=Object.assign({},e);return Object.assign(m,{kernelShape:r,pads:o,outputPadding:a,outputShape:i,dilations:l,strides:c}),m},Ku=e=>{let t=Zr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],o=e.dilations,i=e.group,a=e.kernelShape,d=e.pads,l=e.strides,c=e.wIsConst(),m=e.outputPadding,u=e.outputShape;return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,outputPadding:m,outputShape:u,pads:d,strides:l,wIsConst:c,...t,cacheKey:`${e.format};${t.activation};`}},of=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[0];if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let o=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==o))throw new Error("invalid bias");let i=e[0].dims.length-2;if(t.dilations.reduce((m,u)=>m+u,0)>0&&t.dilations.length!==i)throw new Error(`dilations should be 
${i}D`);if(t.strides.reduce((m,u)=>m+u,0)>0&&t.strides.length!==i)throw new Error(`strides should be ${i}D`);if(t.pads.reduce((m,u)=>m+u,0)>0&&t.pads.length!==i*2)throw new Error(`pads should be ${i*2}D`);if(t.outputPadding.length!==i&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${i}D`);if(t.kernelShape.reduce((m,u)=>m+u,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},af=[2,3,1,0],sf=(e,t,r)=>{let n=ju(r,t),o=r.format==="NHWC",i=n.outputShape,a=i[o?3:1],d=t[0].dims[o?3:1];if(n.group!==1||a===1&&d===1){e.compute(wo(t,n));return}let l=i[o?1:2],c=i[o?2:3],m=t[1].dims[2],u=t[1].dims[3],h=o?l*c:a,w=o?a:l*c,g=m*u*d,y=!0,S=e.kernelCustomData.wT??e.compute(Pe(t[1],af),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=S);let $=[t[0],S],v=t.length===3;v&&(!o&&t[2].dims.length===1?$.push(t[2].reshape([t[2].dims[0],1,1])):$.push(t[2])),e.compute(Gu($,n,i,h,w,g,v,y),{inputs:$})},uf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=t.kernelShape;(o.length===0||o[0]===0)&&(o=[e.inputs[1].dims[2]]);let i=t.dilations;(i.length===0||i[0]===0)&&(i=[1]);let a=t.strides;(a.length===0||a[0]===0)&&(a=[1]);let d=t.pads;d.length===0&&(d=[0,0]),d=[0,d[0],0,d[1]],a=[1].concat(a),i=[1].concat(i),o=[1].concat(o);let l=ju({...t,pads:d,strides:a,dilations:i,kernelShape:o},n);e.compute(wo(n,l,c=>r?[c[0],c[2],c[3]]:[c[0],c[1],c[3]]))},Yu=(e,t)=>{of(e.inputs,t),e.inputs[0].dims.length===3?uf(e,t):sf(e,e.inputs,t)}});var df,Zu,Qu,Ju=V(()=>{"use 
strict";Q();ie();Ie();ae();df=(e,t,r,n)=>{let o=k.size(t),i=t.length,a=E("input",e,i),d=M("output",e,i),l=r.dataType===6?r.getInt32Array()[0]:Number(r.getBigInt64Array()[0]),c=k.normalizeAxis(l,i),m=u=>{let h=` i32(${a.indicesGet("inputIndices","uniforms.axis")}) `,w=F("uniforms.input_shape","uniforms.axis",i),g=n.reverse?h+(n.exclusive?" + 1":""):"0",y=n.reverse?w:h+(n.exclusive?"":" + 1");return`
5241
+ ${i?x:T}}`},vo=(e,t,r)=>{let n=e.length>2,o=t.outputShape,i=k.size(o),a=[Math.ceil(i/64),1,1];pe("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${a}`);let d=t.format==="NHWC",l=["rank","rank"],c=[t.strides[0],t.strides[1]],m=[t.kernelShape[d?1:2],t.kernelShape[d?2:3]],u=[t.dilations[0],t.dilations[1]],h=[m[0]+(t.dilations[0]<=1?0:(t.kernelShape[d?1:2]-1)*(t.dilations[0]-1)),m[1]+(t.dilations[1]<=1?0:(t.kernelShape[d?2:3]-1)*(t.dilations[1]-1))],w=[h[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),h[1]-1-Math.floor(t.pads[1]+t.pads[3])/2],g=!1,y=t.group,S=e[1].dims,$=S[0]/y,v=S[1],x=[{type:12,data:i},{type:12,data:c},{type:12,data:m},{type:12,data:u},{type:12,data:h},{type:6,data:w},{type:12,data:$},{type:12,data:v},...V(e[0].dims,e[1].dims)];n&&(x.push(...V(e[2].dims)),l.push("rank")),x.push(...V(o));let T=a[1]===1&&a[2]===1,C=A=>{let P=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:c.length},{name:"filter_dims",type:"u32",length:m.length},{name:"dilations",type:"u32",length:m.length},{name:"effective_filter_dims",type:"u32",length:h.length},{name:"pads",type:"i32",length:w.length},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],D=he(e[0].dataType);return`${ef(A,e,o,n,T,g,D,P,d)}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};`,inputDependencies:l},getRunData:()=>({dispatchGroup:{x:a[0],y:a[1],z:a[2]},outputs:[{dims:r?r(o):o,dataType:e[0].dataType}],programUniforms:x}),getShaderSource:C}}});var tf,rf,nf,Yu,Xu,of,af,sf,uf,Qu,Zu=U(()=>{"use strict";ju();Ku();ct();lt();tf=(e,t,r,n,o,i)=>(e-1)*t+r+(n-1)*o+1-i,rf=(e,t,r,n,o)=>{let i=Math.floor(e/2);t==="SAME_UPPER"?(r[n]=i,r[o]=e-i):t==="SAME_LOWER"&&(r[n]=e-i,r[o]=i)},nf=(e,t,r,n,o,i,a,d,l,c)=>{let m=e.length-2,u=c.length===0;l.length<m&&l.push(...Array(m-l.length).fill(0));let h=e[0],w=t[d?3:1]*o;for(let g=0,y=e.length-m-(d?1:0);g<m;++g,++y){let 
S=e[y],$=u?S*a[g]:c[g],v=tf(S,a[g],i[g],t[y],r[g],$);rf(v,n,i,g,g+m),u&&c.push(a[g]*(S-1)+l[g]+(t[y]-1)*r[g]+1-i[g]-i[g+m])}c.splice(0,0,h),c.splice(d?3:1,0,w)},Yu=(e,t)=>{let r=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((u,h)=>u*h,1)===0){r.length=0;for(let u=2;u<t[1].dims.length;++u)r.push(t[1].dims[u])}let n=e.format==="NHWC";r.splice(0,0,t[1].dims[0]),r.splice(n?3:1,0,t[1].dims[1]);let o=e.pads.slice(),i=e.outputShape.slice(),a=e.outputPadding.slice(),d=t[0].dims,l=e.dilations.slice();if(l.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;l=new Array(u).fill(1)}let c=e.strides.slice();if(c.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;c=new Array(u).fill(1)}nf(d,r,l,e.autoPad,e.group,o,c,n,a,i);let m=Object.assign({},e);return Object.assign(m,{kernelShape:r,pads:o,outputPadding:a,outputShape:i,dilations:l,strides:c}),m},Xu=e=>{let t=Qr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],o=e.dilations,i=e.group,a=e.kernelShape,d=e.pads,l=e.strides,c=e.wIsConst(),m=e.outputPadding,u=e.outputShape;return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,outputPadding:m,outputShape:u,pads:d,strides:l,wIsConst:c,...t,cacheKey:`${e.format};${t.activation};`}},of=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[0];if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let o=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==o))throw new Error("invalid bias");let i=e[0].dims.length-2;if(t.dilations.reduce((m,u)=>m+u,0)>0&&t.dilations.length!==i)throw new Error(`dilations should be 
${i}D`);if(t.strides.reduce((m,u)=>m+u,0)>0&&t.strides.length!==i)throw new Error(`strides should be ${i}D`);if(t.pads.reduce((m,u)=>m+u,0)>0&&t.pads.length!==i*2)throw new Error(`pads should be ${i*2}D`);if(t.outputPadding.length!==i&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${i}D`);if(t.kernelShape.reduce((m,u)=>m+u,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},af=[2,3,1,0],sf=(e,t,r)=>{let n=Yu(r,t),o=r.format==="NHWC",i=n.outputShape,a=i[o?3:1],d=t[0].dims[o?3:1];if(n.group!==1||a===1&&d===1){e.compute(vo(t,n));return}let l=i[o?1:2],c=i[o?2:3],m=t[1].dims[2],u=t[1].dims[3],h=o?l*c:a,w=o?a:l*c,g=m*u*d,y=!0,S=e.kernelCustomData.wT??e.compute(Pe(t[1],af),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=S);let $=[t[0],S],v=t.length===3;v&&(!o&&t[2].dims.length===1?$.push(t[2].reshape([t[2].dims[0],1,1])):$.push(t[2])),e.compute(qu($,n,i,h,w,g,v,y),{inputs:$})},uf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=t.kernelShape;(o.length===0||o[0]===0)&&(o=[e.inputs[1].dims[2]]);let i=t.dilations;(i.length===0||i[0]===0)&&(i=[1]);let a=t.strides;(a.length===0||a[0]===0)&&(a=[1]);let d=t.pads;d.length===0&&(d=[0,0]),d=[0,d[0],0,d[1]],a=[1].concat(a),i=[1].concat(i),o=[1].concat(o);let l=Yu({...t,pads:d,strides:a,dilations:i,kernelShape:o},n);e.compute(vo(n,l,c=>r?[c[0],c[2],c[3]]:[c[0],c[1],c[3]]))},Qu=(e,t)=>{of(e.inputs,t),e.inputs[0].dims.length===3?uf(e,t):sf(e,e.inputs,t)}});var df,Ju,ed,td=U(()=>{"use 
strict";J();ae();Ie();se();df=(e,t,r,n)=>{let o=k.size(t),i=t.length,a=E("input",e,i),d=M("output",e,i),l=r.dataType===6?r.getInt32Array()[0]:Number(r.getBigInt64Array()[0]),c=k.normalizeAxis(l,i),m=u=>{let h=` i32(${a.indicesGet("inputIndices","uniforms.axis")}) `,w=F("uniforms.input_shape","uniforms.axis",i),g=n.reverse?h+(n.exclusive?" + 1":""):"0",y=n.reverse?w:h+(n.exclusive?"":" + 1");return`
5207
5242
  ${u.registerUniform("outputSize","u32").registerUniform("axis","u32").declareVariables(a,d)}
5208
5243
  ${u.mainStart()}
5209
5244
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
@@ -5216,7 +5251,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5216
5251
  sum = sum + ${a.getByIndices("inputIndices")};
5217
5252
  }
5218
5253
  ${d.setByOffset("global_idx","sum")};
5219
- }`};return{name:"CumSum",shaderCache:{hint:n.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:[{type:12,data:o},{type:12,data:c},...R(t,t)]}),getShaderSource:m}},Zu=(e,t)=>{let r=e.inputs[0].dims,n=e.inputs[0].dataType,o=e.inputs[1];e.compute(df(n,r,o,t),{inputs:[0]})},Qu=e=>{let t=e.exclusive===1,r=e.reverse===1;return J({exclusive:t,reverse:r})}});var lf,cf,pf,ed,td,rd=V(()=>{"use strict";Q();ie();Ie();ae();lf=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},cf=(e,t,r,n)=>{let o=[];o.push(`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
5254
+ }`};return{name:"CumSum",shaderCache:{hint:n.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:[{type:12,data:o},{type:12,data:c},...V(t,t)]}),getShaderSource:m}},Ju=(e,t)=>{let r=e.inputs[0].dims,n=e.inputs[0].dataType,o=e.inputs[1];e.compute(df(n,r,o,t),{inputs:[0]})},ed=e=>{let t=e.exclusive===1,r=e.reverse===1;return ee({exclusive:t,reverse:r})}});var lf,cf,pf,rd,nd,od=U(()=>{"use strict";J();ae();Ie();se();lf=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},cf=(e,t,r,n)=>{let o=[];o.push(`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
5220
5255
  var a: ${r.type.indices};`);for(let i=0;i<t;++i)o.push(r.indicesSet("a",e[i],`i[${i}]`));return o.push("return a;}"),o.join(`
5221
5256
  `)},pf=(e,t)=>{let r,n,o,i,a,d,l=t.format==="NHWC",c=t.blocksize,m=t.mode==="DCR";l?([r,n,o,i]=e.dims,a=m?[r,n,o,c,c,i/c**2]:[r,n,o,i/c**2,c,c],d=m?[0,1,3,2,4,5]:[0,1,4,2,5,3]):([r,n,o,i]=[e.dims[0],e.dims[2],e.dims[3],e.dims[1]],a=m?[r,c,c,i/c**2,n,o]:[r,i/c**2,c,c,n,o],d=m?[0,3,4,1,5,2]:[0,1,4,2,5,3]);let u=e.reshape(a),h=u.dims.length,w=e.dataType,g=E("a",w,h),y=M("output",w,h),S=$=>`
5222
5257
  ${$.registerUniform("output_size","u32").declareVariables(g,y)}
@@ -5230,8 +5265,8 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5230
5265
  let aIndices = perm(indices);
5231
5266
 
5232
5267
  ${y.setByOffset("global_idx",g.getByIndices("aIndices"))}
5233
- }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:$=>{let v=l?[r,n*c,o*c,i/c**2]:[r,i/c**2,n*c,o*c],x=k.size(v),T=u.dims,C=k.sortBasedOnPerm(T,d);return{outputs:[{dims:v,dataType:$[0].dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:[{type:12,data:x},...R(T,C)]}},getShaderSource:S}},ed=(e,t)=>{lf(e.inputs),e.compute(pf(e.inputs[0],t))},td=e=>J({blocksize:e.blocksize,mode:e.mode,format:e.format})});var _o,rn,nd,mf,ff,vo,$o,od,hf,id,ad,sd=V(()=>{"use strict";Q();ie();Ie();ae();_o="[a-zA-Z]|\\.\\.\\.",rn="("+_o+")+",nd="^"+rn+"$",mf="("+rn+",)*"+rn,ff="^"+mf+"$",vo=class{constructor(t=-1){this.symbolToIndices=new Map,this.inputIndex=t}addSymbol(t,r){let n=this.symbolToIndices.get(t);n===void 0?n=[r]:n.push(r),this.symbolToIndices.set(t,n)}},$o=class{constructor(t,r){this.equation=r;this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[n,o]=r.includes("->")?r.split("->",2):[r,""];if(!n.match(RegExp(ff)))throw new Error("Invalid LHS term");if(n.split(",").forEach((d,l)=>{let c=t[l].dims.slice();if(!d.match(RegExp(nd)))throw new Error("Invalid LHS term");let m=this.processTerm(d,!0,c,l);this.lhs.push(m)}),o==="")o+=[...this.symbolToInfo.entries()].filter(([d,l])=>l.count===1||d==="...").map(([d])=>d).join("");else if(!o.match(RegExp(rn)))throw new Error("Invalid RHS");o.match(RegExp(_o,"g"))?.forEach(d=>{if(d==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let l=this.symbolToInfo.get(d);if(l===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(l.dimValue)}}),this.rhs=this.processTerm(o,!1,this.outputDims)}addSymbol(t,r,n){let o=this.symbolToInfo.get(t);if(o!==void 0){if(o.dimValue!==r&&o.count!==1)throw new Error("Dimension mismatch");o.count++,o.inputIndices.push(n)}else o={count:1,dimValue:r,inputIndices:[n]};this.symbolToInfo.set(t,o)}processTerm(t,r,n,o=-1){let 
i=n.length,a=!1,d=[],l=0;if(!t.match(RegExp(nd))&&!r&&t!=="")throw new Error("Invalid LHS term");let c=t.match(RegExp(_o,"g")),m=new vo(o);return c?.forEach((u,h)=>{if(u==="..."){if(a)throw new Error("Only one ellipsis is allowed per input term");a=!0;let w=i-c.length+1;if(w<0)throw new Error("Ellipsis out of bounds");if(d=n.slice(l,l+w),this.hasEllipsis){if(this.ellipsisDims.length!==d.length||this.ellipsisDims.toString()!==d.toString())throw new Error("Ellipsis dimensions mismatch")}else if(r)this.hasEllipsis=!0,this.ellipsisDims=d;else throw new Error("Ellipsis must be specified in the LHS");for(let g=0;g<d.length;g++){let y=String.fromCharCode("0".charCodeAt(0)+g);m.addSymbol(y,h+g),this.addSymbol(y,n[l++],o)}}else m.addSymbol(u,h+(this.hasEllipsis?this.ellipsisDims.length-1:0)),this.addSymbol(u,n[l++],o)}),m}},od=e=>e+"_max",hf=(e,t,r,n)=>{let i=e.map(m=>m.length).map((m,u)=>E(`input${u}`,t,m)),a=k.size(n),d=M("output",t,n.length),l=[...r.symbolToInfo.keys()].filter(m=>!r.rhs.symbolToIndices.has(m)),c=m=>{let u=[],h="var prod = 1.0;",w="var sum = 0.0;",g="sum += prod;",y=[],S=[],$=[],v=[],x=r.symbolToInfo.size===r.rhs.symbolToIndices.size;r.symbolToInfo.forEach((C,A)=>{if(r.rhs.symbolToIndices.has(A)){let P=r.rhs.symbolToIndices.get(A)?.[0];P!==void 0&&r.lhs.forEach((B,N)=>{if(C.inputIndices.includes(N)){let W=B.symbolToIndices.get(A);if(W===void 0)throw new Error("Invalid symbol error");W.forEach(K=>{u.push(`${i[N].indicesSet(`input${N}Indices`,K,d.indicesGet("outputIndices",P))}`)})}})}else r.lhs.forEach((P,B)=>{if(C.inputIndices.includes(B)){let N=P.symbolToIndices.get(A);if(N===void 0)throw new Error("Invalid symbol error");N.forEach(W=>{y.push(`${i[B].indicesSet(`input${B}Indices`,W,`${A}`)}`)}),v.push(`prod *= ${i[B].getByIndices(`input${B}Indices`)};`)}}),S.push(`for(var ${A}: u32 = 0; ${A} < uniforms.${od(A)}; ${A}++) {`),$.push("}")});let T=x?[...u,`let sum = ${i.map((C,A)=>C.getByIndices(`input${A}Indices`)).join(" * 
")};`]:[...u,w,...S,...y,h,...v,g,...$];return`
5234
- ${m.registerUniforms(l.map(C=>({name:`${od(C)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...i,d)}
5268
+ }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:$=>{let v=l?[r,n*c,o*c,i/c**2]:[r,i/c**2,n*c,o*c],x=k.size(v),T=u.dims,C=k.sortBasedOnPerm(T,d);return{outputs:[{dims:v,dataType:$[0].dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:[{type:12,data:x},...V(T,C)]}},getShaderSource:S}},rd=(e,t)=>{lf(e.inputs),e.compute(pf(e.inputs[0],t))},nd=e=>ee({blocksize:e.blocksize,mode:e.mode,format:e.format})});var $o,rn,id,mf,ff,xo,So,ad,hf,sd,ud,dd=U(()=>{"use strict";J();ae();Ie();se();$o="[a-zA-Z]|\\.\\.\\.",rn="("+$o+")+",id="^"+rn+"$",mf="("+rn+",)*"+rn,ff="^"+mf+"$",xo=class{constructor(t=-1){this.symbolToIndices=new Map,this.inputIndex=t}addSymbol(t,r){let n=this.symbolToIndices.get(t);n===void 0?n=[r]:n.push(r),this.symbolToIndices.set(t,n)}},So=class{constructor(t,r){this.equation=r;this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[n,o]=r.includes("->")?r.split("->",2):[r,""];if(!n.match(RegExp(ff)))throw new Error("Invalid LHS term");if(n.split(",").forEach((d,l)=>{let c=t[l].dims.slice();if(!d.match(RegExp(id)))throw new Error("Invalid LHS term");let m=this.processTerm(d,!0,c,l);this.lhs.push(m)}),o==="")o+=[...this.symbolToInfo.entries()].filter(([d,l])=>l.count===1||d==="...").map(([d])=>d).join("");else if(!o.match(RegExp(rn)))throw new Error("Invalid RHS");o.match(RegExp($o,"g"))?.forEach(d=>{if(d==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let l=this.symbolToInfo.get(d);if(l===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(l.dimValue)}}),this.rhs=this.processTerm(o,!1,this.outputDims)}addSymbol(t,r,n){let o=this.symbolToInfo.get(t);if(o!==void 0){if(o.dimValue!==r&&o.count!==1)throw new Error("Dimension mismatch");o.count++,o.inputIndices.push(n)}else o={count:1,dimValue:r,inputIndices:[n]};this.symbolToInfo.set(t,o)}processTerm(t,r,n,o=-1){let 
i=n.length,a=!1,d=[],l=0;if(!t.match(RegExp(id))&&!r&&t!=="")throw new Error("Invalid LHS term");let c=t.match(RegExp($o,"g")),m=new xo(o);return c?.forEach((u,h)=>{if(u==="..."){if(a)throw new Error("Only one ellipsis is allowed per input term");a=!0;let w=i-c.length+1;if(w<0)throw new Error("Ellipsis out of bounds");if(d=n.slice(l,l+w),this.hasEllipsis){if(this.ellipsisDims.length!==d.length||this.ellipsisDims.toString()!==d.toString())throw new Error("Ellipsis dimensions mismatch")}else if(r)this.hasEllipsis=!0,this.ellipsisDims=d;else throw new Error("Ellipsis must be specified in the LHS");for(let g=0;g<d.length;g++){let y=String.fromCharCode("0".charCodeAt(0)+g);m.addSymbol(y,h+g),this.addSymbol(y,n[l++],o)}}else m.addSymbol(u,h+(this.hasEllipsis?this.ellipsisDims.length-1:0)),this.addSymbol(u,n[l++],o)}),m}},ad=e=>e+"_max",hf=(e,t,r,n)=>{let i=e.map(m=>m.length).map((m,u)=>E(`input${u}`,t,m)),a=k.size(n),d=M("output",t,n.length),l=[...r.symbolToInfo.keys()].filter(m=>!r.rhs.symbolToIndices.has(m)),c=m=>{let u=[],h="var prod = 1.0;",w="var sum = 0.0;",g="sum += prod;",y=[],S=[],$=[],v=[],x=r.symbolToInfo.size===r.rhs.symbolToIndices.size;r.symbolToInfo.forEach((C,A)=>{if(r.rhs.symbolToIndices.has(A)){let P=r.rhs.symbolToIndices.get(A)?.[0];P!==void 0&&r.lhs.forEach((D,W)=>{if(C.inputIndices.includes(W)){let N=D.symbolToIndices.get(A);if(N===void 0)throw new Error("Invalid symbol error");N.forEach(j=>{u.push(`${i[W].indicesSet(`input${W}Indices`,j,d.indicesGet("outputIndices",P))}`)})}})}else r.lhs.forEach((P,D)=>{if(C.inputIndices.includes(D)){let W=P.symbolToIndices.get(A);if(W===void 0)throw new Error("Invalid symbol error");W.forEach(N=>{y.push(`${i[D].indicesSet(`input${D}Indices`,N,`${A}`)}`)}),v.push(`prod *= ${i[D].getByIndices(`input${D}Indices`)};`)}}),S.push(`for(var ${A}: u32 = 0; ${A} < uniforms.${ad(A)}; ${A}++) {`),$.push("}")});let T=x?[...u,`let sum = ${i.map((C,A)=>C.getByIndices(`input${A}Indices`)).join(" * 
")};`]:[...u,w,...S,...y,h,...v,g,...$];return`
5269
+ ${m.registerUniforms(l.map(C=>({name:`${ad(C)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...i,d)}
5235
5270
 
5236
5271
  ${m.mainStart()}
5237
5272
  ${m.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
@@ -5241,7 +5276,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5241
5276
  ${T.join(`
5242
5277
  `)};
5243
5278
  ${d.setByOffset("global_idx","sum")};
5244
- }`};return{name:"Einsum",shaderCache:{hint:r.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let m=l.filter(h=>r.symbolToInfo.has(h)).map(h=>({type:12,data:r.symbolToInfo.get(h)?.dimValue||0}));m.push({type:12,data:a});let u=e.map((h,w)=>[...R(h)]).reduce((h,w)=>h.concat(w),m);return u.push(...R(n)),{outputs:[{dims:n,dataType:t}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:u}},getShaderSource:c}},id=(e,t)=>{let r=new $o(e.inputs,t.equation),n=r.outputDims,o=e.inputs.map((i,a)=>i.dims);e.compute(hf(o,e.inputs[0].dataType,r,n))},ad=e=>{let t=e.equation.replace(/\s+/g,"");return J({equation:t})}});var gf,ud,yf,bf,dd,ld=V(()=>{"use strict";Q();ie();ae();gf=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=r.length<t.length?0:r.length-t.length,o=t.length<r.length?0:t.length-r.length;for(;n<r.length&&o<t.length;++n,++o)if(r[n]!==t[o]&&r[n]!==1&&t[o]!==1)throw new Error("Expand requires shape to be broadcastable to input")},ud=(e,t)=>{let r=e.length-t.length,n=[];for(let o=0;o<r;++o)n.push(e[o]);for(let o=0;o<t.length;++o)n.push(t[o]===1?e[o+r]:t[o]);return n},yf=(e,t)=>e.length>t.length?ud(e,t):ud(t,e),bf=e=>{let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=yf(t,r),o=e[0].dataType,i=o===9?4:1,a=Math.ceil(k.size(n)/i),d=c=>{let m=E("input",o,t.length,i),u=M("output",o,n.length,i),h;if(o===9){let w=(g,y,S="")=>`
5279
+ }`};return{name:"Einsum",shaderCache:{hint:r.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let m=l.filter(h=>r.symbolToInfo.has(h)).map(h=>({type:12,data:r.symbolToInfo.get(h)?.dimValue||0}));m.push({type:12,data:a});let u=e.map((h,w)=>[...V(h)]).reduce((h,w)=>h.concat(w),m);return u.push(...V(n)),{outputs:[{dims:n,dataType:t}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:u}},getShaderSource:c}},sd=(e,t)=>{let r=new So(e.inputs,t.equation),n=r.outputDims,o=e.inputs.map((i,a)=>i.dims);e.compute(hf(o,e.inputs[0].dataType,r,n))},ud=e=>{let t=e.equation.replace(/\s+/g,"");return ee({equation:t})}});var gf,ld,yf,bf,cd,pd=U(()=>{"use strict";J();ae();se();gf=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=r.length<t.length?0:r.length-t.length,o=t.length<r.length?0:t.length-r.length;for(;n<r.length&&o<t.length;++n,++o)if(r[n]!==t[o]&&r[n]!==1&&t[o]!==1)throw new Error("Expand requires shape to be broadcastable to input")},ld=(e,t)=>{let r=e.length-t.length,n=[];for(let o=0;o<r;++o)n.push(e[o]);for(let o=0;o<t.length;++o)n.push(t[o]===1?e[o+r]:t[o]);return n},yf=(e,t)=>e.length>t.length?ld(e,t):ld(t,e),bf=e=>{let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=yf(t,r),o=e[0].dataType,i=o===9?4:1,a=Math.ceil(k.size(n)/i),d=c=>{let m=E("input",o,t.length,i),u=M("output",o,n.length,i),h;if(o===9){let w=(g,y,S="")=>`
5245
5280
  let outputIndices${y} = ${u.offsetToIndices(`outputOffset + ${y}u`)};
5246
5281
  let offset${y} = ${m.broadcastedIndicesToOffset(`outputIndices${y}`,u)};
5247
5282
  let index${y} = offset${y} / 4u;
@@ -5263,13 +5298,13 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5263
5298
  ${c.registerUniform("vec_size","u32").declareVariables(m,u)}
5264
5299
  ${c.mainStart()}
5265
5300
  ${c.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
5266
- ${h}`},l=[{type:12,data:a},...R(t,n)];return{name:"Expand",shaderCache:{hint:`${n.length}`,inputDependencies:["rank"]},getShaderSource:d,getRunData:()=>({outputs:[{dims:n,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:l})}},dd=e=>{gf(e.inputs),e.compute(bf(e.inputs),{inputs:[0]})}});var wf,cd,pd=V(()=>{"use strict";Q();ie();ae();Xr();wf=e=>{let t=e[0].dataType,r=k.size(e[0].dims),n=k.size(e[1].dims),o=n%4===0,i=a=>{let d=E("x",t,[1],4),l=E("bias",t,[1],4),c=M("y",t,[1],4),m=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],u=w=>`
5301
+ ${h}`},l=[{type:12,data:a},...V(t,n)];return{name:"Expand",shaderCache:{hint:`${n.length}`,inputDependencies:["rank"]},getShaderSource:d,getRunData:()=>({outputs:[{dims:n,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:l})}},cd=e=>{gf(e.inputs),e.compute(bf(e.inputs),{inputs:[0]})}});var wf,md,fd=U(()=>{"use strict";J();ae();se();Xr();wf=e=>{let t=e[0].dataType,r=k.size(e[0].dims),n=k.size(e[1].dims),o=n%4===0,i=a=>{let d=E("x",t,[1],4),l=E("bias",t,[1],4),c=M("y",t,[1],4),m=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],u=w=>`
5267
5302
  let bias${w}_offset: u32 = (global_idx * 4 + ${w}) % uniforms.bias_size;
5268
5303
  let bias${w} = ${l.getByOffset(`bias${w}_offset / 4`)}[bias${w}_offset % 4];`,h=o?`
5269
5304
  let bias = ${l.getByOffset("global_idx % (uniforms.bias_size / 4)")};`:`${u(0)}${u(1)}${u(2)}${u(3)}
5270
5305
  let bias = ${d.type.value}(bias0, bias1, bias2, bias3);`;return`${a.registerUniforms(m).declareVariables(d,l,c)}
5271
5306
 
5272
- ${lo(Ee(t))}
5307
+ ${po(Ee(t))}
5273
5308
 
5274
5309
  ${a.mainStart(At)}
5275
5310
  ${a.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_vec_size")}
@@ -5277,8 +5312,8 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5277
5312
  let x = ${d.getByOffset("global_idx")};
5278
5313
  ${h}
5279
5314
  let x_in = x + bias;
5280
- ${c.setByOffset("global_idx",co("x_in"))}
5281
- }`};return{name:"FastGeluWithBias",shaderCache:{hint:`${o}`,inputDependencies:["type","type"]},getShaderSource:i,getRunData:a=>({outputs:[{dims:a[0].dims,dataType:a[0].dataType}],programUniforms:[{type:12,data:Math.ceil(r/4)},{type:12,data:n}],dispatchGroup:{x:Math.ceil(r/At/4)}})}},cd=e=>{e.inputs.length<2||k.size(e.inputs[1].dims)===0?du(e):e.compute(wf(e.inputs))}});var _f,vf,md,fd,hd=V(()=>{"use strict";Q();ie();Ie();ae();_f=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},vf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.axis,o),a=r.slice(0);a.splice(i,1,...n);let d=r[i],l=e[0].dataType===9?4:1,c=Math.ceil(k.size(a)/l),m=[{type:12,data:c},{type:6,data:d},{type:12,data:i},...R(e[0].dims,e[1].dims,a)],u=h=>{let w=E("data",e[0].dataType,e[0].dims.length,l),g=E("inputIndices",e[1].dataType,e[1].dims.length),y=M("output",e[0].dataType,a.length,l),S=v=>{let x=n.length,T=`var indicesIndices${v} = ${g.type.indices}(0);`;for(let C=0;C<x;C++)T+=`${x>1?`indicesIndices${v}[${C}]`:`indicesIndices${v}`} = ${a.length>1?`outputIndices${v}[uniforms.axis + ${C}]`:`outputIndices${v}`};`;T+=`
5315
+ ${c.setByOffset("global_idx",mo("x_in"))}
5316
+ }`};return{name:"FastGeluWithBias",shaderCache:{hint:`${o}`,inputDependencies:["type","type"]},getShaderSource:i,getRunData:a=>({outputs:[{dims:a[0].dims,dataType:a[0].dataType}],programUniforms:[{type:12,data:Math.ceil(r/4)},{type:12,data:n}],dispatchGroup:{x:Math.ceil(r/At/4)}})}},md=e=>{e.inputs.length<2||k.size(e.inputs[1].dims)===0?cu(e):e.compute(wf(e.inputs))}});var _f,vf,hd,gd,yd=U(()=>{"use strict";J();ae();Ie();se();_f=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},vf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.axis,o),a=r.slice(0);a.splice(i,1,...n);let d=r[i],l=e[0].dataType===9?4:1,c=Math.ceil(k.size(a)/l),m=[{type:12,data:c},{type:6,data:d},{type:12,data:i},...V(e[0].dims,e[1].dims,a)],u=h=>{let w=E("data",e[0].dataType,e[0].dims.length,l),g=E("inputIndices",e[1].dataType,e[1].dims.length),y=M("output",e[0].dataType,a.length,l),S=v=>{let x=n.length,T=`var indicesIndices${v} = ${g.type.indices}(0);`;for(let C=0;C<x;C++)T+=`${x>1?`indicesIndices${v}[${C}]`:`indicesIndices${v}`} = ${a.length>1?`outputIndices${v}[uniforms.axis + ${C}]`:`outputIndices${v}`};`;T+=`
5282
5317
  var idx${v} = ${g.getByIndices(`indicesIndices${v}`)};
5283
5318
  if (idx${v} < 0) {
5284
5319
  idx${v} = idx${v} + uniforms.axisDimLimit;
@@ -5309,7 +5344,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5309
5344
  ${h.mainStart()}
5310
5345
  ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
5311
5346
  ${$}
5312
- }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:a,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:m}),getShaderSource:u}},md=e=>J({axis:e.axis}),fd=(e,t)=>{let r=e.inputs;_f(r),e.compute(vf(e.inputs,t))}});var $f,xf,gd,yd,bd=V(()=>{"use strict";Q();ie();Ie();ae();$f=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let r=k.normalizeAxis(t.quantizeAxis,e[0].dims.length),n=t.blockSize,o=e[0],i=e[2],a=e.length===4?e[3]:void 0;if(i.dims.length!==o.dims.length||!o.dims.map((d,l)=>l===r?Math.ceil(d/n)===i.dims[l]:d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(a){if(a.dataType!==o.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(a.dims.length!==i.dims.length||!a.dims.map((d,l)=>d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},xf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.gatherAxis,o),a=k.normalizeAxis(t.quantizeAxis,o),d=r.slice(0);d.splice(i,1,...n);let l=k.size(d),c=e[2].dataType,u=e[0].dataType===22,h=[{type:12,data:l},{type:12,data:a},{type:12,data:i},{type:12,data:t.blockSize},...R(...e.map((g,y)=>g.dims),d)],w=g=>{let y=E("data",e[0].dataType,e[0].dims.length),S=E("inputIndices",e[1].dataType,e[1].dims.length),$=E("scales",e[2].dataType,e[2].dims.length),v=e.length>3?E("zeroPoint",e[3].dataType,e[3].dims.length):void 0,x=M("output",c,d.length),T=[y,S,$];v&&T.push(v);let C=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5347
+ }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:a,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:m}),getShaderSource:u}},hd=e=>ee({axis:e.axis}),gd=(e,t)=>{let r=e.inputs;_f(r),e.compute(vf(e.inputs,t))}});var $f,xf,bd,wd,_d=U(()=>{"use strict";J();ae();Ie();se();$f=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let r=k.normalizeAxis(t.quantizeAxis,e[0].dims.length),n=t.blockSize,o=e[0],i=e[2],a=e.length===4?e[3]:void 0;if(i.dims.length!==o.dims.length||!o.dims.map((d,l)=>l===r?Math.ceil(d/n)===i.dims[l]:d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(a){if(a.dataType!==o.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(a.dims.length!==i.dims.length||!a.dims.map((d,l)=>d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},xf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.gatherAxis,o),a=k.normalizeAxis(t.quantizeAxis,o),d=r.slice(0);d.splice(i,1,...n);let l=k.size(d),c=e[2].dataType,u=e[0].dataType===22,h=[{type:12,data:l},{type:12,data:a},{type:12,data:i},{type:12,data:t.blockSize},...V(...e.map((g,y)=>g.dims),d)],w=g=>{let y=E("data",e[0].dataType,e[0].dims.length),S=E("inputIndices",e[1].dataType,e[1].dims.length),$=E("scales",e[2].dataType,e[2].dims.length),v=e.length>3?E("zeroPoint",e[3].dataType,e[3].dims.length):void 0,x=M("output",c,d.length),T=[y,S,$];v&&T.push(v);let C=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5313
5348
  ${g.registerUniforms(C).declareVariables(...T,x)}
5314
5349
  ${g.mainStart()}
5315
5350
  let output_indices = ${x.offsetToIndices("global_idx")};
@@ -5354,8 +5389,8 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5354
5389
  let zero_point = zero_point_vec[zero_point_index / 2];`:"var zero_point = 0")()};
5355
5390
  let dequantized_data = ${Ee(c)}(quantized_data - zero_point) * scale;
5356
5391
  ${x.setByOffset("global_idx","dequantized_data")};
5357
- }`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((g,y)=>y!==1).map(g=>g.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(g,y)=>"rank")},getRunData:()=>({outputs:[{dims:d,dataType:c}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:h}),getShaderSource:w}},gd=(e,t)=>{let r=e.inputs;$f(r,t),e.compute(xf(e.inputs,t))},yd=e=>J({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})});var Sf,Tf,wd,_d,vd=V(()=>{"use strict";Q();ie();Ie();ae();Sf=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and
5358
- indices input tensors be of same rank.`)},Tf=(e,t)=>{let r=e[0].dims,n=e[0].dataType,o=r.length,i=e[1].dims,a=e[1].dataType,d=k.normalizeAxis(t.axis,o),l=r[d],c=i.slice(0),m=k.size(c),u=E("input",n,o),h=E("indicesInput",a,i.length),w=M("output",n,c.length),g=[{type:12,data:m},{type:6,data:l},{type:12,data:d}];return g.push(...R(r,i,c)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:c,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:g}),getShaderSource:$=>`
5392
+ }`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((g,y)=>y!==1).map(g=>g.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(g,y)=>"rank")},getRunData:()=>({outputs:[{dims:d,dataType:c}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:h}),getShaderSource:w}},bd=(e,t)=>{let r=e.inputs;$f(r,t),e.compute(xf(e.inputs,t))},wd=e=>ee({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})});var Sf,Tf,vd,$d,xd=U(()=>{"use strict";J();ae();Ie();se();Sf=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and
5393
+ indices input tensors be of same rank.`)},Tf=(e,t)=>{let r=e[0].dims,n=e[0].dataType,o=r.length,i=e[1].dims,a=e[1].dataType,d=k.normalizeAxis(t.axis,o),l=r[d],c=i.slice(0),m=k.size(c),u=E("input",n,o),h=E("indicesInput",a,i.length),w=M("output",n,c.length),g=[{type:12,data:m},{type:6,data:l},{type:12,data:d}];return g.push(...V(r,i,c)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:c,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:g}),getShaderSource:$=>`
5359
5394
  ${$.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(u,h,w)}
5360
5395
  ${$.mainStart()}
5361
5396
  ${$.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
@@ -5371,7 +5406,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5371
5406
  let value = ${u.getByIndices("inputIndices")};
5372
5407
 
5373
5408
  ${w.setByOffset("global_idx","value")};
5374
- }`}},wd=e=>J({axis:e.axis}),_d=(e,t)=>{let r=e.inputs;Sf(r),e.compute(Tf(e.inputs,t))}});var If,Cf,$d,xd,Sd=V(()=>{"use strict";Q();ie();ae();If=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},Cf=(e,t)=>{let r=e[0].dims.slice(),n=e[1].dims.slice(),[o,i,a]=Gr.getShapeOfGemmResult(r,t.transA,n,t.transB,e.length===3?e[2].dims:void 0),d=[o,i];if(!d)throw new Error("Can't use gemm on the given tensors");let l=k.size(d),c=[{type:12,data:l},{type:12,data:o},{type:12,data:i},{type:12,data:a},{type:1,data:t.alpha},{type:1,data:t.beta}],m=["type","type"];e.length===3&&(c.push(...R(e[2].dims)),m.push("rank")),c.push(...R(d));let u=h=>{let w="";t.transA&&t.transB?w="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?w="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?w="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(w="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let g=t.alpha===1?"":"value *= uniforms.alpha;",y=E("a",e[0].dataType,e[0].dims),S=E("b",e[1].dataType,e[1].dims),$=y.type.value,v=null,x=[y,S];e.length===3&&(v=E("c",e[2].dataType,e[2].dims.length),x.push(v));let T=M("output",e[0].dataType,d.length);x.push(T);let C=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return`
5409
+ }`}},vd=e=>ee({axis:e.axis}),$d=(e,t)=>{let r=e.inputs;Sf(r),e.compute(Tf(e.inputs,t))}});var If,Cf,Sd,Td,Id=U(()=>{"use strict";J();ae();se();If=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},Cf=(e,t)=>{let r=e[0].dims.slice(),n=e[1].dims.slice(),[o,i,a]=Gr.getShapeOfGemmResult(r,t.transA,n,t.transB,e.length===3?e[2].dims:void 0),d=[o,i];if(!d)throw new Error("Can't use gemm on the given tensors");let l=k.size(d),c=[{type:12,data:l},{type:12,data:o},{type:12,data:i},{type:12,data:a},{type:1,data:t.alpha},{type:1,data:t.beta}],m=["type","type"];e.length===3&&(c.push(...V(e[2].dims)),m.push("rank")),c.push(...V(d));let u=h=>{let w="";t.transA&&t.transB?w="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?w="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?w="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(w="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let g=t.alpha===1?"":"value *= uniforms.alpha;",y=E("a",e[0].dataType,e[0].dims),S=E("b",e[1].dataType,e[1].dims),$=y.type.value,v=null,x=[y,S];e.length===3&&(v=E("c",e[2].dataType,e[2].dims.length),x.push(v));let T=M("output",e[0].dataType,d.length);x.push(T);let C=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return`
5375
5410
  ${h.registerUniforms(C).declareVariables(...x)}
5376
5411
 
5377
5412
  ${h.mainStart()}
@@ -5388,74 +5423,42 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5388
5423
  ${g}
5389
5424
  ${(()=>v!=null?`let cOffset = ${v.broadcastedIndicesToOffset("vec2(m, n)",T)}; value += ${$}(uniforms.beta) * ${v.getByOffset("cOffset")};`:"")()}
5390
5425
  output[global_idx] = value;
5391
- }`};return{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:m},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c}),getShaderSource:u}},$d=e=>{let t=e.transA,r=e.transB,n=e.alpha,o=e.beta;return{transA:t,transB:r,alpha:n,beta:o,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},xd=(e,t)=>{If(e.inputs),e.compute(Cf(e.inputs,t))}});var Ne,Ef,Id,Td,Pf,nr,Cd,xo=V(()=>{"use strict";Q();ie();Ie();Hr();Kr();ae();lt();Ne=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,Ef=(e,t)=>{let r=e[0],n=Ne(e,1),o=Ne(e,2),i=Ne(e,3),a=Ne(e,4),d=Ne(e,5),l=Ne(e,6),c=Ne(e,7);if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let m=r.dims[0],u=r.dims[1],h=r.dims.length===3?r.dims[2]:t.numHeads*r.dims[4],w=u,g=0,y=0,S=Math.floor(h/t.numHeads);if(l&&c&&k.size(l.dims)&&k.size(c.dims)){if(l.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(l.dims[0]!==m||l.dims[1]!==t.numHeads||l.dims[3]!==S)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(c.dims[0]!==m||c.dims[1]!==t.numHeads||c.dims[3]!==S)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(l.dims[2]!==c.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(c.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');g=l.dims[2],y=l.dims[2]}else if(l&&k.size(l.dims)||c&&k.size(c.dims))throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let $;if(n&&k.size(n.dims)>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall 
have same dim 0 (batch size)');if(n.dims.length===3){if(n.dims[2]!==r.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');$=2,w=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==S)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');$=5,w=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==S)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');$=0,w=n.dims[2]}}else{if(r.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(r.dims[2]!==t.numHeads||r.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');$=3}if(i&&k.size(i.dims)>0){if(i.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(n&&n.dims.length===5&&n.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let v=g+w,x=0;if(a&&k.size(a.dims)>0){x=8;let P=a.dims;throw P.length===1?P[0]===m?x=1:P[0]===3*m+2&&(x=3):P.length===2&&P[0]===m&&P[1]===v&&(x=5),x===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let T=!1,C=h;if(o&&k.size(o.dims)>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(w!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');C=o.dims[2]}else{if(w!==o.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');C=o.dims[1]*o.dims[3],T=!0}}let A=!1;if(a&&k.size(a.dims)>0)throw new Error("Key padding mask is not 
supported");if(d&&k.size(d.dims)>0){if(d.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(d.dims[0]!==m||d.dims[1]!==t.numHeads||d.dims[2]!==u||d.dims[3]!==v)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:m,sequenceLength:u,pastSequenceLength:g,kvSequenceLength:w,totalSequenceLength:v,maxSequenceLength:y,inputHiddenSize:0,hiddenSize:h,vHiddenSize:C,headSize:S,vHeadSize:Math.floor(C/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:x,scale:t.scale,broadcastResPosBias:A,passPastInKv:T,qkvFormat:$}},Id=e=>J({...e}),Td=J({perm:[0,2,1,3]}),Pf=(e,t,r,n,o,i,a)=>{let d=[n,o,i],l=k.size(d),c=[{type:12,data:l},{type:12,data:a},{type:12,data:i}],m=u=>{let h=M("qkv_with_bias",t.dataType,d),w=E("qkv",t.dataType,d),g=E("bias",r.dataType,d),y=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return`
5426
+ }`};return{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:m},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c}),getShaderSource:u}},Sd=e=>{let t=e.transA,r=e.transB,n=e.alpha,o=e.beta;return{transA:t,transB:r,alpha:n,beta:o,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},Td=(e,t)=>{If(e.inputs),e.compute(Cf(e.inputs,t))}});var We,Ef,Ad,Cd,Pf,nr,kd,To=U(()=>{"use strict";J();ae();Ie();Hr();Kr();se();lt();We=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,Ef=(e,t)=>{let r=e[0],n=We(e,1),o=We(e,2),i=We(e,3),a=We(e,4),d=We(e,5),l=We(e,6),c=We(e,7);if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let m=r.dims[0],u=r.dims[1],h=r.dims.length===3?r.dims[2]:t.numHeads*r.dims[4],w=u,g=0,y=0,S=Math.floor(h/t.numHeads);if(l&&c&&k.size(l.dims)&&k.size(c.dims)){if(l.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(l.dims[0]!==m||l.dims[1]!==t.numHeads||l.dims[3]!==S)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(c.dims[0]!==m||c.dims[1]!==t.numHeads||c.dims[3]!==S)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(l.dims[2]!==c.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(c.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');g=l.dims[2],y=l.dims[2]}else if(l&&k.size(l.dims)||c&&k.size(c.dims))throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let $;if(n&&k.size(n.dims)>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall 
have same dim 0 (batch size)');if(n.dims.length===3){if(n.dims[2]!==r.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');$=2,w=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==S)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');$=5,w=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==S)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');$=0,w=n.dims[2]}}else{if(r.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(r.dims[2]!==t.numHeads||r.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');$=3}if(i&&k.size(i.dims)>0){if(i.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(n&&n.dims.length===5&&n.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let v=g+w,x=0;if(a&&k.size(a.dims)>0){x=8;let P=a.dims;throw P.length===1?P[0]===m?x=1:P[0]===3*m+2&&(x=3):P.length===2&&P[0]===m&&P[1]===v&&(x=5),x===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let T=!1,C=h;if(o&&k.size(o.dims)>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(w!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');C=o.dims[2]}else{if(w!==o.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');C=o.dims[1]*o.dims[3],T=!0}}let A=!1;if(a&&k.size(a.dims)>0)throw new Error("Key padding mask is not 
supported");if(d&&k.size(d.dims)>0){if(d.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(d.dims[0]!==m||d.dims[1]!==t.numHeads||d.dims[2]!==u||d.dims[3]!==v)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:m,sequenceLength:u,pastSequenceLength:g,kvSequenceLength:w,totalSequenceLength:v,maxSequenceLength:y,inputHiddenSize:0,hiddenSize:h,vHiddenSize:C,headSize:S,vHeadSize:Math.floor(C/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:x,scale:t.scale,broadcastResPosBias:A,passPastInKv:T,qkvFormat:$}},Ad=e=>ee({...e}),Cd=ee({perm:[0,2,1,3]}),Pf=(e,t,r,n,o,i,a)=>{let d=[n,o,i],l=k.size(d),c=[{type:12,data:l},{type:12,data:a},{type:12,data:i}],m=u=>{let h=M("qkv_with_bias",t.dataType,d),w=E("qkv",t.dataType,d),g=E("bias",r.dataType,d),y=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return`
5392
5427
  ${u.registerUniforms(y).declareVariables(w,g,h)}
5393
5428
  ${u.mainStart()}
5394
5429
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5395
5430
  let bias_offset_idx = (global_idx % uniforms.hidden_size) + uniforms.bias_offset;
5396
5431
 
5397
5432
  qkv_with_bias[global_idx] = qkv[global_idx] + bias[bias_offset_idx];
5398
- }`};return e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:d,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c}),getShaderSource:m},{inputs:[t,r],outputs:[-1]})[0]},nr=(e,t,r,n,o,i,a,d)=>{let l=i;if(a&&k.size(a.dims)>0){if(n===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return l=Pf(e,i,a,t,n,r*o,d),l=l.reshape([t,n,r,o]),r===1||n===1?l:e.compute(Pe(l,Td.perm),{inputs:[l],outputs:[-1]})[0]}else return i.dims.length===3&&(l=i.reshape([t,n,r,o])),r===1||n===1?l:e.compute(Pe(l,Td.perm),{inputs:[l],outputs:[-1]})[0]},Cd=(e,t)=>{let r=Ef(e.inputs,t),n=e.inputs[0],o=Ne(e.inputs,1),i=Ne(e.inputs,2),a=Ne(e.inputs,3),d=Ne(e.inputs,4),l=Ne(e.inputs,5),c=Ne(e.inputs,6),m=Ne(e.inputs,7);if(n.dims.length===5)throw new Error("Packed QKV is not implemented");if(o?.dims.length===5)throw new Error("Packed KV is not implemented");let u=o&&i&&o.dims.length===4&&i.dims.length===4,h=nr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,n,a,0);if(u)return Wt(e,h,o,i,d,void 0,c,m,l,r,t);if(!o||!i)throw new Error("key and value must be provided");let w=nr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.headSize,o,a,r.hiddenSize),g=nr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.vHeadSize,i,a,2*r.hiddenSize);Wt(e,h,w,g,d,void 0,c,m,l,r,t)}});var Ad,zf,Of,So,kd,To=V(()=>{"use strict";Q();ie();ae();Ad=e=>Array.from(e.getBigInt64Array(),Number),zf=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Ad(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should 
have same number of elements as rank of input data tensor")},Of=(e,t)=>{let r=[];for(let n=0;n<e.length;++n)r.push(e[n]*t[n]);return r},So=(e,t)=>{let r=e[0].dims,n=t??Ad(e[1]),o=Of(r,n),i=k.size(o),a=e[0].dataType,d=E("input",a,r.length),l=M("output",a,o.length),c=m=>`
5399
- const inputShape = ${d.indices(...r)};
5400
- ${m.registerUniform("output_size","u32").declareVariables(d,l)}
5401
- ${m.mainStart()}
5402
- ${m.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5403
- let output_indices = ${l.offsetToIndices("global_idx")};
5404
- var input_indices: ${d.type.indices};
5405
- for (var i = 0; i < ${r.length}; i++) {
5406
- let input_dim_i = ${d.indicesGet("uniforms.input_shape","i")};
5407
- let input_dim_value = ${l.indicesGet("output_indices","i")} % input_dim_i;
5408
-
5409
- ${d.indicesSet("input_indices","i","input_dim_value")}
5410
- }
5411
- ${l.setByOffset("global_idx",d.getByIndices("input_indices"))}
5412
- }`;return{name:"Tile",shaderCache:{hint:`${n}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:[{type:12,data:i},...R(e[0].dims,o)]}),getShaderSource:c}},kd=e=>{zf(e.inputs),e.compute(So(e.inputs),{inputs:[0]})}});var Bf,Ed,zd,Df,Pd,Od,Bd=V(()=>{"use strict";Q();ie();Ie();Kr();ae();xo();To();lt();Bf=(e,t)=>{let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4];if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let d=!1,l=r.dims[0],c=r.dims[1],m=r.dims.length===3?d?r.dims[2]/3:r.dims[2]:t.numHeads*r.dims[4],u=c,h=0,w=0,g=Math.floor(m/t.numHeads),y=i&&i.dims.length!==0,S=a&&a.dims.length!==0,$=!0;if(y&&S){if(i.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(a.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');$?(h=i.dims[1],w=i.dims[1]):(h=i.dims[2],w=i.dims[2])}else if(y||S)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v;if(n){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(r.dims[2]%n.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');v=2,u=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==g)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');v=5,u=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==g)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for 
past_key');v=0,u=n.dims[2]}}else{if(r.dims.length!==3&&r.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(r.dims.length===5&&(r.dims[2]!==t.numHeads||r.dims[3]!==3))throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}let x=0,T=!1,C=m;if(o){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(u!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');C=o.dims[2]}else{if(u!==o.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');C=o.dims[1]*o.dims[3],T=!0}}let A=h+u,P=!1;return{batchSize:l,sequenceLength:c,pastSequenceLength:h,kvSequenceLength:u,totalSequenceLength:A,maxSequenceLength:w,inputHiddenSize:0,hiddenSize:m,vHiddenSize:C,headSize:g,vHeadSize:Math.floor(C/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:x,scale:t.scale,broadcastResPosBias:P,passPastInKv:T,qkvFormat:v,isPastkvBSNH:$}},Ed=(e,t,r,n)=>{let o=[n.batchSize,n.totalSequenceLength,n.kvNumHeads,n.headSize],i=4,a=k.size(o)/i,d=n.totalSequenceLength,l=M("present_kv",r,o.length,i),c=E("new_kv",e.dataType,e.dims.length,i),m=t?E("past_kv",t.dataType,t.dims.length,i):void 0,u=Math.ceil(n.headSize/i),h={x:d,y:e.dims[0],z:1},w=t?["rank","rank"]:["rank"],g=[{type:12,data:a},{type:12,data:n.pastSequenceLength},{type:12,data:n.kvSequenceLength},{type:12,data:n.totalSequenceLength}],y=[c];m?(g.push(...R(e.dims),...R(t.dims),...R(o)),y.push(m)):g.push(...R(e.dims),...R(o));let S=[{name:"output_size",type:"u32"},{name:"past_seqlen",type:"u32"},{name:"new_seqlen",type:"u32"},{name:"present_seqlen",type:"u32"}],$=` let past_batch_stride = 
uniforms.past_seqlen * num_heads * H;
5413
- var past_head_stride = uniforms.past_seqlen * H;
5414
- if (is_bsnh) {
5415
- past_head_stride = H;
5416
- }
5417
- let in_offset = b * past_batch_stride + s * row_stride + n * past_head_stride + h;
5418
- present_kv[out_offset] = past_kv[in_offset];`,v=` let new_batch_stride = uniforms.new_seqlen * num_heads * H;
5419
- let new_row_stride = num_heads * H;
5420
- let new_head_stride = H;
5421
- let in_offset = b * new_batch_stride + (s - past_seqlen) * new_row_stride + n * new_head_stride + h;
5422
- present_kv[out_offset] = new_kv[in_offset];`,x=t?`if (s < past_seqlen) {
5423
- ${$}
5424
- } else if (s < past_seqlen + uniforms.new_seqlen) {
5425
- ${v}
5426
- }`:`if (s < past_seqlen + uniforms.new_seqlen) {
5427
- ${v}
5428
- }`,T=C=>`
5429
-
5430
- ${C.registerUniforms(S).declareVariables(...y,l)}
5431
- ${C.mainStart([u,n.kvNumHeads,1])}
5432
- ${C.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5433
- var indices = ${l.offsetToIndices("global_idx")};
5434
- let h = local_id.x;
5435
- let n = local_id.y;
5436
- let s = workgroup_id.x;
5437
- let b = workgroup_id.y;
5438
- let num_heads = ${n.kvNumHeads}u;
5439
- let H = ${u}u;
5440
-
5441
- let present_seqlen = uniforms.present_seqlen;
5442
- let present_batch_stride = present_seqlen * num_heads * H;
5443
- var row_stride = H;
5444
- let is_bsnh = ${n.isPastkvBSNH};
5445
-
5446
- if (is_bsnh) {
5447
- row_stride = num_heads * H;
5433
+ }`};return e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:d,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:c}),getShaderSource:m},{inputs:[t,r],outputs:[-1]})[0]},nr=(e,t,r,n,o,i,a,d)=>{let l=i;if(a&&k.size(a.dims)>0){if(n===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return l=Pf(e,i,a,t,n,r*o,d),l=l.reshape([t,n,r,o]),r===1||n===1?l:e.compute(Pe(l,Cd.perm),{inputs:[l],outputs:[-1]})[0]}else return i.dims.length===3&&(l=i.reshape([t,n,r,o])),r===1||n===1?l:e.compute(Pe(l,Cd.perm),{inputs:[l],outputs:[-1]})[0]},kd=(e,t)=>{let r=Ef(e.inputs,t),n=e.inputs[0],o=We(e.inputs,1),i=We(e.inputs,2),a=We(e.inputs,3),d=We(e.inputs,4),l=We(e.inputs,5),c=We(e.inputs,6),m=We(e.inputs,7);if(n.dims.length===5)throw new Error("Packed QKV is not implemented");if(o?.dims.length===5)throw new Error("Packed KV is not implemented");let u=o&&i&&o.dims.length===4&&i.dims.length===4,h=nr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,n,a,0);if(u)return Nt(e,h,o,i,d,void 0,c,m,l,r);if(!o||!i)throw new Error("key and value must be provided");let w=nr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.headSize,o,a,r.hiddenSize),g=nr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.vHeadSize,i,a,2*r.hiddenSize);Nt(e,h,w,g,d,void 0,c,m,l,r)}});var zf,Of,Df,Bf,Io,Ed,Pd,Co=U(()=>{"use strict";J();ae();Ie();se();zf=e=>{if(!e||e.length<1)throw new Error("too few inputs")},Of=(e,t)=>{let r=[],n=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(o=>r.push(Number(o))),n=r.length),ee({numOutputs:n,axis:t.axis,splitSizes:r})},Df=e=>`
5434
+ fn calculateOutputIndex(index: u32) -> u32 {
5435
+ for (var i: u32 = 0u; i < ${e}u; i += 1u ) {
5436
+ if (index < ${F("uniforms.size_in_split_axis","i",e)}) {
5437
+ return i;
5448
5438
  }
5449
- var present_head_stride = present_seqlen * H;
5450
- if (is_bsnh) {
5451
- present_head_stride = H;
5452
5439
  }
5440
+ return ${e}u;
5441
+ }`,Bf=e=>{let t=e.length,r=[];for(let n=0;n<t;++n){let o=e[n].setByIndices("indices","input[global_idx]");t===1?r.push(o):n===0?r.push(`if (output_number == ${n}u) { ${o} }`):n===t-1?r.push(`else { ${o} }`):r.push(`else if (output_number == ${n}) { ${o} }`)}return`
5442
+ fn writeBufferData(output_number: u32, indices: ${e[0].type.indices}, global_idx: u32) {
5443
+ ${r.join(`
5444
+ `)}
5445
+ }`},Io=(e,t)=>{let r=e[0].dims,n=k.size(r),o=e[0].dataType,i=k.normalizeAxis(t.axis,r.length),a=new Array(t.numOutputs),d=E("input",o,r.length),l=new Array(t.numOutputs),c=[],m=[],u=0,h=[{type:12,data:n}];for(let g=0;g<t.numOutputs;g++){u+=t.splitSizes[g],l[g]=u;let y=r.slice();y[i]=t.splitSizes[g],m.push(y),a[g]=M(`output${g}`,o,y.length),c.push({dims:m[g],dataType:e[0].dataType})}h.push({type:12,data:l},...V(r,...m));let w=g=>`
5446
+ ${g.registerUniform("input_size","u32").registerUniform("size_in_split_axis","u32",l.length).declareVariables(d,...a)}
5447
+ ${Df(l.length)}
5448
+ ${Bf(a)}
5453
5449
 
5454
- let past_seqlen = uniforms.past_seqlen;
5450
+ ${g.mainStart()}
5451
+ ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")}
5455
5452
 
5456
- let out_offset = b * present_batch_stride + s * row_stride + n * present_head_stride + h;
5457
- ${x}
5458
- }`;return{name:"ConcatPastNew",shaderCache:{hint:`${n.kvNumHeads}${u}${!!t}`,inputDependencies:w},getRunData:()=>({outputs:[{dims:o,dataType:r}],dispatchGroup:h,programUniforms:g}),getShaderSource:T}},zd=e=>J({...e}),Df=J({perm:[0,2,1,3]}),Pd=(e,t,r,n,o)=>{let i=t,a=n.kvNumHeads,d=n.nReps;return t.dims.length===3&&n.kvSequenceLength!==0&&(i=t.reshape([n.batchSize,n.kvSequenceLength,a,n.headSize])),r?i=e.compute(Ed(i,r,i.dataType,n),{inputs:[i,r],outputs:[n.isPastkvBSNH?o:-1]})[0]:i=e.compute(Ed(i,void 0,i.dataType,n),{inputs:[i],outputs:[n.isPastkvBSNH?o:-1]})[0],d!==1&&(i=e.compute(So([i],[1,1,1,d]),{inputs:[i],outputs:[-1]})[0],i=i.reshape([n.batchSize,n.totalSequenceLength,a*d,n.headSize])),e.compute(Pe(i,Df.perm),{inputs:[i],outputs:[-1]})[0]},Od=(e,t)=>{let r=Bf(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(e.inputs[1]?.dims.length===5)throw new Error("Packed KV is not implemented");let n=nr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,e.inputs[0],void 0,0),o=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,i=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,a=Pd(e,e.inputs[1],o,r,1),d=Pd(e,e.inputs[2],i,r,2);Wt(e,n,a,d,void 0,void 0,void 0,void 0,void 0,r,t)}});var Dd,Mf,Rf,Md,Rd=V(()=>{"use strict";Q();ie();lt();ae();Dd=(e,t,r,n,o,i,a,d)=>{let l=we(i),c=l===1?"f32":`vec${l}f`,m=l===1?"vec2f":`mat2x${l}f`,u=o*a,h=[o,a,i/l],w=[o,a,2],g=["rank","type","type"],y=[];y.push(...R(h,w));let S=$=>{let v=E("x",t.dataType,3,l),x=E("scale",r.dataType,r.dims),T=E("bias",n.dataType,n.dims),C=M("output",1,3,2),A=[v,x,T,C],P=64;return`
5453
+ var indices = ${d.offsetToIndices("global_idx")};
5454
+ var index = ${d.indicesGet("indices",i)};
5455
+ let output_number = calculateOutputIndex(index);
5456
+ if (output_number != 0) {
5457
+ index -= ${F("uniforms.size_in_split_axis","output_number - 1u",l.length)};
5458
+ ${d.indicesSet("indices",i,"index")};
5459
+ }
5460
+ writeBufferData(output_number, indices, global_idx);
5461
+ }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:w,getRunData:()=>({outputs:c,dispatchGroup:{x:Math.ceil(n/64)},programUniforms:h})}},Ed=(e,t)=>{zf(e.inputs);let r=e.inputs.length===1?t:Of(e.inputs,t);e.compute(Io(e.inputs,r),{inputs:[0]})},Pd=e=>{let t=e.axis,r=e.splitSizes,n=e.numOutputs<0?r.length:e.numOutputs;if(n!==r.length)throw new Error("numOutputs and splitSizes lengh must be equal");return ee({axis:t,numOutputs:n,splitSizes:r})}});var Mf,Rf,zd,Od,Dd=U(()=>{"use strict";Ie();Kr();To();Co();lt();Mf=(e,t)=>{if(t.doRotary&&e.length<=7)throw new Error("cos_cache and sin_cache inputs are required if do_rotary is specified");let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4];if(t.localWindowSize!==-1)throw new Error("Local attention is not supported");if(t.softcap!==0)throw new Error("Softcap is not supported");if(t.rotaryInterleaved!==0)throw new Error("Rotary interleaved is not supported");if(t.smoothSoftmax)throw new Error("Smooth softmax is not supported");if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let d=!1,l=r.dims[0],c=r.dims[1],m=r.dims.length===3?d?r.dims[2]/3:r.dims[2]:t.numHeads*r.dims[4],u=c,h=0,w=!n||n.dims.length===0,g=Math.floor(w?m/(t.numHeads+2*t.kvNumHeads):m/t.numHeads);w&&(m=g*t.numHeads);let y=i&&i.dims.length!==0,S=a&&a.dims.length!==0;if(y&&i.dims.length===4&&i.dims[0]===l&&i.dims[1]!==t.kvNumHeads&&i.dims[2]===t.kvNumHeads&&i.dims[3]===g)throw new Error("BSNH pastKey/pastValue is not supported");if(y&&S){if(i.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(a.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');h=i.dims[2]}else if(y||S)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v=1;if(n&&n.dims.length>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is 
given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(r.dims[2]%n.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');u=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==g)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');u=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==g)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');u=n.dims[2]}}else{if(r.dims.length!==3&&r.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(r.dims.length===5&&(r.dims[2]!==t.numHeads||r.dims[3]!==3))throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}let x=0,T=!1,C=t.kvNumHeads?g*t.kvNumHeads:m;if(o&&o.dims.length>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(u!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');C=o.dims[2]}else{if(u!==o.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');C=o.dims[1]*o.dims[3],T=!0}}let A=e.length>4?e[5]:void 0;if(A&&A.dims.length!==1&&A.dims[0]!==l)throw new Error('Input "seqlens" is expected to have 1 dimension and the same dim 0 as batch_size');let 
P=-1,D=-1,W=!1;return{batchSize:l,sequenceLength:c,pastSequenceLength:h,kvSequenceLength:u,totalSequenceLength:P,maxSequenceLength:D,inputHiddenSize:0,hiddenSize:m,vHiddenSize:C,headSize:g,vHeadSize:Math.floor(C/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:x,scale:t.scale,broadcastResPosBias:W,passPastInKv:T,qkvFormat:v}},Rf=ee({perm:[0,2,1,3]}),zd=(e,t,r)=>{let n=t,o=r.kvNumHeads;return t.dims.length===3&&r.kvSequenceLength!==0&&(n=t.reshape([r.batchSize,r.kvSequenceLength,o,r.headSize]),n=e.compute(Pe(n,Rf.perm),{inputs:[n],outputs:[-1]})[0]),n},Od=(e,t)=>{let r=Mf(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(e.inputs[1]?.dims.length===5)throw new Error("Packed KV is not implemented");let n=e.inputs[0],o=e.inputs[1]&&e.inputs[1].dims.length>0?e.inputs[1]:void 0,i=e.inputs[2]&&e.inputs[2].dims.length>0?e.inputs[2]:void 0,a=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,d=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,l=e.inputs.length>4?e.inputs[5]:void 0,c=e.inputs.length>5?e.inputs[6]:void 0,m=r.kvNumHeads?r.kvNumHeads:r.numHeads,u=ee({axis:2,numOutputs:3,splitSizes:[r.numHeads*r.headSize,m*r.headSize,m*r.headSize]}),[h,w,g]=!o&&!i?e.compute(Io([n],u),{inputs:[n],outputs:[-1,-1,-1]}):[n,o,i],y=nr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,h,void 0,0);Nt(e,y,zd(e,w,r),zd(e,g,r),void 0,void 0,a,d,void 0,r,l,c)}});var Bd,Uf,Vf,Md,Rd=U(()=>{"use strict";J();ae();lt();se();Bd=(e,t,r,n,o,i,a,d)=>{let l=we(i),c=l===1?"f32":`vec${l}f`,m=l===1?"vec2f":`mat2x${l}f`,u=o*a,h=[o,a,i/l],w=[o,a,2],g=["rank","type","type"],y=[];y.push(...V(h,w));let S=$=>{let v=E("x",t.dataType,3,l),x=E("scale",r.dataType,r.dims),T=E("bias",n.dataType,n.dims),C=M("output",1,3,2),A=[v,x,T,C],P=64;return`
5459
5462
  var<workgroup> workgroup_shared : array<${m}, ${P}>;
5460
5463
  const workgroup_size = ${P}u;
5461
5464
  ${$.declareVariables(...A)}
@@ -5481,15 +5484,15 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5481
5484
  workgroupBarrier();
5482
5485
  }
5483
5486
  if (local_idx == 0) {
5484
- let sum_final = ${Ze("workgroup_shared[0][0]",l)} / f32(hight * ${l});
5485
- let squared_sum_final = ${Ze("workgroup_shared[0][1]",l)} / f32(hight * ${l});
5487
+ let sum_final = ${Qe("workgroup_shared[0][0]",l)} / f32(hight * ${l});
5488
+ let squared_sum_final = ${Qe("workgroup_shared[0][1]",l)} / f32(hight * ${l});
5486
5489
 
5487
5490
  let inv_std_dev = inverseSqrt(squared_sum_final - sum_final * sum_final + f32(${d}));
5488
5491
  let channel_scale = inv_std_dev * f32(scale[channel]);
5489
5492
  let channel_shift = f32(bias[channel]) - sum_final * channel_scale;
5490
5493
  output[workgroup_index] = vec2f(channel_scale, channel_shift);
5491
5494
  }
5492
- }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${l};${d}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:w,dataType:1}],dispatchGroup:{x:u},programUniforms:y}),getShaderSource:S},{inputs:[t,r,n],outputs:[-1]})[0]},Mf=(e,t,r)=>{let n=t[0].dims,o=n,i=2,a=n[0],d=n[1],l=k.sizeFromDimension(n,i),c=we(l),m=k.size(o)/c,u=Dd(e,t[0],t[1],t[2],a,l,d,r.epsilon),h=[a,d,l/c],w=[a,d],g=["type","none"],y=S=>{let $=E("x",t[0].dataType,h.length,c),v=E("scale_shift",1,w.length,2),x=M("output",t[0].dataType,h.length,c),T=[$,v,x];return`
5495
+ }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${l};${d}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:w,dataType:1}],dispatchGroup:{x:u},programUniforms:y}),getShaderSource:S},{inputs:[t,r,n],outputs:[-1]})[0]},Uf=(e,t,r)=>{let n=t[0].dims,o=n,i=2,a=n[0],d=n[1],l=k.sizeFromDimension(n,i),c=we(l),m=k.size(o)/c,u=Bd(e,t[0],t[1],t[2],a,l,d,r.epsilon),h=[a,d,l/c],w=[a,d],g=["type","none"],y=S=>{let $=E("x",t[0].dataType,h.length,c),v=E("scale_shift",1,w.length,2),x=M("output",t[0].dataType,h.length,c),T=[$,v,x];return`
5493
5496
  ${S.registerUniform("output_size","u32").declareVariables(...T)}
5494
5497
  ${S.mainStart()}
5495
5498
  ${S.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5499,7 +5502,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5499
5502
  let scale_shift = ${v.getByIndices("vec2<u32>(batch, channel)")};
5500
5503
  let value = ${$.getByOffset("global_idx")} * ${x.type.value}(scale_shift.x) + ${x.type.value}(scale_shift.y);
5501
5504
  ${x.setByOffset("global_idx","value")};
5502
- }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${c}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:[{type:12,data:m},...R(h,w,h)]}),getShaderSource:y},{inputs:[t[0],u]})},Rf=(e,t,r)=>{let n=t[0].dims,o=n,i=n[0],a=n[n.length-1],d=k.sizeFromDimension(n,1)/a,l=we(a),c=k.size(o)/l,m=[{type:12,data:d},{type:12,data:Math.floor(a/l)}],u=["type","type"],h=[0,n.length-1];for(let S=0;S<n.length-2;S++)h.push(S+1);let w=e.compute(Pe(e.inputs[0],h),{inputs:[e.inputs[0]],outputs:[-1]})[0],g=Dd(e,w,t[1],t[2],i,d,a,r.epsilon),y=S=>{let $=he(t[0].dataType),v=l===1?"vec2f":`mat${l}x2f`,x=A=>{let P=A===0?"x":"y",B=l===1?"f32":`vec${l}f`;switch(l){case 1:return`${$}(${B}(scale.${P}))`;case 2:return`vec2<${$}>(${B}(scale[0].${P}, scale[1].${P}))`;case 4:return`vec4<${$}>(${B}(scale[0].${P}, scale[1].${P}, scale[2].${P}, scale[3].${P}))`;default:throw new Error(`Not supported compoents ${l}`)}},T=E("input",t[0].dataType,t[0].dims,l),C=M("output",t[0].dataType,o,l);return`
5505
+ }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${c}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:[{type:12,data:m},...V(h,w,h)]}),getShaderSource:y},{inputs:[t[0],u]})},Vf=(e,t,r)=>{let n=t[0].dims,o=n,i=n[0],a=n[n.length-1],d=k.sizeFromDimension(n,1)/a,l=we(a),c=k.size(o)/l,m=[{type:12,data:d},{type:12,data:Math.floor(a/l)}],u=["type","type"],h=[0,n.length-1];for(let S=0;S<n.length-2;S++)h.push(S+1);let w=e.compute(Pe(e.inputs[0],h),{inputs:[e.inputs[0]],outputs:[-1]})[0],g=Bd(e,w,t[1],t[2],i,d,a,r.epsilon),y=S=>{let $=he(t[0].dataType),v=l===1?"vec2f":`mat${l}x2f`,x=A=>{let P=A===0?"x":"y",D=l===1?"f32":`vec${l}f`;switch(l){case 1:return`${$}(${D}(scale.${P}))`;case 2:return`vec2<${$}>(${D}(scale[0].${P}, scale[1].${P}))`;case 4:return`vec4<${$}>(${D}(scale[0].${P}, scale[1].${P}, scale[2].${P}, scale[3].${P}))`;default:throw new Error(`Not supported compoents ${l}`)}},T=E("input",t[0].dataType,t[0].dims,l),C=M("output",t[0].dataType,o,l);return`
5503
5506
  @group(0) @binding(0) var<storage, read> input : array<${T.type.storage}>;
5504
5507
  @group(0) @binding(1) var<storage, read> scale_input : array<${v}>;
5505
5508
  @group(0) @binding(2) var<storage, read_write> output : array<${C.type.storage}>;
@@ -5513,23 +5516,23 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5513
5516
  let scale_offset = current_image_number * uniforms.C + current_channel_number;
5514
5517
  let scale = scale_input[scale_offset];
5515
5518
  output[global_idx] = fma(input[global_idx], ${x(0)}, ${x(1)});
5516
- }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${l}`,inputDependencies:u},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:m}),getShaderSource:y},{inputs:[t[0],g]})},Md=(e,t)=>{t.format==="NHWC"?Rf(e,e.inputs,t):Mf(e,e.inputs,t)}});var Uf,Vf,Ud,Vd=V(()=>{"use strict";Q();ie();ae();Uf=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},Vf=(e,t,r)=>{let n=t.simplified,o=e[0].dims,i=e[1],a=!n&&e[2],d=o,l=k.normalizeAxis(t.axis,o.length),c=k.sizeToDimension(o,l),m=k.sizeFromDimension(o,l),u=k.size(i.dims),h=a?k.size(a.dims):0;if(u!==m||a&&h!==m)throw new Error(`Size of X.shape()[axis:] == ${m}.
5519
+ }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${l}`,inputDependencies:u},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:m}),getShaderSource:y},{inputs:[t[0],g]})},Md=(e,t)=>{t.format==="NHWC"?Vf(e,e.inputs,t):Uf(e,e.inputs,t)}});var Wf,Nf,Ud,Vd=U(()=>{"use strict";J();ae();se();Wf=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},Nf=(e,t,r)=>{let n=t.simplified,o=e[0].dims,i=e[1],a=!n&&e[2],d=o,l=k.normalizeAxis(t.axis,o.length),c=k.sizeToDimension(o,l),m=k.sizeFromDimension(o,l),u=k.size(i.dims),h=a?k.size(a.dims):0;if(u!==m||a&&h!==m)throw new Error(`Size of X.shape()[axis:] == ${m}.
5517
5520
  Size of scale and bias (if provided) must match this.
5518
- Got scale size of ${u} and bias size of ${h}`);let w=[];for(let C=0;C<o.length;++C)C<l?w.push(o[C]):w.push(1);let g=we(m),y=["type","type"],S=[{type:12,data:c},{type:1,data:m},{type:12,data:Math.floor(m/g)},{type:1,data:t.epsilon}];a&&y.push("type");let $=r>1,v=r>2,x=C=>{let A=he(e[0].dataType),P=[E("x",e[0].dataType,e[0].dims,g),E("scale",i.dataType,i.dims,g)];a&&P.push(E("bias",a.dataType,a.dims,g)),P.push(M("output",e[0].dataType,d,g)),$&&P.push(M("mean_data_output",1,w)),v&&P.push(M("inv_std_output",1,w));let B=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return`
5519
- ${C.registerUniforms(B).declareVariables(...P)}
5521
+ Got scale size of ${u} and bias size of ${h}`);let w=[];for(let C=0;C<o.length;++C)C<l?w.push(o[C]):w.push(1);let g=we(m),y=["type","type"],S=[{type:12,data:c},{type:1,data:m},{type:12,data:Math.floor(m/g)},{type:1,data:t.epsilon}];a&&y.push("type");let $=r>1,v=r>2,x=C=>{let A=he(e[0].dataType),P=[E("x",e[0].dataType,e[0].dims,g),E("scale",i.dataType,i.dims,g)];a&&P.push(E("bias",a.dataType,a.dims,g)),P.push(M("output",e[0].dataType,d,g)),$&&P.push(M("mean_data_output",1,w)),v&&P.push(M("inv_std_output",1,w));let D=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return`
5522
+ ${C.registerUniforms(D).declareVariables(...P)}
5520
5523
  ${C.mainStart()}
5521
5524
  ${C.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")}
5522
5525
  let offset = global_idx * uniforms.norm_size_vectorized;
5523
- var mean_vector = ${io("f32",g)};
5524
- var mean_square_vector = ${io("f32",g)};
5526
+ var mean_vector = ${ao("f32",g)};
5527
+ var mean_square_vector = ${ao("f32",g)};
5525
5528
 
5526
5529
  for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) {
5527
5530
  let value = ${kt(A,g,"x[h + offset]")};
5528
5531
  mean_vector += value;
5529
5532
  mean_square_vector += value * value;
5530
5533
  }
5531
- let mean = ${Ze("mean_vector",g)} / uniforms.norm_size;
5532
- let inv_std_dev = inverseSqrt(${Ze("mean_square_vector",g)} / uniforms.norm_size ${n?"":"- mean * mean"} + uniforms.epsilon);
5534
+ let mean = ${Qe("mean_vector",g)} / uniforms.norm_size;
5535
+ let inv_std_dev = inverseSqrt(${Qe("mean_square_vector",g)} / uniforms.norm_size ${n?"":"- mean * mean"} + uniforms.epsilon);
5533
5536
 
5534
5537
  for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) {
5535
5538
  let f32input = ${kt(A,g,"x[j + offset]")};
@@ -5541,24 +5544,24 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5541
5544
 
5542
5545
  ${$?"mean_data_output[global_idx] = mean":""};
5543
5546
  ${v?"inv_std_output[global_idx] = inv_std_dev":""};
5544
- }`},T=[{dims:d,dataType:e[0].dataType}];return $&&T.push({dims:w,dataType:1}),v&&T.push({dims:w,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${g};${r};${n}`,inputDependencies:y},getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(c/64)},programUniforms:S}),getShaderSource:x}},Ud=(e,t)=>{Uf(e.inputs),e.compute(Vf(e.inputs,t,e.outputCount))}});var Nf,Wf,Lf,Nd,Wd,Ld=V(()=>{"use strict";Q();ie();Ie();ae();Nf=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let r=e[0],n=r.dims.length;if(r.dims[n-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let o=Math.floor((t.k+t.blockSize-1)/t.blockSize),i=t.blockSize/8*t.bits,a=e[1];if(!k.areEqual(a.dims,[t.n,o,i]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let l=e[2].dims;if(k.size(l)!==t.n*o)throw new Error("scales input size error.");if(e.length===4){let m=e[3].dims,u=t.bits>4?t.n*o:t.n*Math.floor((o+1)/2);if(k.size(m)!==u)throw new Error("zeroPoints input size error.")}},Wf=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=we(t.k),w=we(m),g=we(a),y=d.concat([o,a]),S=o>1&&a/g%2===0?2:1,$=k.size(y)/g/S,v=64,x=[],T=[l,o,i/h],C=k.convertShape(e[1].dims).slice();C.splice(-1,1,m/w),x.push(...R(T)),x.push(...R(C)),x.push(...R(e[2].dims)),e.length===4&&x.push(...R(k.convertShape(e[3].dims)));let A=[l,o,a/g];x.push(...R(A));let P=B=>{let N=T.length,W=E("a",e[0].dataType,N,h),K=E("b",12,C.length,w),Z=E("scales",e[2].dataType,e[2].dims.length),ee=[W,K,Z],se=e.length===4?E("zero_points",12,e[3].dims.length):void 0;se&&ee.push(se);let de=A.length,Y=M("output",e[0].dataType,de,g),le=he(e[0].dataType),ce=(()=>{switch(h){case 1:return`array<${le}, 8>`;case 2:return`mat4x2<${le}>`;case 4:return`mat2x4<${le}>`;default:throw new Error(`${h}-component is not supported.`)}})(),q=()=>{let ne=`
5547
+ }`},T=[{dims:d,dataType:e[0].dataType}];return $&&T.push({dims:w,dataType:1}),v&&T.push({dims:w,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${g};${r};${n}`,inputDependencies:y},getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(c/64)},programUniforms:S}),getShaderSource:x}},Ud=(e,t)=>{Wf(e.inputs),e.compute(Nf(e.inputs,t,e.outputCount))}});var Lf,Hf,Gf,Wd,Nd,Ld=U(()=>{"use strict";J();ae();Ie();se();Lf=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let r=e[0],n=r.dims.length;if(r.dims[n-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let o=Math.floor((t.k+t.blockSize-1)/t.blockSize),i=t.blockSize/8*t.bits,a=e[1];if(!k.areEqual(a.dims,[t.n,o,i]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let l=e[2].dims;if(k.size(l)!==t.n*o)throw new Error("scales input size error.");if(e.length===4){let m=e[3].dims,u=t.bits>4?t.n*o:t.n*Math.floor((o+1)/2);if(k.size(m)!==u)throw new Error("zeroPoints input size error.")}},Hf=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=we(t.k),w=we(m),g=we(a),y=d.concat([o,a]),S=o>1&&a/g%2===0?2:1,$=k.size(y)/g/S,v=64,x=[],T=[l,o,i/h],C=k.convertShape(e[1].dims).slice();C.splice(-1,1,m/w),x.push(...V(T)),x.push(...V(C)),x.push(...V(e[2].dims)),e.length===4&&x.push(...V(k.convertShape(e[3].dims)));let A=[l,o,a/g];x.push(...V(A));let P=D=>{let W=T.length,N=E("a",e[0].dataType,W,h),j=E("b",12,C.length,w),Y=E("scales",e[2].dataType,e[2].dims.length),Z=[N,j,Y],te=e.length===4?E("zero_points",12,e[3].dims.length):void 0;te&&Z.push(te);let ue=A.length,K=M("output",e[0].dataType,ue,g),de=he(e[0].dataType),ce=(()=>{switch(h){case 1:return`array<${de}, 8>`;case 2:return`mat4x2<${de}>`;case 4:return`mat2x4<${de}>`;default:throw new Error(`${h}-component is not supported.`)}})(),q=()=>{let ne=`
5545
5548
  // reuse a data
5546
- var input_offset = ${W.indicesToOffset(`${W.type.indices}(batch, row, word_offset)`)};
5549
+ var input_offset = ${N.indicesToOffset(`${N.type.indices}(batch, row, word_offset)`)};
5547
5550
  var a_data: ${ce};
5548
5551
  for (var j: u32 = 0; j < ${8/h}; j++) {
5549
- a_data[j] = ${W.getByOffset("input_offset")};
5552
+ a_data[j] = ${N.getByOffset("input_offset")};
5550
5553
  input_offset++;
5551
5554
  }
5552
5555
  `;for(let oe=0;oe<g*S;oe++)ne+=`
5553
5556
  b_value = ${w===1?`b${oe}_data`:`b${oe}_data[i]`};
5554
5557
  b_value_lower = unpack4xU8(b_value & b_mask);
5555
5558
  b_value_upper = unpack4xU8((b_value >> 4) & b_mask);
5556
- b_quantized_values = ${ce}(${Array.from({length:4},(U,G)=>`${le}(b_value_lower[${G}]), ${le}(b_value_upper[${G}])`).join(", ")});
5557
- b_dequantized_values = ${(()=>h===1?`${ce}(${Array.from({length:8},(U,G)=>`(b_quantized_values[${G}] - ${se?`zero_point${oe}`:"zero_point"}) * scale${oe}`).join(", ")});`:`(b_quantized_values - ${ce}(${Array(8).fill(`${se?`zero_point${oe}`:"zero_point"}`).join(",")})) * scale${oe};`)()};
5558
- workgroup_shared[local_id.x * ${S} + ${Math.floor(oe/g)}]${g>1?`[${oe%g}]`:""} += ${Array.from({length:8/h},(U,G)=>`${h===1?`a_data[${G}] * b_dequantized_values[${G}]`:`dot(a_data[${G}], b_dequantized_values[${G}])`}`).join(" + ")};
5559
- `;return ne},ue=()=>{let ne=`
5559
+ b_quantized_values = ${ce}(${Array.from({length:4},(R,G)=>`${de}(b_value_lower[${G}]), ${de}(b_value_upper[${G}])`).join(", ")});
5560
+ b_dequantized_values = ${(()=>h===1?`${ce}(${Array.from({length:8},(R,G)=>`(b_quantized_values[${G}] - ${te?`zero_point${oe}`:"zero_point"}) * scale${oe}`).join(", ")});`:`(b_quantized_values - ${ce}(${Array(8).fill(`${te?`zero_point${oe}`:"zero_point"}`).join(",")})) * scale${oe};`)()};
5561
+ workgroup_shared[local_id.x * ${S} + ${Math.floor(oe/g)}]${g>1?`[${oe%g}]`:""} += ${Array.from({length:8/h},(R,G)=>`${h===1?`a_data[${G}] * b_dequantized_values[${G}]`:`dot(a_data[${G}], b_dequantized_values[${G}])`}`).join(" + ")};
5562
+ `;return ne},le=()=>{let ne=`
5560
5563
  var col_index = col * ${g};
5561
- ${se?`
5564
+ ${te?`
5562
5565
  let zero_point_bytes_per_col = (nBlocksPerCol + 1) / 2;
5563
5566
  var zero_point_byte_count: u32;
5564
5567
  var zero_point_word_index: u32;
@@ -5567,18 +5570,18 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5567
5570
  var zero_point_bits_offset: u32;
5568
5571
  var zero_point_word: u32;`:`
5569
5572
  // The default zero point is 8 for unsigned 4-bit quantization.
5570
- let zero_point = ${le}(8);`}
5573
+ let zero_point = ${de}(8);`}
5571
5574
  `;for(let oe=0;oe<g*S;oe++)ne+=`
5572
- let scale${oe} = ${Z.getByOffset("col_index * nBlocksPerCol + block")};
5573
- ${se?`
5575
+ let scale${oe} = ${Y.getByOffset("col_index * nBlocksPerCol + block")};
5576
+ ${te?`
5574
5577
  zero_point_byte_count = col_index * zero_point_bytes_per_col + (block >> 0x1u);
5575
5578
  zero_point_word_index = zero_point_byte_count >> 0x2u;
5576
5579
  zero_point_byte_offset = zero_point_byte_count & 0x3u;
5577
5580
  zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);
5578
- zero_point_word = ${se.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5579
- let zero_point${oe} = ${le}((zero_point_word) & 0xFu);`:""}
5581
+ zero_point_word = ${te.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5582
+ let zero_point${oe} = ${de}((zero_point_word) & 0xFu);`:""}
5580
5583
  col_index += 1;`;return ne},re=()=>{let ne=`col_index = col * ${g};`;for(let oe=0;oe<g*S;oe++)ne+=`
5581
- let b${oe}_data = ${K.getByIndices(`${K.type.indices}(col_index, block, word)`)};
5584
+ let b${oe}_data = ${j.getByIndices(`${j.type.indices}(col_index, block, word)`)};
5582
5585
  col_index += 1;`;return ne+=`
5583
5586
  var b_value: u32;
5584
5587
  let b_mask: u32 = 0x0F0F0F0Fu;
@@ -5586,10 +5589,10 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5586
5589
  var b_value_upper: vec4<u32>;
5587
5590
  var b_quantized_values: ${ce};
5588
5591
  var b_dequantized_values: ${ce};`,ne};return`
5589
- var<workgroup> workgroup_shared: array<${Y.type.value}, ${S*v}>;
5590
- ${B.declareVariables(...ee,Y)}
5591
- ${B.mainStart([v,1,1])}
5592
- let output_indices = ${Y.offsetToIndices(`(global_idx / ${v}) * ${S}`)};
5592
+ var<workgroup> workgroup_shared: array<${K.type.value}, ${S*v}>;
5593
+ ${D.declareVariables(...Z,K)}
5594
+ ${D.mainStart([v,1,1])}
5595
+ let output_indices = ${K.offsetToIndices(`(global_idx / ${v}) * ${S}`)};
5593
5596
  let col = output_indices[2];
5594
5597
  let row = output_indices[1];
5595
5598
  let batch = output_indices[0];
@@ -5598,7 +5601,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5598
5601
  for (var block = local_id.x; block < nBlocksPerCol; block += ${v}) {
5599
5602
  //process one block
5600
5603
  var word_offset: u32 = block * ${t.blockSize/h};
5601
- ${ue()}
5604
+ ${le()}
5602
5605
  for (var word: u32 = 0; word < ${m}; word += ${w}) {
5603
5606
  ${re()}
5604
5607
  for (var i: u32 = 0; i < ${w}; i++) {
@@ -5610,25 +5613,25 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5610
5613
  workgroupBarrier();
5611
5614
 
5612
5615
  if (local_id.x < ${S}) {
5613
- var output_value: ${Y.type.value} = ${Y.type.value}(0);
5616
+ var output_value: ${K.type.value} = ${K.type.value}(0);
5614
5617
  var workgroup_shared_offset: u32 = local_id.x;
5615
5618
  for (var b: u32 = 0u; b < ${v}u; b++) {
5616
5619
  output_value += workgroup_shared[workgroup_shared_offset];
5617
5620
  workgroup_shared_offset += ${S};
5618
5621
  }
5619
- ${Y.setByIndices(`${Y.type.indices}(batch, row, col + local_id.x)`,"output_value")};
5622
+ ${K.setByIndices(`${K.type.indices}(batch, row, col + local_id.x)`,"output_value")};
5620
5623
  }
5621
- }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${h};${w};${g};${S};${v}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:y,dataType:u}],dispatchGroup:{x:$},programUniforms:x}),getShaderSource:P}},Lf=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=we(t.k),w=we(m),g=d.concat([o,a]),y=128,S=a%8===0?8:a%4===0?4:1,$=y/S,v=$*w*8,x=v/h,T=v/t.blockSize,C=k.size(g)/S,A=[],P=[l,o,i/h],B=k.convertShape(e[1].dims).slice();B.splice(-1,1,m/w),A.push(...R(P)),A.push(...R(B)),A.push(...R(e[2].dims)),e.length===4&&A.push(...R(k.convertShape(e[3].dims)));let N=[l,o,a];A.push(...R(N));let W=K=>{let Z=P.length,ee=E("a",e[0].dataType,Z,h),se=E("b",12,B.length,w),de=E("scales",e[2].dataType,e[2].dims.length),Y=[ee,se,de],le=e.length===4?E("zero_points",12,e[3].dims.length):void 0;le&&Y.push(le);let ce=N.length,q=M("output",e[0].dataType,ce),ue=he(e[0].dataType),re=()=>{switch(h){case 1:return`
5622
- let a_data0 = vec4<${ue}>(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]);
5623
- let a_data1 = vec4<${ue}>(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]);`;case 2:return`
5624
- let a_data0 = vec4<${ue}>(sub_a[word_offset], sub_a[word_offset + 1]);
5625
- let a_data1 = vec4<${ue}>(sub_a[word_offset + 2], sub_a[word_offset + 3]);`;case 4:return`
5624
+ }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${h};${w};${g};${S};${v}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:y,dataType:u}],dispatchGroup:{x:$},programUniforms:x}),getShaderSource:P}},Gf=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=we(t.k),w=we(m),g=d.concat([o,a]),y=128,S=a%8===0?8:a%4===0?4:1,$=y/S,v=$*w*8,x=v/h,T=v/t.blockSize,C=k.size(g)/S,A=[],P=[l,o,i/h],D=k.convertShape(e[1].dims).slice();D.splice(-1,1,m/w),A.push(...V(P)),A.push(...V(D)),A.push(...V(e[2].dims)),e.length===4&&A.push(...V(k.convertShape(e[3].dims)));let W=[l,o,a];A.push(...V(W));let N=j=>{let Y=P.length,Z=E("a",e[0].dataType,Y,h),te=E("b",12,D.length,w),ue=E("scales",e[2].dataType,e[2].dims.length),K=[Z,te,ue],de=e.length===4?E("zero_points",12,e[3].dims.length):void 0;de&&K.push(de);let ce=W.length,q=M("output",e[0].dataType,ce),le=he(e[0].dataType),re=()=>{switch(h){case 1:return`
5625
+ let a_data0 = vec4<${le}>(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]);
5626
+ let a_data1 = vec4<${le}>(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]);`;case 2:return`
5627
+ let a_data0 = vec4<${le}>(sub_a[word_offset], sub_a[word_offset + 1]);
5628
+ let a_data1 = vec4<${le}>(sub_a[word_offset + 2], sub_a[word_offset + 3]);`;case 4:return`
5626
5629
  let a_data0 = sub_a[word_offset];
5627
5630
  let a_data1 = sub_a[word_offset + 1];`;default:throw new Error(`${h}-component is not supported.`)}};return`
5628
- var<workgroup> sub_a: array<${ee.type.value}, ${x}>;
5631
+ var<workgroup> sub_a: array<${Z.type.value}, ${x}>;
5629
5632
  var<workgroup> inter_results: array<array<${q.type.value}, ${$}>, ${S}>;
5630
- ${K.declareVariables(...Y,q)}
5631
- ${K.mainStart([$,S,1])}
5633
+ ${j.declareVariables(...K,q)}
5634
+ ${j.mainStart([$,S,1])}
5632
5635
  let output_indices = ${q.offsetToIndices(`workgroup_index * ${S}`)};
5633
5636
  let col = output_indices[2];
5634
5637
  let row = output_indices[1];
@@ -5645,9 +5648,9 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5645
5648
  let a_col = a_col_start + a_offset;
5646
5649
  if (a_col < uniforms.a_shape[2])
5647
5650
  {
5648
- sub_a[a_offset] = ${ee.getByIndices(`${ee.type.indices}(batch, row, a_col)`)};
5651
+ sub_a[a_offset] = ${Z.getByIndices(`${Z.type.indices}(batch, row, a_col)`)};
5649
5652
  } else {
5650
- sub_a[a_offset] = ${ee.type.value}(0);
5653
+ sub_a[a_offset] = ${Z.type.value}(0);
5651
5654
  }
5652
5655
  }
5653
5656
  workgroupBarrier();
@@ -5655,27 +5658,27 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5655
5658
  // each thread process one block
5656
5659
  let b_row = col + local_id.y;
5657
5660
  let block = tile * ${T} + local_id.x;
5658
- ${le?`
5661
+ ${de?`
5659
5662
  let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2;
5660
5663
  let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u);
5661
5664
  let zero_point_word_index = zero_point_byte_count >> 0x2u;
5662
5665
  let zero_point_byte_offset = zero_point_byte_count & 0x3u;
5663
5666
  let zero_point_nibble_offset: u32 = block & 0x1u;
5664
5667
  let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);
5665
- let zero_point_word = ${le.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5666
- let zero_point = ${ue}((zero_point_word) & 0xFu);`:`
5668
+ let zero_point_word = ${de.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5669
+ let zero_point = ${le}((zero_point_word) & 0xFu);`:`
5667
5670
  // The default zero point is 8 for unsigned 4-bit quantization.
5668
- let zero_point = ${ue}(8);`}
5669
- let scale = ${de.getByOffset("b_row * n_blocks_per_col + block")};
5670
- let b_data = ${se.getByIndices(`${se.type.indices}(b_row, block, 0)`)};
5671
+ let zero_point = ${le}(8);`}
5672
+ let scale = ${ue.getByOffset("b_row * n_blocks_per_col + block")};
5673
+ let b_data = ${te.getByIndices(`${te.type.indices}(b_row, block, 0)`)};
5671
5674
  var word_offset = local_id.x * ${t.blockSize/h};
5672
5675
  for (var i: u32 = 0; i < ${w}; i++) {
5673
5676
  ${re()}
5674
5677
  let b_value = ${w===1?"b_data":"b_data[i]"};
5675
5678
  let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);
5676
5679
  let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);
5677
- let b_quantized_values = mat2x4<${ue}>(${Array.from({length:4},(ne,oe)=>`${ue}(b_value_lower[${oe}]), ${ue}(b_value_upper[${oe}])`).join(", ")});
5678
- let b_dequantized_values = (b_quantized_values - mat2x4<${ue}>(${Array(8).fill("zero_point").join(",")})) * scale;
5680
+ let b_quantized_values = mat2x4<${le}>(${Array.from({length:4},(ne,oe)=>`${le}(b_value_lower[${oe}]), ${le}(b_value_upper[${oe}])`).join(", ")});
5681
+ let b_dequantized_values = (b_quantized_values - mat2x4<${le}>(${Array(8).fill("zero_point").join(",")})) * scale;
5679
5682
  inter_results[local_id.y][local_id.x] += ${Array.from({length:2},(ne,oe)=>`${`dot(a_data${oe}, b_dequantized_values[${oe}])`}`).join(" + ")};
5680
5683
  word_offset += ${8/h};
5681
5684
  }
@@ -5692,7 +5695,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5692
5695
  ${q.setByIndices(`${q.type.indices}(batch, row, col + local_idx)`,"output_value")}
5693
5696
  }
5694
5697
  }
5695
- }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${h};${w};${$};${S}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:g,dataType:u}],dispatchGroup:{x:C},programUniforms:A}),getShaderSource:W}},Nd=(e,t)=>{Nf(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(Lf(e.inputs,t)):e.compute(Wf(e.inputs,t))},Wd=e=>J(e)});var Hf,Gf,Ff,qf,jf,Kf,Yf,Xf,Hd,Gd=V(()=>{"use strict";Q();ie();ae();Hf=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},Gf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5698
+ }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${h};${w};${$};${S}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:g,dataType:u}],dispatchGroup:{x:C},programUniforms:A}),getShaderSource:N}},Wd=(e,t)=>{Lf(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(Gf(e.inputs,t)):e.compute(Hf(e.inputs,t))},Nd=e=>ee(e)});var Ff,qf,jf,Kf,Yf,Xf,Qf,Zf,Hd,Gd=U(()=>{"use strict";J();ae();se();Ff=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},qf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5696
5699
  k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5697
5700
  if (k < 0) {
5698
5701
  break;
@@ -5709,7 +5712,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5709
5712
  ${n}
5710
5713
  value = x[offset];
5711
5714
  }
5712
- `},Ff=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5715
+ `},jf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5713
5716
  k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5714
5717
  if (k < 0) {
5715
5718
  k = -k;
@@ -5727,7 +5730,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5727
5730
  var k = 0;
5728
5731
  ${n}
5729
5732
  value = x[offset];
5730
- `},qf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5733
+ `},Kf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5731
5734
  k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5732
5735
  if (k < 0) {
5733
5736
  k = 0;
@@ -5741,7 +5744,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5741
5744
  var k = 0;
5742
5745
  ${n}
5743
5746
  value = x[offset];
5744
- `},jf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5747
+ `},Yf=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5745
5748
  k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5746
5749
  if (k < 0) {
5747
5750
  k += i32(${F("uniforms.x_shape",o,t)}]);
@@ -5755,7 +5758,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5755
5758
  var k = 0;
5756
5759
  ${n}
5757
5760
  value = x[offset];
5758
- `},Kf=(e,t,r)=>{switch(r.mode){case 0:return Gf(e,t,r.pads.length);case 1:return Ff(e,t,r.pads.length);case 2:return qf(e,t,r.pads.length);case 3:return jf(e,t,r.pads.length);default:throw new Error("Invalid mode")}},Yf=(e,t)=>{let r=k.padShape(e[0].dims.slice(),t.pads),n=e[0].dims,o=k.size(r),i=[{type:12,data:o},{type:6,data:t.pads}],a=e.length>=3&&e[2].data;t.mode===0&&i.push({type:a?e[2].dataType:1,data:t.value}),i.push(...R(e[0].dims,r));let d=["rank"],l=c=>{let m=M("output",e[0].dataType,r.length),u=E("x",e[0].dataType,n.length),h=u.type.value,w=Kf(m,n.length,t),g=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&g.push({name:"constant_value",type:a?h:"f32"}),`
5761
+ `},Xf=(e,t,r)=>{switch(r.mode){case 0:return qf(e,t,r.pads.length);case 1:return jf(e,t,r.pads.length);case 2:return Kf(e,t,r.pads.length);case 3:return Yf(e,t,r.pads.length);default:throw new Error("Invalid mode")}},Qf=(e,t)=>{let r=k.padShape(e[0].dims.slice(),t.pads),n=e[0].dims,o=k.size(r),i=[{type:12,data:o},{type:6,data:t.pads}],a=e.length>=3&&e[2].data;t.mode===0&&i.push({type:a?e[2].dataType:1,data:t.value}),i.push(...V(e[0].dims,r));let d=["rank"],l=c=>{let m=M("output",e[0].dataType,r.length),u=E("x",e[0].dataType,n.length),h=u.type.value,w=Xf(m,n.length,t),g=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&g.push({name:"constant_value",type:a?h:"f32"}),`
5759
5762
  ${c.registerUniforms(g).declareVariables(u,m)}
5760
5763
  ${c.mainStart()}
5761
5764
  ${c.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5765,7 +5768,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5765
5768
  var value = ${h}(0);
5766
5769
  ${w}
5767
5770
  output[global_idx] = value;
5768
- }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${a}`,inputDependencies:d},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(r)/64)},programUniforms:i}),getShaderSource:l}},Xf=(e,t)=>{if(e.length>1){let r=e[1].getBigInt64Array(),n=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,o=e[0].dims.length,i=new Int32Array(2*o).fill(0);if(e.length>=4){let d=e[3].getBigInt64Array();for(let l=0;l<d.length;l++)i[Number(d[l])]=Number(r[l]),i[Number(d[l])+o]=Number(r[l+d.length])}else r.forEach((d,l)=>i[Number(l)]=Number(d));let a=[];return i.forEach(d=>a.push(d)),{mode:t.mode,value:n,pads:a}}else return t},Hd=(e,t)=>{Hf(e.inputs);let r=Xf(e.inputs,t);e.compute(Yf(e.inputs,r),{inputs:[0]})}});var nn,Fd,qd,jd,Kd,Zf,Qf,Yd,Xd,Zd,Qd,Jd,el,tl,rl,nl,ol,il,al,sl=V(()=>{"use strict";Ke();Q();ie();ae();nn=e=>{if(_e.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},Fd=(e,t,r)=>{let n=t.format==="NHWC",o=e.dims.slice();n&&o.splice(1,0,o.pop());let i=Object.hasOwnProperty.call(t,"dilations"),a=t.kernelShape.slice(),d=t.strides.slice(),l=i?t.dilations.slice():[],c=t.pads.slice();Ct.adjustPoolAttributes(r,o,a,d,l,c);let m=Ct.computePoolOutputShape(r,o,d,l,a,c,t.autoPad),u=Object.assign({},t);i?Object.assign(u,{kernelShape:a,strides:d,pads:c,dilations:l,cacheKey:t.cacheKey}):Object.assign(u,{kernelShape:a,strides:d,pads:c,cacheKey:t.cacheKey});let h=m.slice();return h.push(h.splice(1,1)[0]),[u,n?h:m]},qd=(e,t)=>{let r=t.format==="NHWC",n=k.size(e),o=k.size(t.kernelShape),i=[{type:12,data:n},{type:12,data:o}],a=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let 
d=t.kernelShape[t.kernelShape.length-1],l=t.strides[t.strides.length-1],c=t.pads[t.pads.length/2-1],m=t.pads[t.pads.length-1],u=!!(c+m);i.push({type:12,data:d},{type:12,data:l},{type:12,data:c},{type:12,data:m}),a.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let h=!1;if(t.kernelShape.length===2){let w=t.kernelShape[t.kernelShape.length-2],g=t.strides[t.strides.length-2],y=t.pads[t.pads.length/2-2],S=t.pads[t.pads.length-2];h=!!(y+S),i.push({type:12,data:w},{type:12,data:g},{type:12,data:y},{type:12,data:S}),a.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[i,a,!0,u,h]}else{if(r)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let d=k.computeStrides(t.kernelShape);i.push({type:12,data:d},{type:12,data:t.pads},{type:12,data:t.strides}),a.push({name:"kernelStrides",type:"u32",length:d.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let l=t.pads.reduce((c,m)=>c+m);return[i,a,!!l,!1,!1]}},jd=(e,t,r,n,o,i,a,d,l,c,m,u)=>{let h=o.format==="NHWC",w=t.type.value,g=M("output",t.type.tensor,n);if(o.kernelShape.length<=2){let y="",S="",$="",v=r-(h?2:1);if(m?y=`
5771
+ }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${a}`,inputDependencies:d},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(r)/64)},programUniforms:i}),getShaderSource:l}},Zf=(e,t)=>{if(e.length>1){let r=e[1].getBigInt64Array(),n=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,o=e[0].dims.length,i=new Int32Array(2*o).fill(0);if(e.length>=4){let d=e[3].getBigInt64Array();for(let l=0;l<d.length;l++)i[Number(d[l])]=Number(r[l]),i[Number(d[l])+o]=Number(r[l+d.length])}else r.forEach((d,l)=>i[Number(l)]=Number(d));let a=[];return i.forEach(d=>a.push(d)),{mode:t.mode,value:n,pads:a}}else return t},Hd=(e,t)=>{Ff(e.inputs);let r=Zf(e.inputs,t);e.compute(Qf(e.inputs,r),{inputs:[0]})}});var nn,Fd,qd,jd,Kd,Jf,eh,Yd,Xd,Qd,Zd,Jd,el,tl,rl,nl,ol,il,al,sl=U(()=>{"use strict";Ke();J();ae();se();nn=e=>{if(_e.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},Fd=(e,t,r)=>{let n=t.format==="NHWC",o=e.dims.slice();n&&o.splice(1,0,o.pop());let i=Object.hasOwnProperty.call(t,"dilations"),a=t.kernelShape.slice(),d=t.strides.slice(),l=i?t.dilations.slice():[],c=t.pads.slice();Ct.adjustPoolAttributes(r,o,a,d,l,c);let m=Ct.computePoolOutputShape(r,o,d,l,a,c,t.autoPad),u=Object.assign({},t);i?Object.assign(u,{kernelShape:a,strides:d,pads:c,dilations:l,cacheKey:t.cacheKey}):Object.assign(u,{kernelShape:a,strides:d,pads:c,cacheKey:t.cacheKey});let h=m.slice();return h.push(h.splice(1,1)[0]),[u,n?h:m]},qd=(e,t)=>{let r=t.format==="NHWC",n=k.size(e),o=k.size(t.kernelShape),i=[{type:12,data:n},{type:12,data:o}],a=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let 
d=t.kernelShape[t.kernelShape.length-1],l=t.strides[t.strides.length-1],c=t.pads[t.pads.length/2-1],m=t.pads[t.pads.length-1],u=!!(c+m);i.push({type:12,data:d},{type:12,data:l},{type:12,data:c},{type:12,data:m}),a.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let h=!1;if(t.kernelShape.length===2){let w=t.kernelShape[t.kernelShape.length-2],g=t.strides[t.strides.length-2],y=t.pads[t.pads.length/2-2],S=t.pads[t.pads.length-2];h=!!(y+S),i.push({type:12,data:w},{type:12,data:g},{type:12,data:y},{type:12,data:S}),a.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[i,a,!0,u,h]}else{if(r)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let d=k.computeStrides(t.kernelShape);i.push({type:12,data:d},{type:12,data:t.pads},{type:12,data:t.strides}),a.push({name:"kernelStrides",type:"u32",length:d.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let l=t.pads.reduce((c,m)=>c+m);return[i,a,!!l,!1,!1]}},jd=(e,t,r,n,o,i,a,d,l,c,m,u)=>{let h=o.format==="NHWC",w=t.type.value,g=M("output",t.type.tensor,n);if(o.kernelShape.length<=2){let y="",S="",$="",v=r-(h?2:1);if(m?y=`
5769
5772
  for (var i: u32 = 0u; i < uniforms.kw; i++) {
5770
5773
  xIndices[${v}] = indices[${v}] * uniforms.sw - uniforms.pwStart + i;
5771
5774
  if (xIndices[${v}] < 0 || xIndices[${v}]
@@ -5854,12 +5857,12 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5854
5857
  ${a}
5855
5858
 
5856
5859
  output[global_idx] = value;
5857
- }`}},Kd=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,Zf=e=>`${Kd(e)};${e.countIncludePad}`,Qf=e=>`${Kd(e)};${e.storageOrder};${e.dilations}`,Yd=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),Xd=(e,t,r,n)=>{let[o,i]=Fd(t,n,r),a=E("x",t.dataType,t.dims.length),d=a.type.value,l="value += x_val;",c="";o.countIncludePad?c+=`value /= ${d}(uniforms.kernelSize);`:c+=`value /= ${d}(i32(uniforms.kernelSize) - pad);`;let[m,u,h,w,g]=qd(i,o);m.push(...R(t.dims,i));let y=["rank"];return{name:e,shaderCache:{hint:`${n.cacheKey};${h};${w};${g}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:S=>jd(S,a,t.dims.length,i.length,o,l,c,0,u,h,w,g)}},Zd=e=>{let t=e.count_include_pad!==0,r=Yd(e);if(r.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let n={countIncludePad:t,...r,cacheKey:""};return{...n,cacheKey:Zf(n)}},Qd=(e,t)=>{nn(e.inputs),e.compute(Xd("AveragePool",e.inputs[0],!1,t))},Jd={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},el=e=>{let t=e.format;return{format:t,...Jd,cacheKey:t}},tl=(e,t)=>{nn(e.inputs),e.compute(Xd("GlobalAveragePool",e.inputs[0],!0,t))},rl=(e,t,r,n)=>{let[o,i]=Fd(t,n,r),a=`
5860
+ }`}},Kd=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,Jf=e=>`${Kd(e)};${e.countIncludePad}`,eh=e=>`${Kd(e)};${e.storageOrder};${e.dilations}`,Yd=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),Xd=(e,t,r,n)=>{let[o,i]=Fd(t,n,r),a=E("x",t.dataType,t.dims.length),d=a.type.value,l="value += x_val;",c="";o.countIncludePad?c+=`value /= ${d}(uniforms.kernelSize);`:c+=`value /= ${d}(i32(uniforms.kernelSize) - pad);`;let[m,u,h,w,g]=qd(i,o);m.push(...V(t.dims,i));let y=["rank"];return{name:e,shaderCache:{hint:`${n.cacheKey};${h};${w};${g}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:S=>jd(S,a,t.dims.length,i.length,o,l,c,0,u,h,w,g)}},Qd=e=>{let t=e.count_include_pad!==0,r=Yd(e);if(r.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let n={countIncludePad:t,...r,cacheKey:""};return{...n,cacheKey:Jf(n)}},Zd=(e,t)=>{nn(e.inputs),e.compute(Xd("AveragePool",e.inputs[0],!1,t))},Jd={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},el=e=>{let t=e.format;return{format:t,...Jd,cacheKey:t}},tl=(e,t)=>{nn(e.inputs),e.compute(Xd("GlobalAveragePool",e.inputs[0],!0,t))},rl=(e,t,r,n)=>{let[o,i]=Fd(t,n,r),a=`
5858
5861
  value = max(x_val, value);
5859
- `,d="",l=E("x",t.dataType,t.dims.length),c=["rank"],[m,u,h,w,g]=qd(i,o);return m.push(...R(t.dims,i)),{name:e,shaderCache:{hint:`${n.cacheKey};${h};${w};${g}`,inputDependencies:c},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:y=>jd(y,l,t.dims.length,i.length,o,a,d,t.dataType===10?-65504:-1e5,u,h,w,g)}},nl=(e,t)=>{nn(e.inputs),e.compute(rl("MaxPool",e.inputs[0],!1,t))},ol=e=>{let t=e.storage_order,r=e.dilations,n=Yd(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(n.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let o={storageOrder:t,dilations:r,...n,cacheKey:""};return{...o,cacheKey:Qf(o)}},il=e=>{let t=e.format;return{format:t,...Jd,cacheKey:t}},al=(e,t)=>{nn(e.inputs),e.compute(rl("GlobalMaxPool",e.inputs[0],!0,t))}});var eh,th,ul,dl,ll=V(()=>{"use strict";Q();ie();Ie();ae();eh=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((r,n)=>r===e[2].dims[n]).reduce((r,n)=>r&&n,!0))throw new Error("scale and zero-point inputs must have the same 
shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be set only for block quantization.");if(!e[1].dims.map((o,i)=>i===t.axis||o===e[0].dims[i]).reduce((o,i)=>o&&i,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let r=e[0].dims[t.axis],n=e[1].dims[t.axis];if(t.blockSize<Math.ceil(r/n)||t.blockSize>Math.ceil(r/(n-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},th=(e,t)=>{let r=k.normalizeAxis(t.axis,e[0].dims.length),n=e[0].dataType,o=n===3,i=e[0].dims,a=e[1].dataType,d=k.size(i),l=n===3||n===2,c=l?[Math.ceil(k.size(e[0].dims)/4)]:e[0].dims,m=e[1].dims,u=e.length>2?e[2]:void 0,h=u?l?[Math.ceil(k.size(u.dims)/4)]:u.dims:void 0,w=m.length===0||m.length===1&&m[0]===1,g=w===!1&&m.length===1,y=we(d),S=w&&(!l||y===4),$=S?y:1,v=S&&!l?y:1,x=E("input",l?12:n,c.length,v),T=E("scale",a,m.length),C=u?E("zero_point",l?12:n,h.length):void 0,A=M("output",a,i.length,$),P=[x,T];C&&P.push(C);let B=[c,m];u&&B.push(h);let N=[{type:12,data:d/$},{type:12,data:r},{type:12,data:t.blockSize},...R(...B,i)],W=K=>{let Z=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5860
- ${K.registerUniforms(Z).declareVariables(...P,A)}
5861
- ${K.mainStart()}
5862
- ${K.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5862
+ `,d="",l=E("x",t.dataType,t.dims.length),c=["rank"],[m,u,h,w,g]=qd(i,o);return m.push(...V(t.dims,i)),{name:e,shaderCache:{hint:`${n.cacheKey};${h};${w};${g}`,inputDependencies:c},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:y=>jd(y,l,t.dims.length,i.length,o,a,d,t.dataType===10?-65504:-1e5,u,h,w,g)}},nl=(e,t)=>{nn(e.inputs),e.compute(rl("MaxPool",e.inputs[0],!1,t))},ol=e=>{let t=e.storage_order,r=e.dilations,n=Yd(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(n.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let o={storageOrder:t,dilations:r,...n,cacheKey:""};return{...o,cacheKey:eh(o)}},il=e=>{let t=e.format;return{format:t,...Jd,cacheKey:t}},al=(e,t)=>{nn(e.inputs),e.compute(rl("GlobalMaxPool",e.inputs[0],!0,t))}});var rh,nh,ul,dl,ll=U(()=>{"use strict";J();ae();Ie();se();rh=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((r,n)=>r===e[2].dims[n]).reduce((r,n)=>r&&n,!0))throw new Error("scale and zero-point inputs must have the same 
shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be set only for block quantization.");if(!e[1].dims.map((o,i)=>i===t.axis||o===e[0].dims[i]).reduce((o,i)=>o&&i,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let r=e[0].dims[t.axis],n=e[1].dims[t.axis];if(t.blockSize<Math.ceil(r/n)||t.blockSize>Math.ceil(r/(n-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},nh=(e,t)=>{let r=k.normalizeAxis(t.axis,e[0].dims.length),n=e[0].dataType,o=n===3,i=e[0].dims,a=e[1].dataType,d=k.size(i),l=n===3||n===2,c=l?[Math.ceil(k.size(e[0].dims)/4)]:e[0].dims,m=e[1].dims,u=e.length>2?e[2]:void 0,h=u?l?[Math.ceil(k.size(u.dims)/4)]:u.dims:void 0,w=m.length===0||m.length===1&&m[0]===1,g=w===!1&&m.length===1,y=we(d),S=w&&(!l||y===4),$=S?y:1,v=S&&!l?y:1,x=E("input",l?12:n,c.length,v),T=E("scale",a,m.length),C=u?E("zero_point",l?12:n,h.length):void 0,A=M("output",a,i.length,$),P=[x,T];C&&P.push(C);let D=[c,m];u&&D.push(h);let W=[{type:12,data:d/$},{type:12,data:r},{type:12,data:t.blockSize},...V(...D,i)],N=j=>{let Y=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5863
+ ${j.registerUniforms(Y).declareVariables(...P,A)}
5864
+ ${j.mainStart()}
5865
+ ${j.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5863
5866
  let output_indices = ${A.offsetToIndices("global_idx")};
5864
5867
 
5865
5868
  // Set input x
@@ -5894,13 +5897,13 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5894
5897
  let zero_point_value = zero_point_vec[zero_point_offset % 4];`:`let zero_point_value = ${C.getByIndices("scale_indices")};`:`let zero_point_value = ${l?o?"i32":"u32":x.type.value}(0);`)()};
5895
5898
  // Compute and write output
5896
5899
  ${A.setByOffset("global_idx",`${A.type.value}(x_value - zero_point_value) * scale_value`)};
5897
- }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:C?["rank","rank","rank"]:["rank","rank"]},getShaderSource:W,getRunData:()=>({outputs:[{dims:i,dataType:a}],dispatchGroup:{x:Math.ceil(d/$/64),y:1,z:1},programUniforms:N})}},ul=(e,t)=>{eh(e.inputs,t),e.compute(th(e.inputs,t))},dl=e=>J({axis:e.axis,blockSize:e.blockSize})});var rh,nh,cl,pl=V(()=>{"use strict";Ke();Q();ae();rh=(e,t,r)=>{let n=e===t,o=e<t&&r<0,i=e>t&&r>0;if(n||o||i)throw new Error("Range these inputs' contents are invalid.")},nh=(e,t,r,n)=>{let o=Math.abs(Math.ceil((t-e)/r)),i=[o],a=o,d=[{type:12,data:a},{type:n,data:e},{type:n,data:r},...R(i)],l=c=>{let m=M("output",n,i.length),u=m.type.value,h=[{name:"outputSize",type:"u32"},{name:"start",type:u},{name:"delta",type:u}];return`
5900
+ }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:C?["rank","rank","rank"]:["rank","rank"]},getShaderSource:N,getRunData:()=>({outputs:[{dims:i,dataType:a}],dispatchGroup:{x:Math.ceil(d/$/64),y:1,z:1},programUniforms:W})}},ul=(e,t)=>{rh(e.inputs,t),e.compute(nh(e.inputs,t))},dl=e=>ee({axis:e.axis,blockSize:e.blockSize})});var oh,ih,cl,pl=U(()=>{"use strict";Ke();J();se();oh=(e,t,r)=>{let n=e===t,o=e<t&&r<0,i=e>t&&r>0;if(n||o||i)throw new Error("Range these inputs' contents are invalid.")},ih=(e,t,r,n)=>{let o=Math.abs(Math.ceil((t-e)/r)),i=[o],a=o,d=[{type:12,data:a},{type:n,data:e},{type:n,data:r},...V(i)],l=c=>{let m=M("output",n,i.length),u=m.type.value,h=[{name:"outputSize",type:"u32"},{name:"start",type:u},{name:"delta",type:u}];return`
5898
5901
  ${c.registerUniforms(h).declareVariables(m)}
5899
5902
  ${c.mainStart()}
5900
5903
  ${c.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
5901
5904
  output[global_idx] = uniforms.start + ${u}(global_idx) * uniforms.delta;
5902
- }`};return{name:"Range",shaderCache:{hint:`${n}`},getShaderSource:l,getRunData:()=>({outputs:[{dims:i,dataType:n}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:d})}},cl=e=>{let t=0,r=0,n=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],r=e.inputs[1].getInt32Array()[0],n=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],r=e.inputs[1].getFloat32Array()[0],n=e.inputs[2].getFloat32Array()[0]),_e.webgpu.validateInputContent&&rh(t,r,n),e.compute(nh(t,r,n,e.inputs[0].dataType),{inputs:[]})}});var oh,ih,ah,sh,uh,dh,lh,ch,ph,mh,fh,ml,hh,gh,yh,bh,wh,fl,hl,gl=V(()=>{"use strict";Q();ie();Ie();ae();oh=(e,t)=>{if(e.every(r=>r>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and
5903
- one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},ih=(e,t,r)=>{t.every(o=>o>=0&&o<r||(()=>{throw new Error("Resize requires axes input values to be positive and less than rank")}));let n=new Array(r).fill(1);return t.forEach((o,i)=>n[o]=e[i]),n},ah=(e,t,r,n,o,i)=>{let[a,d,l]=r>10?[1,2,3]:[-1,e.length>1?1:-1,-1],c=e[0].dims.length;if(a>0&&e.length>a&&e[a].dims.length>0)e[a].getFloat32Array().forEach(m=>i.push(m));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(d>0&&e.length>d&&e[d].dims.length===1&&e[d].dims[0]>0){if(e[d].getFloat32Array().forEach(m=>n.push(m)),n.length!==0&&n.length!==c&&r>=18&&n.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and up");oh(n,t),t.axes.length>0&&ih(n,t.axes,c).forEach((m,u)=>n[u]=m)}if(l>0&&e.length>l&&e[l].dims.length===1&&e[l].dims[0]>0&&(e[l].getBigInt64Array().forEach(m=>o.push(Number(m))),o.length!==0&&o.length!==c&&r>=18&&o.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(n.length!==0&&n.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(o.length!==0&&o.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof n<"u"&&typeof o<"u"&&n.length>0&&o.length>c)throw new Error("Resize requires only of scales or sizes to be specified")},sh=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,
5905
+ }`};return{name:"Range",shaderCache:{hint:`${n}`},getShaderSource:l,getRunData:()=>({outputs:[{dims:i,dataType:n}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:d})}},cl=e=>{let t=0,r=0,n=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],r=e.inputs[1].getInt32Array()[0],n=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],r=e.inputs[1].getFloat32Array()[0],n=e.inputs[2].getFloat32Array()[0]),_e.webgpu.validateInputContent&&oh(t,r,n),e.compute(ih(t,r,n,e.inputs[0].dataType),{inputs:[]})}});var ah,sh,uh,dh,lh,ch,ph,mh,fh,hh,gh,ml,yh,bh,wh,_h,vh,fl,hl,gl=U(()=>{"use strict";J();ae();Ie();se();ah=(e,t)=>{if(e.every(r=>r>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and
5906
+ one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},sh=(e,t,r)=>{t.every(o=>o>=0&&o<r||(()=>{throw new Error("Resize requires axes input values to be positive and less than rank")}));let n=new Array(r).fill(1);return t.forEach((o,i)=>n[o]=e[i]),n},uh=(e,t,r,n,o,i)=>{let[a,d,l]=r>10?[1,2,3]:[-1,e.length>1?1:-1,-1],c=e[0].dims.length;if(a>0&&e.length>a&&e[a].dims.length>0)e[a].getFloat32Array().forEach(m=>i.push(m));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(d>0&&e.length>d&&e[d].dims.length===1&&e[d].dims[0]>0){if(e[d].getFloat32Array().forEach(m=>n.push(m)),n.length!==0&&n.length!==c&&r>=18&&n.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and up");ah(n,t),t.axes.length>0&&sh(n,t.axes,c).forEach((m,u)=>n[u]=m)}if(l>0&&e.length>l&&e[l].dims.length===1&&e[l].dims[0]>0&&(e[l].getBigInt64Array().forEach(m=>o.push(Number(m))),o.length!==0&&o.length!==c&&r>=18&&o.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(n.length!==0&&n.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(o.length!==0&&o.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof n<"u"&&typeof o<"u"&&n.length>0&&o.length>c)throw new Error("Resize requires only of scales or sizes to be specified")},dh=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,
5904
5907
  lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${t} { `+(()=>{switch(e){case"asymmetric":return`return ${t}(xResized) / ${t}(xScale);`;case"pytorch_half_pixel":return`if (lengthResized > 1) {
5905
5908
  return (${t}(xResized) + 0.5) / ${t}(xScale) - 0.5;
5906
5909
  } else {
@@ -5925,7 +5928,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5925
5928
  const adjustment = ${t}(lengthResized) / outputWidth;
5926
5929
  const center = ${t}(lengthOriginal) / 2;
5927
5930
  const offset = center * (1 - adjustment);
5928
- return offset + ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;case"half_pixel":return`return ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;default:throw new Error(`Coordinate transform mode ${e} is not supported`)}})()+"}",uh=(e,t,r)=>`fn getNearestPixelFromOriginal(xOriginal: ${r}, isDownSample: bool) -> ${r} {`+(()=>{switch(e){case"round_prefer_ceil":return"if (fract(xOriginal) == 0.5) { return ceil(xOriginal); } else { return round(xOriginal); }";case"floor":return"return floor(xOriginal);";case"ceil":return"return ceil(xOriginal);";case"round_prefer_floor":return"if (fract(xOriginal) == 0.5) { return floor(xOriginal); } else { return round(xOriginal); }";case"simple":default:if(t<11)return"if (isDownSample) { return ceil(xOriginal); } else { return xOriginal; }";throw new Error(`Nearest mode ${e} is not supported`)}})()+"}",dh=(e,t,r)=>{let n=new Array(r).fill(0).concat(new Array(r).fill(1)),o=e.length===0?n:e.slice();return t.length>0?(t.forEach((i,a)=>{n[i]=o[a],n[a+r]=o[t.length+a]}),n):o},lh=(e,t,r,n)=>{let o=[];if(r.length>0)if(n.length>0){if(e.forEach(i=>o.push(i)),Math.max(...n)>e.length)throw new Error("axes is out of bound");n.forEach((i,a)=>o[i]=r[a])}else r.forEach(i=>o.push(i));else{if(t.length===0)throw new Error("Resize requires either scales or sizes.");o=e.map((i,a)=>Math.round(i*t[a]))}return o},ch=(e,t,r)=>{let n=(()=>{switch(r.keepAspectRatioPolicy){case"not_larger":return r.axes.length>0?Math.min(...r.axes.map(i=>t[i]),Number.MAX_VALUE):Math.min(...t,Number.MAX_VALUE);case"not_smaller":return r.axes.length>0?Math.max(...r.axes.map(i=>t[i]),Number.MIN_VALUE):Math.max(...t,Number.MIN_VALUE);default:throw new Error(`Keep aspect ratio policy ${r.keepAspectRatioPolicy} is not supported`)}})();t.fill(1,0,t.length);let o=e.slice();return r.axes.length>0?(r.axes.forEach(i=>t[i]=n),r.axes.forEach(i=>o[i]=Math.round(e[i]*t[i]))):(t.fill(n,0,t.length),o.forEach((i,a)=>o[a]=Math.round(i*t[a]))),o},ph=(e,t,r,n,o)=>`
5931
+ return offset + ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;case"half_pixel":return`return ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;default:throw new Error(`Coordinate transform mode ${e} is not supported`)}})()+"}",lh=(e,t,r)=>`fn getNearestPixelFromOriginal(xOriginal: ${r}, isDownSample: bool) -> ${r} {`+(()=>{switch(e){case"round_prefer_ceil":return"if (fract(xOriginal) == 0.5) { return ceil(xOriginal); } else { return round(xOriginal); }";case"floor":return"return floor(xOriginal);";case"ceil":return"return ceil(xOriginal);";case"round_prefer_floor":return"if (fract(xOriginal) == 0.5) { return floor(xOriginal); } else { return round(xOriginal); }";case"simple":default:if(t<11)return"if (isDownSample) { return ceil(xOriginal); } else { return xOriginal; }";throw new Error(`Nearest mode ${e} is not supported`)}})()+"}",ch=(e,t,r)=>{let n=new Array(r).fill(0).concat(new Array(r).fill(1)),o=e.length===0?n:e.slice();return t.length>0?(t.forEach((i,a)=>{n[i]=o[a],n[a+r]=o[t.length+a]}),n):o},ph=(e,t,r,n)=>{let o=[];if(r.length>0)if(n.length>0){if(e.forEach(i=>o.push(i)),Math.max(...n)>e.length)throw new Error("axes is out of bound");n.forEach((i,a)=>o[i]=r[a])}else r.forEach(i=>o.push(i));else{if(t.length===0)throw new Error("Resize requires either scales or sizes.");o=e.map((i,a)=>Math.round(i*t[a]))}return o},mh=(e,t,r)=>{let n=(()=>{switch(r.keepAspectRatioPolicy){case"not_larger":return r.axes.length>0?Math.min(...r.axes.map(i=>t[i]),Number.MAX_VALUE):Math.min(...t,Number.MAX_VALUE);case"not_smaller":return r.axes.length>0?Math.max(...r.axes.map(i=>t[i]),Number.MIN_VALUE):Math.max(...t,Number.MIN_VALUE);default:throw new Error(`Keep aspect ratio policy ${r.keepAspectRatioPolicy} is not supported`)}})();t.fill(1,0,t.length);let o=e.slice();return r.axes.length>0?(r.axes.forEach(i=>t[i]=n),r.axes.forEach(i=>o[i]=Math.round(e[i]*t[i]))):(t.fill(n,0,t.length),o.forEach((i,a)=>o[a]=Math.round(i*t[a]))),o},fh=(e,t,r,n,o)=>`
5929
5932
  fn calculateOriginalIndicesFromOutputIndices(output_indices: ${e.type.indices}) -> array<${e.type.value}, ${r.length}> {
5930
5933
  var original_indices: array<${e.type.value}, ${r.length}>;
5931
5934
  for (var i:u32 = 0; i < ${r.length}; i++) {
@@ -5943,7 +5946,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5943
5946
  }
5944
5947
  }
5945
5948
  return original_indices;
5946
- }`,mh=(e,t,r,n,o,i,a)=>`
5949
+ }`,hh=(e,t,r,n,o,i,a)=>`
5947
5950
  fn calculateInputIndicesFromOutputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} {
5948
5951
  var input_indices: ${e.type.indices};
5949
5952
  for (var i:u32 = 0; i < ${n.length}; i++) {
@@ -5974,7 +5977,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5974
5977
  ${e.indicesSet("input_indices","i"," input_index")}
5975
5978
  }
5976
5979
  return input_indices;
5977
- }`,fh=(e,t)=>`
5980
+ }`,gh=(e,t)=>`
5978
5981
  fn checkInputIndices(input_indices: ${e.type.indices}) -> bool {
5979
5982
  for (var i:u32 = 0; i < ${t.length}; i++) {
5980
5983
  var input_index = ${e.indicesGet("input_indices","i")};
@@ -5986,7 +5989,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
5986
5989
  }`,ml=(e,t,r,n)=>e.rank>n?`
5987
5990
  ${e.indicesSet("input_indices",t,"channel")};
5988
5991
  ${e.indicesSet("input_indices",r,"batch")};
5989
- `:"",hh=(e,t,r,n,o)=>{let[a,d,l,c]=r.length===2?[-1,0,1,-1]:[0,2,3,1],m=e.type.value;return`
5992
+ `:"",yh=(e,t,r,n,o)=>{let[a,d,l,c]=r.length===2?[-1,0,1,-1]:[0,2,3,1],m=e.type.value;return`
5990
5993
  fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${m} {
5991
5994
  var input_indices: ${e.type.indices};
5992
5995
  ${e.indicesSet("input_indices",d,`max(0, min(row, ${r[d]} - 1))`)};
@@ -6027,7 +6030,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6027
6030
  dy2 = 0.5;
6028
6031
  }
6029
6032
  return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1);
6030
- }`},gh=(e,t,r,n,o,i,a,d,l,c)=>{let m=r.length===2,u=!0,[h,w]=m?[0,1]:u?[2,3]:[1,2],g=e.type.value,y=S=>{let $=S===h?"row":"col";return`
6033
+ }`},bh=(e,t,r,n,o,i,a,d,l,c)=>{let m=r.length===2,u=!0,[h,w]=m?[0,1]:u?[2,3]:[1,2],g=e.type.value,y=S=>{let $=S===h?"row":"col";return`
6031
6034
  fn ${$}CubicInterpolation(input_indices: ${e.type.indices}, output_indices: ${t.type.indices}) -> ${g} {
6032
6035
  var output_index = ${t.indicesGet("output_indices",S)};
6033
6036
  var originalIdx: ${g} = getOriginalCoordinateFromResizedCoordinate(output_index, ${o[S]},
@@ -6075,7 +6078,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6075
6078
  var input_indices: ${e.type.indices} = output_indices;
6076
6079
  return colCubicInterpolation(input_indices, output_indices);
6077
6080
  }
6078
- `},yh=(e,t,r,n,o)=>{let[a,d,l,c,m]=r.length===3?[-1,0,1,2,-1]:[0,2,3,4,1],u=e.type.value;return`
6081
+ `},wh=(e,t,r,n,o)=>{let[a,d,l,c,m]=r.length===3?[-1,0,1,2,-1]:[0,2,3,4,1],u=e.type.value;return`
6079
6082
  fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${u} {
6080
6083
  var input_indices: ${e.type.indices};
6081
6084
  ${e.indicesSet("input_indices",d,`max(0, min(depth, ${r[d]} - 1))`)};
@@ -6134,18 +6137,18 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6134
6137
  }
6135
6138
  return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 +
6136
6139
  x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1);
6137
- }`},bh=(e,t,r,n,o,i)=>{let a=e.dims,d=dh(i,t.axes,a.length),l=lh(a,n,o,t.axes),c=n.slice();n.length===0&&(c=a.map((v,x)=>v===0?1:l[x]/v),t.keepAspectRatioPolicy!=="stretch"&&(l=ch(a,c,t)));let m=M("output",e.dataType,l.length),u=E("input",e.dataType,a.length),h=k.size(l),w=a.length===l.length&&a.every((v,x)=>v===l[x]),g=t.coordinateTransformMode==="tf_crop_and_resize",y=t.extrapolationValue,S=u.type.value,$=v=>`
6140
+ }`},_h=(e,t,r,n,o,i)=>{let a=e.dims,d=ch(i,t.axes,a.length),l=ph(a,n,o,t.axes),c=n.slice();n.length===0&&(c=a.map((v,x)=>v===0?1:l[x]/v),t.keepAspectRatioPolicy!=="stretch"&&(l=mh(a,c,t)));let m=M("output",e.dataType,l.length),u=E("input",e.dataType,a.length),h=k.size(l),w=a.length===l.length&&a.every((v,x)=>v===l[x]),g=t.coordinateTransformMode==="tf_crop_and_resize",y=t.extrapolationValue,S=u.type.value,$=v=>`
6138
6141
  ${w?"":`
6139
- ${sh(t.coordinateTransformMode,S)};
6142
+ ${dh(t.coordinateTransformMode,S)};
6140
6143
  ${(()=>{switch(t.mode){case"nearest":return`
6141
- ${fh(u,a)};
6142
- ${uh(t.nearestMode,r,S)};
6143
- ${mh(u,m,a,l,c.length,d.length,g)};
6144
+ ${gh(u,a)};
6145
+ ${lh(t.nearestMode,r,S)};
6146
+ ${hh(u,m,a,l,c.length,d.length,g)};
6144
6147
  `;case"linear":return`
6145
- ${ph(m,a,l,c.length,d.length)};
6146
- ${(()=>{if(a.length===2||a.length===4)return`${hh(u,m,a,g,y)}`;if(a.length===3||a.length===5)return`${yh(u,m,a,g,y)}`;throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.")})()};
6148
+ ${fh(m,a,l,c.length,d.length)};
6149
+ ${(()=>{if(a.length===2||a.length===4)return`${yh(u,m,a,g,y)}`;if(a.length===3||a.length===5)return`${wh(u,m,a,g,y)}`;throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.")})()};
6147
6150
  `;case"cubic":return`
6148
- ${(()=>{if(a.length===2||a.length===4)return`${gh(u,m,a,l,c,d,t.cubicCoeffA,g,t.extrapolationValue,t.excludeOutside)}`;throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode.")})()};
6151
+ ${(()=>{if(a.length===2||a.length===4)return`${bh(u,m,a,l,c,d,t.cubicCoeffA,g,t.extrapolationValue,t.excludeOutside)}`;throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode.")})()};
6149
6152
  `;default:throw Error("Invalid resize mode")}})()};
6150
6153
  `}
6151
6154
  ${v.registerUniform("output_size","u32").registerUniform("scales","f32",c.length).registerUniform("roi","f32",d.length).declareVariables(u,m)}
@@ -6161,7 +6164,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6161
6164
  output[global_idx] = ${t.extrapolationValue};
6162
6165
  }`;case"linear":return`output[global_idx] = ${a.length===2||a.length===4?"bilinearInterpolation":"trilinearInterpolation"}(output_indices);`;case"cubic":return"output[global_idx] = bicubicInterpolation(output_indices);";default:throw Error(`Unsupported resize mode: ${t.mode}`)}})()};
6163
6166
  `}
6164
- }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${r}|${c.length>0?c:""}|${o.length>0?o:""}|${d.length>0?d:""}|${w}|${a}`,inputDependencies:["rank"]},getShaderSource:$,getRunData:()=>({outputs:[{dims:l,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:[{type:12,data:h},{type:1,data:c},{type:1,data:d},...R(a,l)]})}},wh=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},fl=(e,t)=>{let r=[],n=[],o=[],i=wh(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");ah(e.inputs,t,i,r,n,o),e.compute(bh(e.inputs[0],t,i,r,n,o),{inputs:[0]})},hl=e=>{let t=e.antialias,r=e.axes,n=e.coordinateTransformMode,o=e.cubicCoeffA,i=e.excludeOutside!==0,a=e.extrapolationValue,d=e.keepAspectRatioPolicy,l=e.mode,c=e.nearestMode===""?"simple":e.nearestMode;return J({antialias:t,axes:r,coordinateTransformMode:n,cubicCoeffA:o,excludeOutside:i,extrapolationValue:a,keepAspectRatioPolicy:d,mode:l,nearestMode:c})}});var _h,vh,yl,bl=V(()=>{"use strict";Q();ie();Ie();ae();_h=(e,t)=>{let[r,n,o,i]=e,{numHeads:a,rotaryEmbeddingDim:d}=t;if(r.dims.length!==3&&r.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${r.dims.length}`);if(!k.areEqual(n.dims,[])&&!k.areEqual(n.dims,[1])&&n.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${n.dims.length}`);if(o.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${o.dims.length}`);if(i.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${i.dims.length}`);if(!k.areEqual(o.dims,i.dims))throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");if(d>0&&a===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let l=r.dims[0],c=r.dims[r.dims.length-2],m=o.dims[0],u=k.sizeFromDimension(r.dims,1)/c,h=d===0?o.dims[1]*2:u/a;if(d>h)throw new 
Error("rotary_embedding_dim must be less than or equal to head_size");if(n.dims.length===2){if(l!==n.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${n.dims[0]}`);if(c!==n.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${n.dims[1]}`)}if(h/2!==o.dims[1]&&d/2!==o.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${o.dims[1]}`);if(c>m)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},vh=(e,t)=>{let{interleaved:r,numHeads:n,rotaryEmbeddingDim:o,scale:i}=t,a=e[0].dims[0],d=k.sizeFromDimension(e[0].dims,1),l=e[0].dims[e[0].dims.length-2],c=d/l,m=e[2].dims[1],u=o===0?m*2:c/n,h=new Array(a,l,c/u,u-m),w=k.computeStrides(h),g=[{type:1,data:i},{type:12,data:h},{type:12,data:w},...e[0].dims.length===3?new Array({type:12,data:[d,c,u,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[d,u,l*u,1]}):[],...R(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],y=S=>{let $=E("input",e[0].dataType,e[0].dims.length),v=E("position_ids",e[1].dataType,e[1].dims.length),x=E("cos_cache",e[2].dataType,e[2].dims.length),T=E("sin_cache",e[3].dataType,e[3].dims.length),C=M("output",e[0].dataType,e[0].dims.length);return S.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:h.length},{name:"global_strides",type:"u32",length:w.length},{name:"input_output_strides",type:"u32",length:w.length}]),`
6167
+ }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${r}|${c.length>0?c:""}|${o.length>0?o:""}|${d.length>0?d:""}|${w}|${a}`,inputDependencies:["rank"]},getShaderSource:$,getRunData:()=>({outputs:[{dims:l,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:[{type:12,data:h},{type:1,data:c},{type:1,data:d},...V(a,l)]})}},vh=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},fl=(e,t)=>{let r=[],n=[],o=[],i=vh(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");uh(e.inputs,t,i,r,n,o),e.compute(_h(e.inputs[0],t,i,r,n,o),{inputs:[0]})},hl=e=>{let t=e.antialias,r=e.axes,n=e.coordinateTransformMode,o=e.cubicCoeffA,i=e.excludeOutside!==0,a=e.extrapolationValue,d=e.keepAspectRatioPolicy,l=e.mode,c=e.nearestMode===""?"simple":e.nearestMode;return ee({antialias:t,axes:r,coordinateTransformMode:n,cubicCoeffA:o,excludeOutside:i,extrapolationValue:a,keepAspectRatioPolicy:d,mode:l,nearestMode:c})}});var $h,xh,yl,bl=U(()=>{"use strict";J();ae();Ie();se();$h=(e,t)=>{let[r,n,o,i]=e,{numHeads:a,rotaryEmbeddingDim:d}=t;if(r.dims.length!==3&&r.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${r.dims.length}`);if(!k.areEqual(n.dims,[])&&!k.areEqual(n.dims,[1])&&n.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${n.dims.length}`);if(o.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${o.dims.length}`);if(i.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${i.dims.length}`);if(!k.areEqual(o.dims,i.dims))throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");if(d>0&&a===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let l=r.dims[0],c=r.dims[r.dims.length-2],m=o.dims[0],u=k.sizeFromDimension(r.dims,1)/c,h=d===0?o.dims[1]*2:u/a;if(d>h)throw new 
Error("rotary_embedding_dim must be less than or equal to head_size");if(n.dims.length===2){if(l!==n.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${n.dims[0]}`);if(c!==n.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${n.dims[1]}`)}if(h/2!==o.dims[1]&&d/2!==o.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${o.dims[1]}`);if(c>m)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},xh=(e,t)=>{let{interleaved:r,numHeads:n,rotaryEmbeddingDim:o,scale:i}=t,a=e[0].dims[0],d=k.sizeFromDimension(e[0].dims,1),l=e[0].dims[e[0].dims.length-2],c=d/l,m=e[2].dims[1],u=o===0?m*2:c/n,h=new Array(a,l,c/u,u-m),w=k.computeStrides(h),g=[{type:1,data:i},{type:12,data:h},{type:12,data:w},...e[0].dims.length===3?new Array({type:12,data:[d,c,u,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[d,u,l*u,1]}):[],...V(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],y=S=>{let $=E("input",e[0].dataType,e[0].dims.length),v=E("position_ids",e[1].dataType,e[1].dims.length),x=E("cos_cache",e[2].dataType,e[2].dims.length),T=E("sin_cache",e[3].dataType,e[3].dims.length),C=M("output",e[0].dataType,e[0].dims.length);return S.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:h.length},{name:"global_strides",type:"u32",length:w.length},{name:"input_output_strides",type:"u32",length:w.length}]),`
6165
6168
  ${S.declareVariables($,v,x,T,C)}
6166
6169
 
6167
6170
  ${S.mainStart(At)}
@@ -6187,11 +6190,11 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6187
6190
  let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim;
6188
6191
  ${C.setByOffset("k",$.getByOffset("k"))}
6189
6192
  }
6190
- }`};return{name:"RotaryEmbedding",shaderCache:{hint:J({interleaved:r}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:y,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(h)/At)},programUniforms:g})}},yl=(e,t)=>{_h(e.inputs,t),e.compute(vh(e.inputs,t))}});var $h,xh,wl,_l=V(()=>{"use strict";Q();ie();ae();$h=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],r=e[1],n=e[2];if(t.dataType!==r.dataType||t.dataType!==n.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(r.dims.length!==3&&r.dims.length!==2)throw new Error("Skip must be 2D or 3D");let o=t.dims[t.dims.length-1],i=t.dims[t.dims.length-2];if(r.dims[r.dims.length-1]!==o)throw new Error("Skip must have the same hidden size as input");if(r.dims[r.dims.length-2]!==i)throw new Error("Skip must have the same sequence length as input");if(n.dims.length!==1)throw new Error("Gamma must be 1D");if(n.dims[n.dims.length-1]!==o)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let a=e[3];if(a.dims.length!==1)throw new Error("Beta must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let a=e[4];if(a.dims.length!==1)throw new Error("Bias must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Bias must have the same hidden size as input")}},xh=(e,t,r,n)=>{let o=t.simplified,i=e[0].dims,a=k.size(i),d=i,l=a,c=i.slice(-1)[0],m=n?i.slice(0,-1).concat(1):[],u=!o&&e.length>3,h=e.length>4,w=n&&r>1,g=n&&r>2,y=r>3,S=64,$=we(c),v=[{type:12,data:l},{type:12,data:$},{type:12,data:c},{type:1,data:t.epsilon}],x=C=>{let 
A=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],P=[E("x",e[0].dataType,e[0].dims,$),E("skip",e[1].dataType,e[1].dims,$),E("gamma",e[2].dataType,e[2].dims,$)];u&&P.push(E("beta",e[3].dataType,e[3].dims,$)),h&&P.push(E("bias",e[4].dataType,e[4].dims,$)),P.push(M("output",e[0].dataType,d,$)),w&&P.push(M("mean_output",1,m)),g&&P.push(M("inv_std_output",1,m)),y&&P.push(M("input_skip_bias_sum",e[0].dataType,d,$));let B=he(e[0].dataType),N=he(1,$);return`
6193
+ }`};return{name:"RotaryEmbedding",shaderCache:{hint:ee({interleaved:r}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:y,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(h)/At)},programUniforms:g})}},yl=(e,t)=>{$h(e.inputs,t),e.compute(xh(e.inputs,t))}});var Sh,Th,wl,_l=U(()=>{"use strict";J();ae();se();Sh=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],r=e[1],n=e[2];if(t.dataType!==r.dataType||t.dataType!==n.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(r.dims.length!==3&&r.dims.length!==2)throw new Error("Skip must be 2D or 3D");let o=t.dims[t.dims.length-1],i=t.dims[t.dims.length-2];if(r.dims[r.dims.length-1]!==o)throw new Error("Skip must have the same hidden size as input");if(r.dims[r.dims.length-2]!==i)throw new Error("Skip must have the same sequence length as input");if(n.dims.length!==1)throw new Error("Gamma must be 1D");if(n.dims[n.dims.length-1]!==o)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let a=e[3];if(a.dims.length!==1)throw new Error("Beta must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let a=e[4];if(a.dims.length!==1)throw new Error("Bias must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Bias must have the same hidden size as input")}},Th=(e,t,r,n)=>{let o=t.simplified,i=e[0].dims,a=k.size(i),d=i,l=a,c=i.slice(-1)[0],m=n?i.slice(0,-1).concat(1):[],u=!o&&e.length>3,h=e.length>4,w=n&&r>1,g=n&&r>2,y=r>3,S=64,$=we(c),v=[{type:12,data:l},{type:12,data:$},{type:12,data:c},{type:1,data:t.epsilon}],x=C=>{let 
A=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],P=[E("x",e[0].dataType,e[0].dims,$),E("skip",e[1].dataType,e[1].dims,$),E("gamma",e[2].dataType,e[2].dims,$)];u&&P.push(E("beta",e[3].dataType,e[3].dims,$)),h&&P.push(E("bias",e[4].dataType,e[4].dims,$)),P.push(M("output",e[0].dataType,d,$)),w&&P.push(M("mean_output",1,m)),g&&P.push(M("inv_std_output",1,m)),y&&P.push(M("input_skip_bias_sum",e[0].dataType,d,$));let D=he(e[0].dataType),W=he(1,$);return`
6191
6194
 
6192
6195
  ${C.registerUniforms(A).declareVariables(...P)}
6193
- var<workgroup> sum_shared : array<${N}, ${S}>;
6194
- var<workgroup> sum_squared_shared : array<${N}, ${S}>;
6196
+ var<workgroup> sum_shared : array<${W}, ${S}>;
6197
+ var<workgroup> sum_squared_shared : array<${W}, ${S}>;
6195
6198
 
6196
6199
  ${C.mainStart([S,1,1])}
6197
6200
  let ix = local_id.x;
@@ -6206,12 +6209,12 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6206
6209
  }
6207
6210
  for (var i: u32 = 0; i < stride; i++) {
6208
6211
  let skip_value = skip[offset + i];
6209
- let bias_value = ${h?"bias[offset1d + i]":B+"(0.0)"};
6212
+ let bias_value = ${h?"bias[offset1d + i]":D+"(0.0)"};
6210
6213
  let input_value = x[offset + i];
6211
6214
  let value = input_value + skip_value + bias_value;
6212
6215
  ${y?"input_skip_bias_sum[offset + i] = value;":""}
6213
6216
  output[offset + i] = value;
6214
- let f32_value = ${kt(B,$,"value")};
6217
+ let f32_value = ${kt(D,$,"value")};
6215
6218
  sum_shared[ix] += f32_value;
6216
6219
  sum_squared_shared[ix] += f32_value * f32_value;
6217
6220
  }
@@ -6229,17 +6232,17 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6229
6232
 
6230
6233
  let sum = sum_shared[0];
6231
6234
  let square_sum = sum_squared_shared[0];
6232
- let mean = ${Ze("sum",$)} / f32(uniforms.hidden_size);
6233
- let inv_std_dev = inverseSqrt(${Ze("square_sum",$)} / f32(uniforms.hidden_size) ${o?"":"- mean * mean"} + uniforms.epsilon);
6235
+ let mean = ${Qe("sum",$)} / f32(uniforms.hidden_size);
6236
+ let inv_std_dev = inverseSqrt(${Qe("square_sum",$)} / f32(uniforms.hidden_size) ${o?"":"- mean * mean"} + uniforms.epsilon);
6234
6237
  ${w?"mean_output[global_idx] = mean;":""}
6235
6238
  ${g?"inv_std_output[global_idx] = inv_std_dev;":""}
6236
6239
 
6237
6240
  for (var i: u32 = 0; i < stride; i++) {
6238
- output[offset + i] = (output[offset + i] ${o?"":`- ${B}(mean)`}) *
6239
- ${B}(inv_std_dev) * gamma[offset1d + i]
6241
+ output[offset + i] = (output[offset + i] ${o?"":`- ${D}(mean)`}) *
6242
+ ${D}(inv_std_dev) * gamma[offset1d + i]
6240
6243
  ${u?"+ beta[offset1d + i]":""};
6241
6244
  }
6242
- }`},T=[{dims:d,dataType:e[0].dataType}];return r>1&&T.push({dims:m,dataType:1}),r>2&&T.push({dims:m,dataType:1}),r>3&&T.push({dims:i,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${$};${w};${g};${y}`,inputDependencies:e.map((C,A)=>"type")},getShaderSource:x,getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(l/c)},programUniforms:v})}},wl=(e,t)=>{$h(e.inputs);let n=[0];e.outputCount>1&&n.push(-3),e.outputCount>2&&n.push(-3),e.outputCount>3&&n.push(3),e.compute(xh(e.inputs,t,e.outputCount,!1),{outputs:n})}});var Sh,on,Th,vl,Ih,Ch,$l,xl,Sl=V(()=>{"use strict";Q();ie();Ie();ae();Sh=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((r,n)=>{if(e[n+1].dataType!==6&&e[n+1].dataType!==7)throw new Error(`Input ${n} must be an array of int32 or int64`)})},on=(e,t)=>{let r=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(n=>r.push(Number(n)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(n=>r.push(Number(n)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return r},Th=(e,t)=>{if(e.length>1){let r=on(e,1),n=on(e,2),o=on(e,3);return o.length===0&&(o=[...Array(e[0].dims.length).keys()]),J({starts:r,ends:n,axes:o})}else return t},vl=(e,t,r,n,o)=>{let i=e;return e<0&&(i+=r[n[t]]),o[t]<0?Math.max(0,Math.min(i,r[n[t]]-1)):Math.max(0,Math.min(i,r[n[t]]))},Ih=(e,t,r)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} {
6245
+ }`},T=[{dims:d,dataType:e[0].dataType}];return r>1&&T.push({dims:m,dataType:1}),r>2&&T.push({dims:m,dataType:1}),r>3&&T.push({dims:i,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${$};${w};${g};${y}`,inputDependencies:e.map((C,A)=>"type")},getShaderSource:x,getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(l/c)},programUniforms:v})}},wl=(e,t)=>{Sh(e.inputs);let n=[0];e.outputCount>1&&n.push(-3),e.outputCount>2&&n.push(-3),e.outputCount>3&&n.push(3),e.compute(Th(e.inputs,t,e.outputCount,!1),{outputs:n})}});var Ih,on,Ch,vl,Ah,kh,$l,xl,Sl=U(()=>{"use strict";J();ae();Ie();se();Ih=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((r,n)=>{if(e[n+1].dataType!==6&&e[n+1].dataType!==7)throw new Error(`Input ${n} must be an array of int32 or int64`)})},on=(e,t)=>{let r=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(n=>r.push(Number(n)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(n=>r.push(Number(n)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return r},Ch=(e,t)=>{if(e.length>1){let r=on(e,1),n=on(e,2),o=on(e,3);return o.length===0&&(o=[...Array(e[0].dims.length).keys()]),ee({starts:r,ends:n,axes:o})}else return t},vl=(e,t,r,n,o)=>{let i=e;return e<0&&(i+=r[n[t]]),o[t]<0?Math.max(0,Math.min(i,r[n[t]]-1)):Math.max(0,Math.min(i,r[n[t]]))},Ah=(e,t,r)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} {
6243
6246
  var input_indices: ${e.type.indices};
6244
6247
  var carry = 0u;
6245
6248
  for (var i = ${r.length}; i >= 0; i--) {
@@ -6257,15 +6260,15 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6257
6260
  ${e.indicesSet("input_indices","i","input_index")};
6258
6261
  }
6259
6262
  return input_indices;
6260
- }`,Ch=(e,t)=>{let r=e[0].dims,n=k.size(r),o=t.axes.length>0?k.normalizeAxes(t.axes,r.length):[...Array(r.length).keys()],i=on(e,4);i.forEach($=>$!==0||(()=>{throw new Error("step cannot be 0")})),i.length===0&&(i=Array(o.length).fill(1));let a=t.starts.map(($,v)=>vl($,v,r,o,i)),d=t.ends.map(($,v)=>vl($,v,r,o,i));if(o.length!==a.length||o.length!==d.length)throw new Error("start, ends and axes should have the same number of elements");if(o.length!==r.length)for(let $=0;$<r.length;++$)o.includes($)||(a.splice($,0,0),d.splice($,0,r[$]),i.splice($,0,1));let l=i.map($=>Math.sign($));i.forEach(($,v,x)=>{if($<0){let T=(d[v]-a[v])/$,C=a[v],A=C+T*i[v];a[v]=A,d[v]=C,x[v]=-$}});let c=r.slice(0);o.forEach(($,v)=>{c[$]=Math.ceil((d[$]-a[$])/i[$])});let m={dims:c,dataType:e[0].dataType},u=M("output",e[0].dataType,c.length),h=E("input",e[0].dataType,e[0].dims.length),w=k.size(c),g=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:a.length},{name:"signs",type:"i32",length:l.length},{name:"steps",type:"u32",length:i.length}],y=[{type:12,data:w},{type:12,data:a},{type:6,data:l},{type:12,data:i},...R(e[0].dims,c)],S=$=>`
6263
+ }`,kh=(e,t)=>{let r=e[0].dims,n=k.size(r),o=t.axes.length>0?k.normalizeAxes(t.axes,r.length):[...Array(r.length).keys()],i=on(e,4);i.forEach($=>$!==0||(()=>{throw new Error("step cannot be 0")})),i.length===0&&(i=Array(o.length).fill(1));let a=t.starts.map(($,v)=>vl($,v,r,o,i)),d=t.ends.map(($,v)=>vl($,v,r,o,i));if(o.length!==a.length||o.length!==d.length)throw new Error("start, ends and axes should have the same number of elements");if(o.length!==r.length)for(let $=0;$<r.length;++$)o.includes($)||(a.splice($,0,0),d.splice($,0,r[$]),i.splice($,0,1));let l=i.map($=>Math.sign($));i.forEach(($,v,x)=>{if($<0){let T=(d[v]-a[v])/$,C=a[v],A=C+T*i[v];a[v]=A,d[v]=C,x[v]=-$}});let c=r.slice(0);o.forEach(($,v)=>{c[$]=Math.ceil((d[$]-a[$])/i[$])});let m={dims:c,dataType:e[0].dataType},u=M("output",e[0].dataType,c.length),h=E("input",e[0].dataType,e[0].dims.length),w=k.size(c),g=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:a.length},{name:"signs",type:"i32",length:l.length},{name:"steps",type:"u32",length:i.length}],y=[{type:12,data:w},{type:12,data:a},{type:6,data:l},{type:12,data:i},...V(e[0].dims,c)],S=$=>`
6261
6264
  ${$.registerUniforms(g).declareVariables(h,u)}
6262
- ${Ih(h,u,r)}
6265
+ ${Ah(h,u,r)}
6263
6266
  ${$.mainStart()}
6264
6267
  ${$.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
6265
6268
  let output_indices = ${u.offsetToIndices("global_idx")};
6266
6269
  let input_indices = calculateInputIndices(output_indices);
6267
6270
  ${u.setByOffset("global_idx",h.getByIndices("input_indices"))}
6268
- }`;return{name:"Slice",shaderCache:{hint:`${l.length}_${a.length}_${i.length}`,inputDependencies:["rank"]},getShaderSource:S,getRunData:()=>({outputs:[m],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:y})}},$l=(e,t)=>{Sh(e.inputs,t);let r=Th(e.inputs,t);e.compute(Ch(e.inputs,r),{inputs:[0]})},xl=e=>{let t=e.starts,r=e.ends,n=e.axes;return J({starts:t,ends:r,axes:n})}});var Ah,kh,Tl,Il,Cl=V(()=>{"use strict";Q();ie();Ie();lt();ae();Ah=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},kh=(e,t)=>{let r=e.inputs[0],n=r.dims,o=k.size(n),i=64,a=n.length,d=k.normalizeAxis(t.axis,a),l=d<n.length-1,c,m=[];l?(m=Array.from({length:a},(P,B)=>B),m[d]=a-1,m[a-1]=d,c=e.compute(Pe(r,m),{inputs:[r],outputs:[-1]})[0]):c=r;let u=c.dims,h=u[a-1],w=o/h,g=we(h),y=h/g,S=(P,B)=>B===4?`max(max(${P}.x, ${P}.y), max(${P}.z, ${P}.w))`:B===2?`max(${P}.x, ${P}.y)`:B===3?`max(max(${P}.x, ${P}.y), ${P}.z)`:P,$=E("x",c.dataType,c.dims,g),v=M("result",c.dataType,c.dims,g),x=$.type.value,T=he(c.dataType)==="f32"?`var threadMax = ${x}(-3.402823e+38f);`:`var threadMax = ${x}(-65504.0h);`,C=P=>`
6271
+ }`;return{name:"Slice",shaderCache:{hint:`${l.length}_${a.length}_${i.length}`,inputDependencies:["rank"]},getShaderSource:S,getRunData:()=>({outputs:[m],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:y})}},$l=(e,t)=>{Ih(e.inputs,t);let r=Ch(e.inputs,t);e.compute(kh(e.inputs,r),{inputs:[0]})},xl=e=>{let t=e.starts,r=e.ends,n=e.axes;return ee({starts:t,ends:r,axes:n})}});var Eh,Ph,Tl,Il,Cl=U(()=>{"use strict";J();ae();Ie();lt();se();Eh=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},Ph=(e,t)=>{let r=e.inputs[0],n=r.dims,o=k.size(n),i=64,a=n.length,d=k.normalizeAxis(t.axis,a),l=d<n.length-1,c,m=[];l?(m=Array.from({length:a},(P,D)=>D),m[d]=a-1,m[a-1]=d,c=e.compute(Pe(r,m),{inputs:[r],outputs:[-1]})[0]):c=r;let u=c.dims,h=u[a-1],w=o/h,g=we(h),y=h/g,S=(P,D)=>D===4?`max(max(${P}.x, ${P}.y), max(${P}.z, ${P}.w))`:D===2?`max(${P}.x, ${P}.y)`:D===3?`max(max(${P}.x, ${P}.y), ${P}.z)`:P,$=E("x",c.dataType,c.dims,g),v=M("result",c.dataType,c.dims,g),x=$.type.value,T=he(c.dataType)==="f32"?`var threadMax = ${x}(-3.402823e+38f);`:`var threadMax = ${x}(-65504.0h);`,C=P=>`
6269
6272
  var<workgroup> rowMaxShared : ${x};
6270
6273
  var<workgroup> rowSumShared : ${x};
6271
6274
  var<workgroup> threadShared : array<${x}, ${i}>;
@@ -6328,7 +6331,7 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6328
6331
  workgroupBarrier();
6329
6332
  }
6330
6333
  if (lindex == 0) {
6331
- rowSumShared = ${x}(${Ze("threadShared[0]",g)});
6334
+ rowSumShared = ${x}(${Qe("threadShared[0]",g)});
6332
6335
  }
6333
6336
  workgroupBarrier();
6334
6337
 
@@ -6337,35 +6340,21 @@ ${U}_indices[${Re}] = 0;`}),ye+=`${U}_indices[${oe-2}] = 0u;
6337
6340
  let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared;
6338
6341
  setValue(row, col, row_stride, value);
6339
6342
  }
6340
- }`,A=e.compute({name:"Softmax",shaderCache:{hint:`${g}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:u,dataType:c.dataType}],dispatchGroup:{x:w},programUniforms:[{type:6,data:y}]}),getShaderSource:C},{inputs:[c],outputs:[l?-1:0]})[0];l&&e.compute(Pe(A,m),{inputs:[A]})},Tl=(e,t)=>{Ah(e.inputs),kh(e,t)},Il=e=>J({axis:e.axis})});var Eh,Ph,zh,Oh,Bh,Al,kl,El=V(()=>{"use strict";Q();ie();Ie();ae();Eh=e=>{if(!e||e.length<1)throw new Error("too few inputs")},Ph=(e,t)=>{let r=[],n=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(o=>r.push(Number(o))),n=r.length),J({numOutputs:n,axis:t.axis,splitSizes:r})},zh=e=>`
6341
- fn calculateOutputIndex(index: u32) -> u32 {
6342
- for (var i: u32 = 0u; i < ${e}u; i += 1u ) {
6343
- if (index < ${F("uniforms.size_in_split_axis","i",e)}) {
6344
- return i;
6345
- }
6346
- }
6347
- return ${e}u;
6348
- }`,Oh=e=>{let t=e.length,r=[];for(let n=0;n<t;++n){let o=e[n].setByIndices("indices","input[global_idx]");t===1?r.push(o):n===0?r.push(`if (output_number == ${n}u) { ${o} }`):n===t-1?r.push(`else { ${o} }`):r.push(`else if (output_number == ${n}) { ${o} }`)}return`
6349
- fn writeBufferData(output_number: u32, indices: ${e[0].type.indices}, global_idx: u32) {
6350
- ${r.join(`
6351
- `)}
6352
- }`},Bh=(e,t)=>{let r=e[0].dims,n=k.size(r),o=e[0].dataType,i=k.normalizeAxis(t.axis,r.length),a=new Array(t.numOutputs),d=E("input",o,r.length),l=new Array(t.numOutputs),c=[],m=[],u=0,h=[{type:12,data:n}];for(let g=0;g<t.numOutputs;g++){u+=t.splitSizes[g],l[g]=u;let y=r.slice();y[i]=t.splitSizes[g],m.push(y),a[g]=M(`output${g}`,o,y.length),c.push({dims:m[g],dataType:e[0].dataType})}h.push({type:12,data:l},...R(r,...m));let w=g=>`
6353
- ${g.registerUniform("input_size","u32").registerUniform("size_in_split_axis","u32",l.length).declareVariables(d,...a)}
6354
- ${zh(l.length)}
6355
- ${Oh(a)}
6356
-
6357
- ${g.mainStart()}
6358
- ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")}
6343
+ }`,A=e.compute({name:"Softmax",shaderCache:{hint:`${g}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:u,dataType:c.dataType}],dispatchGroup:{x:w},programUniforms:[{type:6,data:y}]}),getShaderSource:C},{inputs:[c],outputs:[l?-1:0]})[0];l&&e.compute(Pe(A,m),{inputs:[A]})},Tl=(e,t)=>{Eh(e.inputs),Ph(e,t)},Il=e=>ee({axis:e.axis})});var Al,zh,Oh,Dh,kl,El=U(()=>{"use strict";J();ae();se();Al=e=>Array.from(e.getBigInt64Array(),Number),zh=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Al(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor")},Oh=(e,t)=>{let r=[];for(let n=0;n<e.length;++n)r.push(e[n]*t[n]);return r},Dh=(e,t)=>{let r=e[0].dims,n=t??Al(e[1]),o=Oh(r,n),i=k.size(o),a=e[0].dataType,d=E("input",a,r.length),l=M("output",a,o.length),c=m=>`
6344
+ const inputShape = ${d.indices(...r)};
6345
+ ${m.registerUniform("output_size","u32").declareVariables(d,l)}
6346
+ ${m.mainStart()}
6347
+ ${m.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
6348
+ let output_indices = ${l.offsetToIndices("global_idx")};
6349
+ var input_indices: ${d.type.indices};
6350
+ for (var i = 0; i < ${r.length}; i++) {
6351
+ let input_dim_i = ${d.indicesGet("uniforms.input_shape","i")};
6352
+ let input_dim_value = ${l.indicesGet("output_indices","i")} % input_dim_i;
6359
6353
 
6360
- var indices = ${d.offsetToIndices("global_idx")};
6361
- var index = ${d.indicesGet("indices",i)};
6362
- let output_number = calculateOutputIndex(index);
6363
- if (output_number != 0) {
6364
- index -= ${F("uniforms.size_in_split_axis","output_number - 1u",l.length)};
6365
- ${d.indicesSet("indices",i,"index")};
6366
- }
6367
- writeBufferData(output_number, indices, global_idx);
6368
- }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:w,getRunData:()=>({outputs:c,dispatchGroup:{x:Math.ceil(n/64)},programUniforms:h})}},Al=(e,t)=>{Eh(e.inputs);let r=e.inputs.length===1?t:Ph(e.inputs,t);e.compute(Bh(e.inputs,r),{inputs:[0]})},kl=e=>{let t=e.axis,r=e.splitSizes,n=e.numOutputs<0?r.length:e.numOutputs;if(n!==r.length)throw new Error("numOutputs and splitSizes lengh must be equal");return J({axis:t,numOutputs:n,splitSizes:r})}});var Dh,Mh,Pl,zl=V(()=>{"use strict";Q();ie();ae();Dh=(e,t,r,n,o)=>{let i=M("output_data",o,r.length,4),a=E("a_data",t[1].dataType,t[1].dims.length,4),d=E("b_data",t[2].dataType,t[2].dims.length,4),l=E("c_data",t[0].dataType,t[0].dims.length,4),c,m=(u,h,w)=>`select(${h}, ${u}, ${w})`;if(!n)c=i.setByOffset("global_idx",m(a.getByOffset("global_idx"),d.getByOffset("global_idx"),l.getByOffset("global_idx")));else{let u=(h,w,g="")=>{let y=`a_data[index_a${w}][component_a${w}]`,S=`b_data[index_b${w}][component_b${w}]`,$=`bool(c_data[index_c${w}] & (0xffu << (component_c${w} * 8)))`;return`
6354
+ ${d.indicesSet("input_indices","i","input_dim_value")}
6355
+ }
6356
+ ${l.setByOffset("global_idx",d.getByIndices("input_indices"))}
6357
+ }`;return{name:"Tile",shaderCache:{hint:`${n}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:[{type:12,data:i},...V(e[0].dims,o)]}),getShaderSource:c}},kl=e=>{zh(e.inputs),e.compute(Dh(e.inputs),{inputs:[0]})}});var Bh,Mh,Pl,zl=U(()=>{"use strict";J();ae();se();Bh=(e,t,r,n,o)=>{let i=M("output_data",o,r.length,4),a=E("a_data",t[1].dataType,t[1].dims.length,4),d=E("b_data",t[2].dataType,t[2].dims.length,4),l=E("c_data",t[0].dataType,t[0].dims.length,4),c,m=(u,h,w)=>`select(${h}, ${u}, ${w})`;if(!n)c=i.setByOffset("global_idx",m(a.getByOffset("global_idx"),d.getByOffset("global_idx"),l.getByOffset("global_idx")));else{let u=(h,w,g="")=>{let y=`a_data[index_a${w}][component_a${w}]`,S=`b_data[index_b${w}][component_b${w}]`,$=`bool(c_data[index_c${w}] & (0xffu << (component_c${w} * 8)))`;return`
6369
6358
  let output_indices${w} = ${i.offsetToIndices(`global_idx * 4u + ${w}u`)};
6370
6359
  let offset_a${w} = ${a.broadcastedIndicesToOffset(`output_indices${w}`,i)};
6371
6360
  let offset_b${w} = ${d.broadcastedIndicesToOffset(`output_indices${w}`,i)};
@@ -6393,10 +6382,10 @@ fn calculateOutputIndex(index: u32) -> u32 {
6393
6382
  ${e.mainStart()}
6394
6383
  ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
6395
6384
  ${c}
6396
- }`},Mh=e=>{let t=e[1].dims,r=e[2].dims,n=e[0].dims,o=e[1].dataType,i=!(k.areEqual(t,r)&&k.areEqual(r,n)),a=t,d=k.size(t);if(i){let c=rt.calcShape(rt.calcShape(t,r,!1),n,!1);if(!c)throw new Error("Can't perform where op on the given tensors");a=c,d=k.size(a)}let l=Math.ceil(d/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:c=>Dh(c,e,a,i,o),getRunData:()=>({outputs:[{dims:a,dataType:o}],dispatchGroup:{x:Math.ceil(d/64/4)},programUniforms:[{type:12,data:l},...R(n,t,r,a)]})}},Pl=e=>{e.compute(Mh(e.inputs))}});var Ol,Bl=V(()=>{"use strict";Ts();Kr();As();Es();hu();Iu();ku();Hu();Xu();Ju();rd();sd();ld();pd();hd();bd();vd();Sd();Bd();Rd();Vd();fo();Ld();xo();Gd();sl();ll();pl();qr();gl();bl();_l();Sl();Cl();El();To();lt();Xr();zl();Ol=new Map([["Abs",[Ps]],["Acos",[zs]],["Acosh",[Os]],["Add",[gu]],["ArgMax",[Ss,uo]],["ArgMin",[xs,uo]],["Asin",[Bs]],["Asinh",[Ds]],["Atan",[Ms]],["Atanh",[Rs]],["Attention",[Is]],["AveragePool",[Qd,Zd]],["BatchNormalization",[Cs]],["BiasAdd",[ks]],["BiasSplitGelu",[fu]],["Cast",[Vs,Us]],["Ceil",[Ws]],["Clip",[Ns]],["Concat",[Cu,Au]],["Conv",[bo,yo]],["ConvTranspose",[Yu,Ku]],["Cos",[Ls]],["Cosh",[Hs]],["CumSum",[Zu,Qu]],["DepthToSpace",[ed,td]],["DequantizeLinear",[ul,dl]],["Div",[yu]],["Einsum",[id,ad]],["Elu",[Gs,Qt]],["Equal",[bu]],["Erf",[Fs]],["Exp",[qs]],["Expand",[dd]],["FastGelu",[cd]],["Floor",[js]],["FusedConv",[bo,yo]],["Gather",[fd,md]],["GatherElements",[_d,wd]],["GatherBlockQuantized",[gd,yd]],["Gelu",[Ks]],["Gemm",[xd,$d]],["GlobalAveragePool",[tl,el]],["GlobalMaxPool",[al,il]],["Greater",[$u]],["GreaterOrEqual",[Su]],["GroupQueryAttention",[Od,zd]],["HardSigmoid",[ru,tu]],["InstanceNormalization",[Md]],["LayerNormalization",[Ud]],["LeakyRelu",[Ys,Qt]],["Less",[xu]],["LessOrEqual",[Tu]],["Log",[cu]],["MatMul",[Wu]],["MatMulNBits",[Nd,Wd]],["MaxPool",[nl,ol]],["Mul",[wu]],["MultiHeadAttention",[Cd,Id]],["Neg",[Zs]],["Not",[Xs]],["Pad",[Hd]],["Pow",[_u]],["QuickGelu",[pu,Qt]],["Ran
ge",[cl]],["Reciprocal",[Qs]],["ReduceMin",[ys]],["ReduceMean",[ps]],["ReduceMax",[gs]],["ReduceSum",[ws]],["ReduceProd",[bs]],["ReduceL1",[ms]],["ReduceL2",[fs]],["ReduceLogSum",[vs]],["ReduceLogSumExp",[hs]],["ReduceSumSquare",[_s]],["Relu",[Js]],["Resize",[fl,hl]],["RotaryEmbedding",[yl]],["Sigmoid",[eu]],["Sin",[nu]],["Sinh",[ou]],["Slice",[$l,xl]],["SkipLayerNormalization",[wl]],["Split",[Al,kl]],["Sqrt",[iu]],["Softmax",[Tl,Il]],["Sub",[vu]],["Tan",[au]],["Tanh",[uu]],["ThresholdedRelu",[lu,Qt]],["Tile",[kd]],["Transpose",[Ja,es]],["Where",[Pl]]])});var an,Dl=V(()=>{"use strict";Ke();Xe();ae();an=class{constructor(t){this.backend=t;this.repo=new Map,this.attributesBound=!1}getArtifact(t){return this.repo.get(t)}setArtifact(t,r){this.repo.set(t,r)}run(t,r,n,o,i){Le(t.programInfo.name);let a=this.backend.device,d=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let l=[];for(let m of r)l.push({binding:l.length,resource:{buffer:m.buffer}});for(let m of n)l.push({binding:l.length,resource:{buffer:m.buffer}});i&&l.push({binding:l.length,resource:i});let c=a.createBindGroup({layout:t.computePipeline.getBindGroupLayout(0),entries:l,label:t.programInfo.name});if(this.backend.sessionStatus==="capturing"){let m={kernelId:this.backend.currentKernelId,computePipeline:t.computePipeline,bindGroup:c,dispatchGroup:o};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(m)}d.setPipeline(t.computePipeline),d.setBindGroup(0,c),d.dispatchWorkgroups(...o),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),Ve(t.programInfo.name)}dispose(){}build(t,r){Le(t.name);let 
n=this.backend.device,o=[];n.features.has("shader-f16")&&o.push("enable f16;");let i=Za(r,this.backend.device.limits),a=t.getShaderSource(i),d=`${o.join(`
6385
+ }`},Mh=e=>{let t=e[1].dims,r=e[2].dims,n=e[0].dims,o=e[1].dataType,i=!(k.areEqual(t,r)&&k.areEqual(r,n)),a=t,d=k.size(t);if(i){let c=rt.calcShape(rt.calcShape(t,r,!1),n,!1);if(!c)throw new Error("Can't perform where op on the given tensors");a=c,d=k.size(a)}let l=Math.ceil(d/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:c=>Bh(c,e,a,i,o),getRunData:()=>({outputs:[{dims:a,dataType:o}],dispatchGroup:{x:Math.ceil(d/64/4)},programUniforms:[{type:12,data:l},...V(n,t,r,a)]})}},Pl=e=>{e.compute(Mh(e.inputs))}});var Ol,Dl=U(()=>{"use strict";Cs();Kr();Es();zs();yu();Au();Pu();Fu();Zu();td();od();dd();pd();fd();yd();_d();xd();Id();Dd();Rd();Vd();go();Ld();To();Gd();sl();ll();pl();qr();gl();bl();_l();Sl();Cl();Co();El();lt();Xr();zl();Ol=new Map([["Abs",[Os]],["Acos",[Ds]],["Acosh",[Bs]],["Add",[bu]],["ArgMax",[Is,lo]],["ArgMin",[Ts,lo]],["Asin",[Ms]],["Asinh",[Rs]],["Atan",[Us]],["Atanh",[Vs]],["Attention",[As]],["AveragePool",[Zd,Qd]],["BatchNormalization",[ks]],["BiasAdd",[Ps]],["BiasSplitGelu",[gu]],["Cast",[Ns,Ws]],["Ceil",[Hs]],["Clip",[Ls]],["Concat",[ku,Eu]],["Conv",[_o,wo]],["ConvTranspose",[Qu,Xu]],["Cos",[Gs]],["Cosh",[Fs]],["CumSum",[Ju,ed]],["DepthToSpace",[rd,nd]],["DequantizeLinear",[ul,dl]],["Div",[wu]],["Einsum",[sd,ud]],["Elu",[qs,Zt]],["Equal",[_u]],["Erf",[js]],["Exp",[Ks]],["Expand",[cd]],["FastGelu",[md]],["Floor",[Ys]],["FusedConv",[_o,wo]],["Gather",[gd,hd]],["GatherElements",[$d,vd]],["GatherBlockQuantized",[bd,wd]],["Gelu",[Xs]],["Gemm",[Td,Sd]],["GlobalAveragePool",[tl,el]],["GlobalMaxPool",[al,il]],["Greater",[Su]],["GreaterOrEqual",[Iu]],["GroupQueryAttention",[Od]],["HardSigmoid",[ou,nu]],["InstanceNormalization",[Md]],["LayerNormalization",[Ud]],["LeakyRelu",[Qs,Zt]],["Less",[Tu]],["LessOrEqual",[Cu]],["Log",[mu]],["MatMul",[Hu]],["MatMulNBits",[Wd,Nd]],["MaxPool",[nl,ol]],["Mul",[vu]],["MultiHeadAttention",[kd,Ad]],["Neg",[Js]],["Not",[Zs]],["Pad",[Hd]],["Pow",[$u]],["QuickGelu",[fu,Zt]],["Range"
,[cl]],["Reciprocal",[eu]],["ReduceMin",[ws]],["ReduceMean",[fs]],["ReduceMax",[bs]],["ReduceSum",[vs]],["ReduceProd",[_s]],["ReduceL1",[hs]],["ReduceL2",[gs]],["ReduceLogSum",[xs]],["ReduceLogSumExp",[ys]],["ReduceSumSquare",[$s]],["Relu",[tu]],["Resize",[fl,hl]],["RotaryEmbedding",[yl]],["Sigmoid",[ru]],["Sin",[iu]],["Sinh",[au]],["Slice",[$l,xl]],["SkipLayerNormalization",[wl]],["Split",[Ed,Pd]],["Sqrt",[su]],["Softmax",[Tl,Il]],["Sub",[xu]],["Tan",[uu]],["Tanh",[lu]],["ThresholdedRelu",[pu,Zt]],["Tile",[kl]],["Transpose",[ts,rs]],["Where",[Pl]]])});var an,Bl=U(()=>{"use strict";Ke();Xe();se();an=class{constructor(t){this.backend=t;this.repo=new Map,this.attributesBound=!1}getArtifact(t){return this.repo.get(t)}setArtifact(t,r){this.repo.set(t,r)}run(t,r,n,o,i){Le(t.programInfo.name);let a=this.backend.device,d=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let l=[];for(let m of r)l.push({binding:l.length,resource:{buffer:m.buffer}});for(let m of n)l.push({binding:l.length,resource:{buffer:m.buffer}});i&&l.push({binding:l.length,resource:i});let c=a.createBindGroup({layout:t.computePipeline.getBindGroupLayout(0),entries:l,label:t.programInfo.name});if(this.backend.sessionStatus==="capturing"){let m={kernelId:this.backend.currentKernelId,computePipeline:t.computePipeline,bindGroup:c,dispatchGroup:o};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(m)}d.setPipeline(t.computePipeline),d.setBindGroup(0,c),d.dispatchWorkgroups(...o),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),Ve(t.programInfo.name)}dispose(){}build(t,r){Le(t.name);let n=this.backend.device,o=[];n.features.has("shader-f16")&&o.push("enable 
f16;");let i=Ja(r,this.backend.device.limits),a=t.getShaderSource(i),d=`${o.join(`
6397
6386
  `)}
6398
6387
  ${i.additionalImplementations}
6399
- ${a}`,l=n.createShaderModule({code:d,label:t.name});me("verbose",()=>`[WebGPU] ${t.name} shader code: ${d}`);let c=n.createComputePipeline({compute:{module:l,entryPoint:"main"},layout:"auto",label:t.name});return Ve(t.name),{programInfo:t,computePipeline:c,uniformVariablesInfo:i.variablesInfo}}normalizeDispatchGroupSize(t){let r=typeof t=="number"?t:t.x,n=typeof t=="number"?1:t.y||1,o=typeof t=="number"?1:t.z||1,i=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(r<=i&&n<=i&&o<=i)return[r,n,o];let a=r*n*o,d=Math.ceil(Math.sqrt(a));if(d>i){if(d=Math.ceil(Math.cbrt(a)),d>i)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[d,d,d]}else return[d,d,1]}}});var Rh,Uh,Io,sn,Ml=V(()=>{"use strict";Ke();Q();Xe();Xn();Ka();Bl();Dl();Rh=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let r=[];for(let n=0;n<e.length;++n){let o=e[n].dataType;switch(t[n]){case"none":{r.push("");break}case"type":{r.push(`${o}`);break}case"rank":{let i=e[n].dims.length;r.push(`${o};${i}`);break}case"dims":{let i=e[n].dims.join(",");r.push(`${o};${i}`);break}default:throw new Error(`unsupported input dependency: ${t[n]}`)}}return r.join("|")},Uh=(e,t,r)=>{let n=e.name;return e.shaderCache?.hint&&(n+="["+e.shaderCache.hint+"]"),n+=":"+r+`:${Rh(t,e.shaderCache?.inputDependencies??new Array(t.length).fill("dims"))}`,n},Io=class{constructor(t){t&&(this.architecture=t.architecture,this.vendor=t.vendor)}isArchitecture(t){return this.architecture===t}isVendor(t){return this.vendor===t}},sn=class{constructor(){this.currentSessionId=null;this.currentKernelId=null;this.commandEncoder=null;this.computePassEncoder=null;this.maxDispatchNumber=16;this.pendingDispatchNumber=0;this.pendingKernels=[];this.pendingQueries=new Map;this.sessionStatus="default";this.capturedCommandList=new Map;this.capturedPendingKernels=new Map;this.sessionExternalDataMapping=new Map}get 
currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");let t=this.kernelCustomData.get(this.currentKernelId);return t||(t={},this.kernelCustomData.set(this.currentKernelId,t)),t}async initialize(t,r){this.env=t;let n=[],o={requiredLimits:{maxComputeWorkgroupStorageSize:r.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:r.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:r.limits.maxStorageBufferBindingSize,maxBufferSize:r.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:r.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:r.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:r.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:r.limits.maxComputeWorkgroupSizeZ},requiredFeatures:n};r.features.has("chromium-experimental-timestamp-query-inside-passes")?n.push("chromium-experimental-timestamp-query-inside-passes"):r.features.has("timestamp-query")&&n.push("timestamp-query"),r.features.has("shader-f16")&&n.push("shader-f16"),this.device=await r.requestDevice(o),this.adapterInfo=new Io(r.info||await r.requestAdapterInfo()),this.gpuDataManager=ja(this),this.programManager=new an(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,Wr(t.logLevel,!!t.debug),this.device.onuncapturederror=i=>{i.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${i.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:r,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return 
this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let t=this.getCommandEncoder(),r={};this.queryType==="at-passes"&&(r.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=t.beginComputePass(r)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Le(),this.endComputePass();let t;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),t=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(t,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,t,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&t.mapAsync(GPUMapMode.READ).then(()=>{let r=new BigUint64Array(t.getMappedRange()),n=this.pendingQueries.get(t);for(let o=0;o<r.length/2;o++){let i=n[o],a=i.kernelId,d=this.kernels.get(a),l=d.kernelType,c=d.kernelName,m=i.programName,u=i.inputTensorViews,h=i.outputTensorViews,w=r[o*2],g=r[o*2+1];typeof this.queryTimeBase>"u"&&(this.queryTimeBase=w);let y=Number(w-this.queryTimeBase),S=Number(g-this.queryTimeBase);if(!Number.isSafeInteger(y)||!Number.isSafeInteger(S))throw new RangeError("incorrect timestamp 
range");if(this.env.webgpu.profiling?.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:u.map($=>({dims:$.dims,dataType:yt($.dataType)})),outputsMetadata:h.map($=>({dims:$.dims,dataType:yt($.dataType)})),kernelId:a,kernelType:l,kernelName:c,programName:m,startTime:y,endTime:S});else{let $="";u.forEach((x,T)=>{$+=`input[${T}]: [${x.dims}] | ${yt(x.dataType)}, `});let v="";h.forEach((x,T)=>{v+=`output[${T}]: [${x.dims}] | ${yt(x.dataType)}, `}),console.log(`[profiling] kernel "${a}|${l}|${c}|${m}" ${$}${v}execution time: ${S-y} ns`)}Sr("GPU",`${m}::${w}::${g}`)}t.unmap(),this.pendingQueries.delete(t)}),Ve()}run(t,r,n,o,i,a){Le(t.name);let d=[];for(let x=0;x<r.length;++x){let T=r[x].data;if(T===0)continue;let C=this.gpuDataManager.get(T);if(!C)throw new Error(`no GPU data for input: ${T}`);d.push(C)}let{outputs:l,dispatchGroup:c,programUniforms:m}=t.getRunData(r),u=n.length===0?l.map((x,T)=>T):n;if(u.length!==l.length)throw new Error(`Output size ${u.length} must be equal to ${l.length}.`);let h=[],w=[];for(let x=0;x<l.length;++x){if(!Number.isInteger(u[x])||u[x]<-3||u[x]>=a)throw new Error(`Invalid output index: ${u[x]}`);if(u[x]===-3)continue;let T=u[x]===-1,C=u[x]===-2,A=T||C?i(l[x].dataType,l[x].dims):o(u[x],l[x].dataType,l[x].dims);if(h.push(A),A.data===0)continue;let P=this.gpuDataManager.get(A.data);if(!P)throw new Error(`no GPU data for output: ${A.data}`);if(T&&this.temporaryData.push(P),C){let B=this.kernelPersistentData.get(this.currentKernelId);B||(B=[],this.kernelPersistentData.set(this.currentKernelId,B)),B.push(P)}w.push(P)}if(d.length!==r.length||w.length!==h.length){if(w.length===0)return Ve(t.name),h;throw new Error(`Program ${t.name} has zero-sized tensor(s) in inputs or outputs. 
This is not supported now.`)}let g;if(m){let x=0,T=[];m.forEach(B=>{let N=typeof B.data=="number"?[B.data]:B.data;if(N.length===0)return;let W=B.type===10?2:4,K,Z;B.type===10?(Z=N.length>4?16:N.length>2?8:N.length*W,K=N.length>4?16:W*N.length):(Z=N.length<=2?N.length*W:16,K=16),x=Math.ceil(x/Z)*Z,T.push(x);let ee=B.type===10?8:4;x+=N.length>4?Math.ceil(N.length/ee)*K:N.length*W});let C=16;x=Math.ceil(x/C)*C;let A=new ArrayBuffer(x);m.forEach((B,N)=>{let W=T[N],K=typeof B.data=="number"?[B.data]:B.data;if(B.type===6)new Int32Array(A,W,K.length).set(K);else if(B.type===12)new Uint32Array(A,W,K.length).set(K);else if(B.type===10)new Uint16Array(A,W,K.length).set(K);else if(B.type===1)new Float32Array(A,W,K.length).set(K);else throw new Error(`Unsupported uniform type: ${yt(B.type)}`)});let P=this.gpuDataManager.create(x,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(P.buffer,0,A,0,x),this.gpuDataManager.release(P.id),g={offset:0,size:x,buffer:P.buffer}}let y=this.programManager.normalizeDispatchGroupSize(c),S=y[1]===1&&y[2]===1,$=Uh(t,r,S),v=this.programManager.getArtifact($);if(v||(v=this.programManager.build(t,y),this.programManager.setArtifact($,v),me("info",()=>`[artifact] key: ${$}, programName: ${t.name}`)),m&&v.uniformVariablesInfo){if(m.length!==v.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${v.uniformVariablesInfo.length}, got ${m.length} in program "${v.programInfo.name}".`);for(let x=0;x<m.length;x++){let T=m[x],C=T.type,A=typeof T.data=="number"?1:T.data.length,[P,B]=v.uniformVariablesInfo[x];if(C!==P||A!==B)throw new Error(`Uniform variable ${x} mismatch: expect type ${P} with size ${B}, got type ${C} with size ${A} in program "${v.programInfo.name}".`)}}if(me("info",()=>`[ProgramManager] run "${t.name}" (key=${$}) with ${y[0]}x${y[1]}x${y[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let 
x={kernelId:this.currentKernelId,programName:v.programInfo.name,inputTensorViews:r,outputTensorViews:h};this.pendingKernels.push(x),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(x)}return this.programManager.run(v,d,w,y,g),Ve(t.name),h}upload(t,r){this.gpuDataManager.upload(t,r)}memcpy(t,r){this.gpuDataManager.memcpy(t,r)}async download(t,r){await this.gpuDataManager.download(t,r)}alloc(t){return this.gpuDataManager.create(t).id}free(t){return this.gpuDataManager.release(t)}createKernel(t,r,n,o){let i=Ol.get(t);if(!i)throw new Error(`kernel not implemented: ${t}`);let a={kernelType:t,kernelName:o,kernelEntry:i[0],attributes:[i[1],n]};this.kernels.set(r,a)}releaseKernel(t){let r=this.kernelPersistentData.get(t);if(r){for(let n of r)this.gpuDataManager.release(n.id);this.kernelPersistentData.delete(t)}this.kernelCustomData.delete(t),this.kernels.delete(t)}computeKernel(t,r,n){let o=this.kernels.get(t);if(!o)throw new Error(`kernel not created: ${t}`);let i=o.kernelType,a=o.kernelName,d=o.kernelEntry,l=o.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${i}] ${a}" is not allowed to be called recursively`);this.currentKernelId=t,l[0]&&(l[1]=l[0](l[1]),l[0]=void 0),me("info",()=>`[WebGPU] Start to run kernel "[${i}] ${a}"...`);let c=this.env.debug;this.temporaryData=[];try{return c&&this.device.pushErrorScope("validation"),d(r,l[1]),0}catch(m){return n.push(Promise.resolve(`[WebGPU] Kernel "[${i}] ${a}" failed. 
${m}`)),1}finally{c&&n.push(this.device.popErrorScope().then(m=>m?`GPU validation error for kernel "[${i}] ${a}": ${m.message}`:null));for(let m of this.temporaryData)this.gpuDataManager.release(m.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(t,r,n,o){let i=this.sessionExternalDataMapping.get(t);i||(i=new Map,this.sessionExternalDataMapping.set(t,i));let a=i.get(r),d=this.gpuDataManager.registerExternalBuffer(n,o,a);return i.set(r,[d,n]),d}unregisterBuffers(t){let r=this.sessionExternalDataMapping.get(t);r&&(r.forEach(n=>this.gpuDataManager.unregisterExternalBuffer(n[0])),this.sessionExternalDataMapping.delete(t))}getBuffer(t){let r=this.gpuDataManager.get(t);if(!r)throw new Error(`no GPU data for buffer: ${t}`);return r.buffer}createDownloader(t,r,n){return async()=>{let o=await eo(this,t,r);return Lr(o.buffer,n)}}writeTimestamp(t){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,t)}setQueryType(){this.queryType="none",(this.env.webgpu.profiling?.mode==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof 
this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){me("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){me("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){me("info","replay"),this.sessionStatus="replaying";let t=this.capturedCommandList.get(this.currentSessionId),r=this.capturedPendingKernels.get(this.currentSessionId),n=t.length;this.pendingKernels=[];for(let o=0;o<n;o++){let i=this.getComputePassEncoder(),a=t[o];this.writeTimestamp(this.pendingDispatchNumber*2),i.setPipeline(a.computePipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(...a.dispatchGroup),this.writeTimestamp(this.pendingDispatchNumber*2+1),this.pendingDispatchNumber++,this.queryType!=="none"&&this.pendingKernels.push(r[o]),(this.pendingDispatchNumber>=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onReleaseSession(t){this.unregisterBuffers(t),this.capturedCommandList.has(t)&&this.capturedCommandList.delete(t),this.capturedPendingKernels.has(t)&&this.capturedPendingKernels.delete(t),this.gpuDataManager.onReleaseSession(t)}onRunStart(t){this.currentSessionId=t,this.setQueryType()}}});var Vh,Rl,un,Co,Ul,Vl=V(()=>{"use strict";Xe();Vh=1,Rl=()=>Vh++,un=class{constructor(t,r){this.mlContext=t;this.tensorEntry=r,this.tensorCache=r?[r]:[]}get tensor(){return this.tensorEntry?.[0]}get context(){if(!this.mlContext)throw new Error("MLContext has not been set.");return 
this.mlContext}set context(t){if(this.mlContext&&this.mlContext!==t)throw new Error("MLTensor in use in a different MLContext.");this.mlContext=t}destroy(){for(let[t]of this.tensorCache)t.destroy();this.tensorCache=[],this.tensorEntry=void 0}trySelectTensor(t,r){for(let[n,o,i]of this.tensorCache)if(r===n){if(this.context!==t)throw new Error("MLTensor cannot be registered with a different MLContext.");return this.tensorEntry=[n,o,i],!0}return!1}async ensureTensor(t,r,n){if(this.tensorEntry){let[a,d,l]=this.tensorEntry;if(d===t&&l.every((c,m)=>c===r[m]))return a}for(let[a,d,l]of this.tensorCache)if(d===t&&l.every((c,m)=>c===r[m])){if(n&&this.tensorEntry){me("verbose",()=>`[WebNN] Slowdown may occur, having to copy existing tensor {dataType: ${t}, shape: ${r}}`);let c=await this.context.readTensor(this.tensorEntry[0]);this.context.writeTensor(a,c)}return this.tensorEntry=[a,d,l],a}me("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${t}, shape: ${r}}`);let o=MLTensorUsage.READ|MLTensorUsage.WRITE,i=await this.context.createTensor({dataType:t,shape:r,dimensions:r,usage:o});return this.tensorEntry=[i,t,r],this.tensorCache.push(this.tensorEntry),this.activeUpload&&(this.mlContext?.writeTensor(i,this.activeUpload),this.activeUpload=void 0),i}upload(t){if(!this.tensorEntry){this.activeUpload=new Uint8Array(t);return}this.mlContext?.writeTensor(this.tensorEntry[0],t)}async download(t){if(this.activeUpload)if(t){t instanceof ArrayBuffer?new Uint8Array(t).set(this.activeUpload):new Uint8Array(t.buffer,t.byteOffset,t.byteLength).set(this.activeUpload);return}else return this.activeUpload.buffer;if(!this.tensorEntry)throw new Error("Tensor has not been created.");return t?this.context.readTensor(this.tensorEntry[0],t):this.context.readTensor(this.tensorEntry[0])}},Co=class{constructor(t){this.backend=t;this.tensorsById=new Map;this.tensorIdsByContext=new Map}reserveTensorId(){let t=Rl();return this.tensorsById.set(t,new un),t}releaseTensorId(t){let 
r=this.tensorsById.get(t);if(r){r.destroy(),this.tensorsById.delete(t);for(let[n,o]of this.tensorIdsByContext)if(o.has(t)){o.delete(t),o.size===0&&this.tensorIdsByContext.delete(n);break}}}async ensureTensor(t,r,n,o){me("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${t}, dataType: ${r}, shape: ${n}, copyOld: ${o}}`);let i=this.tensorsById.get(t);if(!i)throw new Error("Tensor not found.");return i.context=this.backend.currentContext,this.tensorIdsByContext.has(this.backend.currentContext)||this.tensorIdsByContext.set(this.backend.currentContext,new Set),this.tensorIdsByContext.get(this.backend.currentContext)?.add(t),i.ensureTensor(r,n,o)}upload(t,r){this.tensorsById.get(t).upload(r)}async download(t,r){return me("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${t}, dstBuffer: ${r?.byteLength}}`),this.tensorsById.get(t).download(r)}releaseTensorsForContext(t){let r=this.tensorIdsByContext.get(t);if(r){for(let n of r)this.tensorsById.get(n).destroy(),this.tensorsById.delete(n);this.tensorIdsByContext.delete(t)}}registerTensor(t,r,n,o){for(let[d,l]of this.tensorsById)if(l.trySelectTensor(t,r))return d;let i=Rl();this.tensorsById.set(i,new un(t,[r,n,o]));let a=this.tensorIdsByContext.get(t);return a||(a=new Set,this.tensorIdsByContext.set(t,a)),a.add(i),i}},Ul=(...e)=>new Co(...e)});var Nl,dn,Wl=V(()=>{"use strict";Q();gt();Xn();Vl();Xe();Nl=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),dn=class{constructor(t){this.tensorManager=Ul(this);this.mlContextBySessionId=new Map;this.sessionIdsByMLContext=new Map;Wr(t.logLevel,!!t.debug)}get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(t){this.activeSessionId=t}get currentContext(){let t=this.getMLContext(this.currentSessionId);if(!t)throw new Error(`No MLContext found for session ${this.currentSessionId}`);return 
t}registerMLContext(t,r){this.mlContextBySessionId.set(t,r);let n=this.sessionIdsByMLContext.get(r);n||(n=new Set,this.sessionIdsByMLContext.set(r,n)),n.add(t)}onReleaseSession(t){let r=this.mlContextBySessionId.get(t);if(!r)return;this.mlContextBySessionId.delete(t);let n=this.sessionIdsByMLContext.get(r);n.delete(t),n.size===0&&(this.sessionIdsByMLContext.delete(r),this.tensorManager.releaseTensorsForContext(r))}getMLContext(t){return this.mlContextBySessionId.get(t)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(t){me("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${t}}`),this.tensorManager.releaseTensorId(t)}async ensureTensor(t,r,n,o){let i=Nl.get(r);if(!i)throw new Error(`Unsupported ONNX data type: ${r}`);return this.tensorManager.ensureTensor(t,i,n,o)}uploadTensor(t,r){if(!Te().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");me("verbose",()=>`[WebNN] uploadTensor {tensorId: ${t}, data: ${r.byteLength}}`),this.tensorManager.upload(t,r)}async downloadTensor(t,r){return this.tensorManager.download(t,r)}createMLTensorDownloader(t,r){return async()=>{let n=await this.tensorManager.download(t);return Lr(n,r)}}registerMLTensor(t,r,n){let o=Nl.get(r);if(!o)throw new Error(`Unsupported ONNX data type: ${r}`);let i=this.tensorManager.registerTensor(this.currentContext,t,o,n);return me("verbose",()=>`[WebNN] registerMLTensor {tensor: ${t}, dataType: ${o}, dimensions: ${n}} -> {tensorId: ${i}}`),i}flush(){}}});var Ll={};Gt(Ll,{init:()=>Nh});var or,Ao,Nh,Hl=V(()=>{"use strict";Q();Ml();Xe();ie();Wl();or=class e{constructor(t,r,n,o){this.module=t;this.dataType=r;this.data=n;this.dims=o}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let 
t=k.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(k.size(t)!==k.size(this.dims))throw new Error("Invalid new shape");return new e(this.module,this.dataType,this.data,t)}},Ao=class{constructor(t,r,n){this.module=t;this.backend=r;this.customDataOffset=0;this.customDataSize=0;this.adapterInfo=r.adapterInfo;let o=t.HEAPU32,i=n>>>2;this.opKernelContext=o[i++];let a=o[i++];this.outputCount=o[i++],this.customDataOffset=o[i++],this.customDataSize=o[i++];let d=[];for(let l=0;l<a;l++){let c=o[i++],m=o[i++],u=o[i++],h=[];for(let w=0;w<u;w++)h.push(o[i++]);d.push(new or(t,c,m,h))}this.inputs=d}get kernelCustomData(){return this.backend.currentKernelCustomData}get customDataBuffer(){return this.module.HEAPU8.subarray(this.customDataOffset,this.customDataOffset+this.customDataSize)}getMaxComputeWorkgroupSizes(){return[this.backend.device.limits.maxComputeWorkgroupSizeX,this.backend.device.limits.maxComputeWorkgroupSizeY,this.backend.device.limits.maxComputeWorkgroupSizeZ]}getMaxComputeWorkgroupStoragesize(){return this.backend.device.limits.maxComputeWorkgroupStorageSize}compute(t,r){let n=r?.inputs?.map(d=>typeof d=="number"?this.inputs[d]:d)??this.inputs,o=r?.outputs??[],i=(d,l,c)=>new or(this.module,l,this.output(d,c),c),a=(d,l)=>{let c=It(d,l);if(!c)throw new Error(`Unsupported data type: ${d}`);let m=c>0?this.backend.gpuDataManager.create(c).id:0;return new or(this.module,d,m,l)};return this.backend.run(t,n,o,i,a,this.outputCount)}output(t,r){let n=this.module.stackSave();try{let 
o=this.module.stackAlloc((1+r.length)*4),i=o>>2;this.module.HEAPU32[i++]=r.length;for(let a=0;a<r.length;a++)this.module.HEAPU32[i++]=r[a];return this.module._JsepOutput(this.opKernelContext,t,o)}catch(o){throw new Error(`Failed to generate kernel's output[${t}] with dims [${r}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${o}`)}finally{this.module.stackRestore(n)}}},Nh=async(e,t,r,n)=>{let o=t.jsepInit;if(!o)throw new Error("Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let i=new sn;await i.initialize(r,n),o("webgpu",[i,a=>i.alloc(a),a=>i.free(a),(a,d,l,c=!1)=>{if(c)me("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${a}, dst=${d}, size=${l}`),i.memcpy(a,d);else{me("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${a}, gpuDataId=${d}, size=${l}`);let m=t.HEAPU8.subarray(a>>>0,(a>>>0)+l);i.upload(d,m)}},async(a,d,l)=>{me("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${a}, dataOffset=${d}, size=${l}`),await i.download(a,()=>t.HEAPU8.subarray(d>>>0,(d>>>0)+l))},(a,d,l)=>i.createKernel(a,d,l,t.UTF8ToString(t._JsepGetNodeName(d))),a=>i.releaseKernel(a),(a,d,l,c)=>{me("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${l}, kernel=${a}, contextDataOffset=${d}`);let m=new Ao(t,i,d);return i.computeKernel(a,m,c)},()=>i.captureBegin(),()=>i.captureEnd(),()=>i.replay()])}else{let i=new dn(r);o("webnn",[i,()=>i.reserveTensorId(),a=>i.releaseTensorId(a),async(a,d,l,c)=>i.ensureTensor(a,d,l,c),(a,d)=>{i.uploadTensor(a,d)},async(a,d)=>i.downloadTensor(a,d)])}}});var Wh,kr,Er,Et,Lh,jt,Pr,zr,Gl,Or,Br,Dr,Gn=V(()=>{"use strict";Va();Wa();Q();gt();Rr();Yn();Wh=(e,t)=>{Te()._OrtInit(e,t)!==0&&ve("Can't initialize onnxruntime.")},kr=async e=>{Wh(e.wasm.numThreads,Xt(e.logLevel))},Er=async(e,t)=>{{let r=(Hl(),br(Ll)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let 
n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.")}else{let o=e.webgpu.powerPreference;if(o!==void 0&&o!=="low-power"&&o!=="high-performance")throw new Error(`Invalid powerPreference setting: "${o}"`);let i=e.webgpu.forceFallbackAdapter;if(i!==void 0&&typeof i!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${i}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:o,forceFallbackAdapter:i}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await r("webgpu",Te(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await r("webnn",Te(),e)}}},Et=new Map,Lh=e=>{let t=Te(),r=t.stackSave();try{let n=t.stackAlloc(8);return t._OrtGetInputOutputCount(e,n,n+4)!==0&&ve("Can't get session input/output count."),[t.HEAP32[n/4],t.HEAP32[n/4+1]]}finally{t.stackRestore(r)}},jt=e=>{let t=Te(),r=t._malloc(e.byteLength);if(r===0)throw new Error(`Can't create a session. 
failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},Pr=async(e,t)=>{let r,n,o=Te();Array.isArray(e)?[r,n]=e:e.buffer===o.HEAPU8.buffer?[r,n]=[e.byteOffset,e.byteLength]:[r,n]=jt(e);let i=0,a=0,d=0,l=[],c=[],m=[];try{if([a,l]=Na(t),t?.externalData&&o.mountExternalData){let v=[];for(let x of t.externalData){let T=typeof x=="string"?x:x.path;v.push(Zt(typeof x=="string"?x:x.data).then(C=>{o.mountExternalData(T,C)}))}await Promise.all(v)}for(let v of t?.executionProviders??[])if((typeof v=="string"?v:v.name)==="webnn"){if(o.shouldTransferToMLTensor=!1,o.currentContext)throw new Error("WebNN execution provider is already set.");if(typeof v!="string"){let T=v,C=T?.context,A=T?.gpuDevice,P=T?.deviceType,B=T?.numThreads,N=T?.powerPreference;C?o.currentContext=C:A?o.currentContext=await navigator.ml.createContext(A):o.currentContext=await navigator.ml.createContext({deviceType:P,numThreads:B,powerPreference:N})}else o.currentContext=await navigator.ml.createContext();break}i=await o._OrtCreateSession(r,n,a),i===0&&ve("Can't create a session."),o.currentContext&&(o.jsepRegisterMLContext(i,o.currentContext),o.currentContext=void 0,o.shouldTransferToMLTensor=!0);let[u,h]=Lh(i),w=!!t?.enableGraphCapture,g=[],y=[],S=[];for(let v=0;v<u;v++){let x=o._OrtGetInputName(i,v);x===0&&ve("Can't get an input name."),c.push(x),g.push(o.UTF8ToString(x))}for(let v=0;v<h;v++){let x=o._OrtGetOutputName(i,v);x===0&&ve("Can't get an output name."),m.push(x);let T=o.UTF8ToString(x);y.push(T);{if(w&&t?.preferredOutputLocation===void 0){S.push("gpu-buffer");continue}let C=typeof t?.preferredOutputLocation=="string"?t.preferredOutputLocation:t?.preferredOutputLocation?.[T]??"cpu";if(C!=="cpu"&&C!=="cpu-pinned"&&C!=="gpu-buffer"&&C!=="ml-tensor")throw new Error(`Not supported preferred output location: ${C}.`);if(w&&C!=="gpu-buffer")throw new Error(`Not supported preferred output location: ${C}. 
Only 'gpu-buffer' location is supported when enableGraphCapture is true.`);S.push(C)}}let $=null;return S.some(v=>v==="gpu-buffer"||v==="ml-tensor")&&(d=o._OrtCreateBinding(i),d===0&&ve("Can't create IO binding."),$={handle:d,outputPreferredLocations:S,outputPreferredLocationsEncoded:S.map(v=>Kn(v))}),Et.set(i,[i,c,m,$,w,!1]),[i,g,y]}catch(u){throw c.forEach(h=>o._OrtFree(h)),m.forEach(h=>o._OrtFree(h)),d!==0&&o._OrtReleaseBinding(d),i!==0&&o._OrtReleaseSession(i),u}finally{o._free(r),a!==0&&o._OrtReleaseSessionOptions(a),l.forEach(u=>o._free(u)),o.unmountExternalData?.()}},zr=e=>{let t=Te(),r=Et.get(e);if(!r)throw new Error(`cannot release session. invalid session id: ${e}`);let[n,o,i,a,d]=r;a&&(d&&t._OrtClearBoundOutputs(a.handle),t._OrtReleaseBinding(a.handle)),t.jsepOnReleaseSession?.(e),o.forEach(l=>t._OrtFree(l)),i.forEach(l=>t._OrtFree(l)),t._OrtReleaseSession(n),Et.delete(e)},Gl=(e,t,r,n,o,i=!1)=>{if(!e){t.push(0);return}let a=Te(),d=e[0],l=e[1],c=e[3],m,u;if(d==="string"&&(c==="gpu-buffer"||c==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(i&&c!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${o} when enableGraphCapture is true.`);if(c==="gpu-buffer"){let g=e[2].gpuBuffer;u=It(Yt(d),l);let y=a.jsepRegisterBuffer;if(!y)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');m=y(n,o,g,u)}else if(c==="ml-tensor"){let g=e[2].mlTensor;u=It(Yt(d),l);let y=a.jsepRegisterMLTensor;if(!y)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');m=y(g,Yt(d),l)}else{let g=e[2];if(Array.isArray(g)){u=4*g.length,m=a._malloc(u),r.push(m);let y=m/4;for(let S=0;S<g.length;S++){if(typeof g[S]!="string")throw new TypeError(`tensor data at index ${S} is not a string`);a.HEAPU32[y++]=ke(g[S],r)}}else u=g.byteLength,m=a._malloc(u),r.push(m),a.HEAPU8.set(new Uint8Array(g.buffer,g.byteOffset,u),m)}let h=a.stackSave(),w=a.stackAlloc(4*l.length);try{let 
g=w/4;l.forEach(S=>a.HEAP32[g++]=S);let y=a._OrtCreateTensor(Yt(d),m,u,w,l.length,Kn(c));y===0&&ve(`Can't create tensor for input/output. session=${n}, index=${o}.`),t.push(y)}finally{a.stackRestore(h)}},Or=async(e,t,r,n,o,i)=>{let a=Te(),d=Et.get(e);if(!d)throw new Error(`cannot run inference. invalid session id: ${e}`);let l=d[0],c=d[1],m=d[2],u=d[3],h=d[4],w=d[5],g=t.length,y=n.length,S=0,$=[],v=[],x=[],T=[],C=a.stackSave(),A=a.stackAlloc(g*4),P=a.stackAlloc(g*4),B=a.stackAlloc(y*4),N=a.stackAlloc(y*4);try{a.jsepOnRunStart?.(l),[S,$]=Ua(i);for(let Y=0;Y<g;Y++)Gl(r[Y],v,T,e,t[Y],h);for(let Y=0;Y<y;Y++)Gl(o[Y],x,T,e,g+n[Y],h);let W=A/4,K=P/4,Z=B/4,ee=N/4;for(let Y=0;Y<g;Y++)a.HEAPU32[W++]=v[Y],a.HEAPU32[K++]=c[t[Y]];for(let Y=0;Y<y;Y++)a.HEAPU32[Z++]=x[Y],a.HEAPU32[ee++]=m[n[Y]];if(u&&!w){let{handle:Y,outputPreferredLocations:le,outputPreferredLocationsEncoded:ce}=u;if(c.length!==g)throw new Error(`input count from feeds (${g}) is expected to be always equal to model's input count (${c.length}).`);for(let q=0;q<g;q++){let ue=t[q];await a._OrtBindInput(Y,c[ue],v[q])!==0&&ve(`Can't bind input[${q}] for session=${e}.`)}for(let q=0;q<y;q++){let ue=n[q];o[q]?.[3]?a._OrtBindOutput(Y,m[ue],x[q],0)!==0&&ve(`Can't bind pre-allocated output[${q}] for session=${e}.`):a._OrtBindOutput(Y,m[ue],0,ce[ue])!==0&&ve(`Can't bind output[${q}] to ${le[q]} for session=${e}.`)}Et.set(e,[l,c,m,u,h,!0])}let se;u?se=await a._OrtRunWithBinding(l,u.handle,y,B,S):se=await a._OrtRun(l,P,A,g,N,y,B,S),se!==0&&ve("failed to call OrtRun().");let de=[];for(let Y=0;Y<y;Y++){let le=a.HEAPU32[B/4+Y];if(le===x[Y]){de.push(o[Y]);continue}let ce=a.stackSave(),q=a.stackAlloc(4*4),ue=!1,re,ne=0;try{a._OrtGetTensorData(le,q,q+4,q+8,q+12)!==0&&ve(`Can't access output tensor data on index ${Y}.`);let U=q/4,G=a.HEAPU32[U++];ne=a.HEAPU32[U++];let ye=a.HEAPU32[U++],Re=a.HEAPU32[U++],$e=[];for(let Ae=0;Ae<Re;Ae++)$e.push(a.HEAPU32[ye/4+Ae]);a._OrtFree(ye);let Ce=$e.reduce((Ae,Me)=>Ae*Me,1);re=yt(G);let 
bt=u?.outputPreferredLocations[n[Y]];if(re==="string"){if(bt==="gpu-buffer"||bt==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let Ae=[],Me=ne/4;for(let Ue=0;Ue<Ce;Ue++){let zt=a.HEAPU32[Me++],wt=Ue===Ce-1?void 0:a.HEAPU32[Me]-zt;Ae.push(a.UTF8ToString(zt,wt))}de.push([re,$e,Ae,"cpu"])}else if(bt==="gpu-buffer"&&Ce>0){let Ae=a.jsepGetBuffer;if(!Ae)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let Me=Ae(ne),Ue=It(G,Ce);if(Ue===void 0||!Vr(re))throw new Error(`Unsupported data type: ${re}`);ue=!0,de.push([re,$e,{gpuBuffer:Me,download:a.jsepCreateDownloader(Me,Ue,re),dispose:()=>{a._OrtReleaseTensor(le)}},"gpu-buffer"])}else if(bt==="ml-tensor"&&Ce>0){let Ae=a.jsepEnsureTensor;if(!Ae)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(It(G,Ce)===void 0||!Nr(re))throw new Error(`Unsupported data type: ${re}`);let Ue=await Ae(ne,G,$e,!1);ue=!0,de.push([re,$e,{mlTensor:Ue,download:a.jsepCreateMLTensorDownloader(ne,re),dispose:()=>{a.jsepReleaseTensorId(ne),a._OrtReleaseTensor(le)}},"ml-tensor"])}else{let Ae=Ur(re),Me=new Ae(Ce);new Uint8Array(Me.buffer,Me.byteOffset,Me.byteLength).set(a.HEAPU8.subarray(ne,ne+Me.byteLength)),de.push([re,$e,Me,"cpu"])}}finally{a.stackRestore(ce),re==="string"&&ne&&a._free(ne),ue||a._OrtReleaseTensor(le)}}return u&&!h&&(a._OrtClearBoundOutputs(u.handle),Et.set(e,[l,c,m,u,h,!1])),de}finally{a.stackRestore(C),v.forEach(W=>a._OrtReleaseTensor(W)),x.forEach(W=>a._OrtReleaseTensor(W)),T.forEach(W=>a._free(W)),S!==0&&a._OrtReleaseRunOptions(S),$.forEach(W=>a._free(W))}},Br=e=>{let t=Te(),r=Et.get(e);if(!r)throw new Error("invalid session id");let n=r[0],o=t._OrtEndProfiling(n);o===0&&ve("Can't get an profile file name."),t._OrtFree(o)},Dr=e=>{let t=[];for(let r of e){let n=r[2];!Array.isArray(n)&&"buffer"in n&&t.push(n.buffer)}return t}});var Pt,Ye,ir,cn,pn,ln,ko,Eo,Lt,Ht,Gh,Fl,ql,jl,Kl,Yl,Xl,Zl,Po=V(()=>{"use 
strict";Ke();Gn();gt();qt();Pt=()=>!!_e.wasm.proxy&&typeof document<"u",ir=!1,cn=!1,pn=!1,Eo=new Map,Lt=(e,t)=>{let r=Eo.get(e);r?r.push(t):Eo.set(e,[t])},Ht=()=>{if(ir||!cn||pn||!Ye)throw new Error("worker not ready")},Gh=e=>{switch(e.data.type){case"init-wasm":ir=!1,e.data.err?(pn=!0,ko[1](e.data.err)):(cn=!0,ko[0]()),ln&&(URL.revokeObjectURL(ln),ln=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=Eo.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}default:}},Fl=async()=>{if(!cn){if(ir)throw new Error("multiple calls to 'initWasm()' detected.");if(pn)throw new Error("previous call to 'initWasm()' failed.");if(ir=!0,Pt())return new Promise((e,t)=>{Ye?.terminate(),Da().then(([r,n])=>{try{Ye=n,Ye.onerror=i=>t(i),Ye.onmessage=Gh,ko=[e,t];let o={type:"init-wasm",in:_e};Ye.postMessage(o),ln=r}catch(o){t(o)}},t)});try{await Ar(_e.wasm),await kr(_e),cn=!0}catch(e){throw pn=!0,e}finally{ir=!1}}},ql=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("init-ep",[t,r]);let n={type:"init-ep",in:{epName:e,env:_e}};Ye.postMessage(n)});await Er(_e,e)},jl=async e=>Pt()?(Ht(),new Promise((t,r)=>{Lt("copy-from",[t,r]);let n={type:"copy-from",in:{buffer:e}};Ye.postMessage(n,[e.buffer])})):jt(e),Kl=async(e,t)=>{if(Pt()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return Ht(),new Promise((r,n)=>{Lt("create",[r,n]);let o={type:"create",in:{model:e,options:{...t}}},i=[];e instanceof Uint8Array&&i.push(e.buffer),Ye.postMessage(o,i)})}else return Pr(e,t)},Yl=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("release",[t,r]);let n={type:"release",in:e};Ye.postMessage(n)});zr(e)},Xl=async(e,t,r,n,o,i)=>{if(Pt()){if(r.some(a=>a[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(o.some(a=>a))throw new Error("pre-allocated output tensor is not supported for proxy.");return Ht(),new 
Promise((a,d)=>{Lt("run",[a,d]);let l=r,c={type:"run",in:{sessionId:e,inputIndices:t,inputs:l,outputIndices:n,options:i}};Ye.postMessage(c,Dr(l))})}else return Or(e,t,r,n,o,i)},Zl=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("end-profiling",[t,r]);let n={type:"end-profiling",in:e};Ye.postMessage(n)});Br(e)}});var Ql,Fh,mn,Jl=V(()=>{"use strict";Ke();Po();Q();Cr();Yn();Ql=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},Fh=e=>{switch(e[3]){case"cpu":return new De(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Vr(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:n,dispose:o}=e[2];return De.fromGpuBuffer(r,{dataType:t,dims:e[1],download:n,dispose:o})}case"ml-tensor":{let t=e[0];if(!Nr(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:n,dispose:o}=e[2];return De.fromMLTensor(r,{dataType:t,dims:e[1],download:n,dispose:o})}default:throw new Error(`invalid data location: ${e[3]}`)}},mn=class{async fetchModelAndCopyToWasmMemory(t){return jl(await Zt(t))}async loadModel(t,r){Le();let n;typeof t=="string"? 
false?0:n=await this.fetchModelAndCopyToWasmMemory(t):n=t,[this.sessionId,this.inputNames,this.outputNames]=await Kl(n,r),Ve()}async dispose(){return Yl(this.sessionId)}async run(t,r,n){Le();let o=[],i=[];Object.entries(t).forEach(h=>{let w=h[0],g=h[1],y=this.inputNames.indexOf(w);if(y===-1)throw new Error(`invalid input '${w}'`);o.push(g),i.push(y)});let a=[],d=[];Object.entries(r).forEach(h=>{let w=h[0],g=h[1],y=this.outputNames.indexOf(w);if(y===-1)throw new Error(`invalid output '${w}'`);a.push(g),d.push(y)});let l=o.map((h,w)=>Ql(h,()=>`input "${this.inputNames[i[w]]}"`)),c=a.map((h,w)=>h?Ql(h,()=>`output "${this.outputNames[d[w]]}"`):null),m=await Xl(this.sessionId,i,l,d,c,n),u={};for(let h=0;h<m.length;h++)u[this.outputNames[d[h]]]=a[h]??Fh(m[h]);return Ve(),u}startProfiling(){}endProfiling(){Zl(this.sessionId)}}});var tc={};Gt(tc,{OnnxruntimeWebAssemblyBackend:()=>fn,initializeFlags:()=>ec,wasmBackend:()=>qh});var ec,fn,qh,rc=V(()=>{"use strict";Ke();Po();Jl();qt();ec=()=>{if((typeof _e.wasm.initTimeout!="number"||_e.wasm.initTimeout<0)&&(_e.wasm.initTimeout=0),_e.wasm.simd===!1&&console.warn('Deprecated property "env.wasm.simd" is set to false. 
non-SIMD build is no longer provided, and this setting will be ignored.'),typeof _e.wasm.proxy!="boolean"&&(_e.wasm.proxy=!1),typeof _e.wasm.trace!="boolean"&&(_e.wasm.trace=!1),typeof _e.wasm.numThreads!="number"||!Number.isInteger(_e.wasm.numThreads)||_e.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)_e.wasm.numThreads=1;else{let e=typeof navigator>"u"?Vn("node:os").cpus().length:navigator.hardwareConcurrency;_e.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},fn=class{async init(t){ec(),await Fl(),await ql(t)}async createInferenceSessionHandler(t,r){let n=new mn;return await n.loadModel(t,r),Promise.resolve(n)}},qh=new fn});Ke();Ke();Ke();var Ia="1.20.0-dev.20241016-2b8fc5529b";var Hx=Hn;{let e=(rc(),br(tc)).wasmBackend;St("webgpu",e,5),St("webnn",e,5),St("cpu",e,10),St("wasm",e,10)}Object.defineProperty(_e.versions,"web",{value:Ia,enumerable:!0});
6388
+ ${a}`,l=n.createShaderModule({code:d,label:t.name});pe("verbose",()=>`[WebGPU] ${t.name} shader code: ${d}`);let c=n.createComputePipeline({compute:{module:l,entryPoint:"main"},layout:"auto",label:t.name});return Ve(t.name),{programInfo:t,computePipeline:c,uniformVariablesInfo:i.variablesInfo}}normalizeDispatchGroupSize(t){let r=typeof t=="number"?t:t.x,n=typeof t=="number"?1:t.y||1,o=typeof t=="number"?1:t.z||1,i=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(r<=i&&n<=i&&o<=i)return[r,n,o];let a=r*n*o,d=Math.ceil(Math.sqrt(a));if(d>i){if(d=Math.ceil(Math.cbrt(a)),d>i)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[d,d,d]}else return[d,d,1]}}});var Rh,Uh,Ao,sn,Ml=U(()=>{"use strict";Ke();J();Xe();Qn();Xa();Dl();Bl();Rh=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let r=[];for(let n=0;n<e.length;++n){let o=e[n].dataType;switch(t[n]){case"none":{r.push("");break}case"type":{r.push(`${o}`);break}case"rank":{let i=e[n].dims.length;r.push(`${o};${i}`);break}case"dims":{let i=e[n].dims.join(",");r.push(`${o};${i}`);break}default:throw new Error(`unsupported input dependency: ${t[n]}`)}}return r.join("|")},Uh=(e,t,r)=>{let n=e.name;return e.shaderCache?.hint&&(n+="["+e.shaderCache.hint+"]"),n+=":"+r+`:${Rh(t,e.shaderCache?.inputDependencies??new Array(t.length).fill("dims"))}`,n},Ao=class{constructor(t){t&&(this.architecture=t.architecture,this.vendor=t.vendor)}isArchitecture(t){return this.architecture===t}isVendor(t){return this.vendor===t}},sn=class{constructor(){this.currentSessionId=null;this.currentKernelId=null;this.commandEncoder=null;this.computePassEncoder=null;this.maxDispatchNumber=16;this.pendingDispatchNumber=0;this.pendingKernels=[];this.pendingQueries=new Map;this.sessionStatus="default";this.capturedCommandList=new Map;this.capturedPendingKernels=new Map;this.sessionExternalDataMapping=new Map}get 
currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");let t=this.kernelCustomData.get(this.currentKernelId);return t||(t={},this.kernelCustomData.set(this.currentKernelId,t)),t}async initialize(t,r){this.env=t;let n=[],o={requiredLimits:{maxComputeWorkgroupStorageSize:r.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:r.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:r.limits.maxStorageBufferBindingSize,maxBufferSize:r.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:r.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:r.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:r.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:r.limits.maxComputeWorkgroupSizeZ},requiredFeatures:n};r.features.has("chromium-experimental-timestamp-query-inside-passes")?n.push("chromium-experimental-timestamp-query-inside-passes"):r.features.has("timestamp-query")&&n.push("timestamp-query"),r.features.has("shader-f16")&&n.push("shader-f16"),this.device=await r.requestDevice(o),this.adapterInfo=new Ao(r.info||await r.requestAdapterInfo()),this.gpuDataManager=Ya(this),this.programManager=new an(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,Nr(t.logLevel,!!t.debug),this.device.onuncapturederror=i=>{i.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${i.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:r,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return 
this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let t=this.getCommandEncoder(),r={};this.queryType==="at-passes"&&(r.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=t.beginComputePass(r)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Le(),this.endComputePass();let t;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),t=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(t,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,t,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&t.mapAsync(GPUMapMode.READ).then(()=>{let r=new BigUint64Array(t.getMappedRange()),n=this.pendingQueries.get(t);for(let o=0;o<r.length/2;o++){let i=n[o],a=i.kernelId,d=this.kernels.get(a),l=d.kernelType,c=d.kernelName,m=i.programName,u=i.inputTensorViews,h=i.outputTensorViews,w=r[o*2],g=r[o*2+1];typeof this.queryTimeBase>"u"&&(this.queryTimeBase=w);let y=Number(w-this.queryTimeBase),S=Number(g-this.queryTimeBase);if(!Number.isSafeInteger(y)||!Number.isSafeInteger(S))throw new RangeError("incorrect timestamp 
range");if(this.env.webgpu.profiling?.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:u.map($=>({dims:$.dims,dataType:yt($.dataType)})),outputsMetadata:h.map($=>({dims:$.dims,dataType:yt($.dataType)})),kernelId:a,kernelType:l,kernelName:c,programName:m,startTime:y,endTime:S});else{let $="";u.forEach((x,T)=>{$+=`input[${T}]: [${x.dims}] | ${yt(x.dataType)}, `});let v="";h.forEach((x,T)=>{v+=`output[${T}]: [${x.dims}] | ${yt(x.dataType)}, `}),console.log(`[profiling] kernel "${a}|${l}|${c}|${m}" ${$}${v}execution time: ${S-y} ns`)}Sr("GPU",`${m}::${w}::${g}`)}t.unmap(),this.pendingQueries.delete(t)}),Ve()}run(t,r,n,o,i,a){Le(t.name);let d=[];for(let x=0;x<r.length;++x){let T=r[x].data;if(T===0)continue;let C=this.gpuDataManager.get(T);if(!C)throw new Error(`no GPU data for input: ${T}`);d.push(C)}let{outputs:l,dispatchGroup:c,programUniforms:m}=t.getRunData(r),u=n.length===0?l.map((x,T)=>T):n;if(u.length!==l.length)throw new Error(`Output size ${u.length} must be equal to ${l.length}.`);let h=[],w=[];for(let x=0;x<l.length;++x){if(!Number.isInteger(u[x])||u[x]<-3||u[x]>=a)throw new Error(`Invalid output index: ${u[x]}`);if(u[x]===-3)continue;let T=u[x]===-1,C=u[x]===-2,A=T||C?i(l[x].dataType,l[x].dims):o(u[x],l[x].dataType,l[x].dims);if(h.push(A),A.data===0)continue;let P=this.gpuDataManager.get(A.data);if(!P)throw new Error(`no GPU data for output: ${A.data}`);if(T&&this.temporaryData.push(P),C){let D=this.kernelPersistentData.get(this.currentKernelId);D||(D=[],this.kernelPersistentData.set(this.currentKernelId,D)),D.push(P)}w.push(P)}if(d.length!==r.length||w.length!==h.length){if(w.length===0)return Ve(t.name),h;throw new Error(`Program ${t.name} has zero-sized tensor(s) in inputs or outputs. 
This is not supported now.`)}let g;if(m){let x=0,T=[];m.forEach(D=>{let W=typeof D.data=="number"?[D.data]:D.data;if(W.length===0)return;let N=D.type===10?2:4,j,Y;D.type===10?(Y=W.length>4?16:W.length>2?8:W.length*N,j=W.length>4?16:N*W.length):(Y=W.length<=2?W.length*N:16,j=16),x=Math.ceil(x/Y)*Y,T.push(x);let Z=D.type===10?8:4;x+=W.length>4?Math.ceil(W.length/Z)*j:W.length*N});let C=16;x=Math.ceil(x/C)*C;let A=new ArrayBuffer(x);m.forEach((D,W)=>{let N=T[W],j=typeof D.data=="number"?[D.data]:D.data;if(D.type===6)new Int32Array(A,N,j.length).set(j);else if(D.type===12)new Uint32Array(A,N,j.length).set(j);else if(D.type===10)new Uint16Array(A,N,j.length).set(j);else if(D.type===1)new Float32Array(A,N,j.length).set(j);else throw new Error(`Unsupported uniform type: ${yt(D.type)}`)});let P=this.gpuDataManager.create(x,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(P.buffer,0,A,0,x),this.gpuDataManager.release(P.id),g={offset:0,size:x,buffer:P.buffer}}let y=this.programManager.normalizeDispatchGroupSize(c),S=y[1]===1&&y[2]===1,$=Uh(t,r,S),v=this.programManager.getArtifact($);if(v||(v=this.programManager.build(t,y),this.programManager.setArtifact($,v),pe("info",()=>`[artifact] key: ${$}, programName: ${t.name}`)),m&&v.uniformVariablesInfo){if(m.length!==v.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${v.uniformVariablesInfo.length}, got ${m.length} in program "${v.programInfo.name}".`);for(let x=0;x<m.length;x++){let T=m[x],C=T.type,A=typeof T.data=="number"?1:T.data.length,[P,D]=v.uniformVariablesInfo[x];if(C!==P||A!==D)throw new Error(`Uniform variable ${x} mismatch: expect type ${P} with size ${D}, got type ${C} with size ${A} in program "${v.programInfo.name}".`)}}if(pe("info",()=>`[ProgramManager] run "${t.name}" (key=${$}) with ${y[0]}x${y[1]}x${y[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let 
x={kernelId:this.currentKernelId,programName:v.programInfo.name,inputTensorViews:r,outputTensorViews:h};this.pendingKernels.push(x),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(x)}return this.programManager.run(v,d,w,y,g),Ve(t.name),h}upload(t,r){this.gpuDataManager.upload(t,r)}memcpy(t,r){this.gpuDataManager.memcpy(t,r)}async download(t,r){await this.gpuDataManager.download(t,r)}alloc(t){return this.gpuDataManager.create(t).id}free(t){return this.gpuDataManager.release(t)}createKernel(t,r,n,o){let i=Ol.get(t);if(!i)throw new Error(`kernel not implemented: ${t}`);let a={kernelType:t,kernelName:o,kernelEntry:i[0],attributes:[i[1],n]};this.kernels.set(r,a)}releaseKernel(t){let r=this.kernelPersistentData.get(t);if(r){for(let n of r)this.gpuDataManager.release(n.id);this.kernelPersistentData.delete(t)}this.kernelCustomData.delete(t),this.kernels.delete(t)}computeKernel(t,r,n){let o=this.kernels.get(t);if(!o)throw new Error(`kernel not created: ${t}`);let i=o.kernelType,a=o.kernelName,d=o.kernelEntry,l=o.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${i}] ${a}" is not allowed to be called recursively`);this.currentKernelId=t,l[0]&&(l[1]=l[0](l[1]),l[0]=void 0),pe("info",()=>`[WebGPU] Start to run kernel "[${i}] ${a}"...`);let c=this.env.debug;this.temporaryData=[];try{return c&&this.device.pushErrorScope("validation"),d(r,l[1]),0}catch(m){return n.push(Promise.resolve(`[WebGPU] Kernel "[${i}] ${a}" failed. 
${m}`)),1}finally{c&&n.push(this.device.popErrorScope().then(m=>m?`GPU validation error for kernel "[${i}] ${a}": ${m.message}`:null));for(let m of this.temporaryData)this.gpuDataManager.release(m.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(t,r,n,o){let i=this.sessionExternalDataMapping.get(t);i||(i=new Map,this.sessionExternalDataMapping.set(t,i));let a=i.get(r),d=this.gpuDataManager.registerExternalBuffer(n,o,a);return i.set(r,[d,n]),d}unregisterBuffers(t){let r=this.sessionExternalDataMapping.get(t);r&&(r.forEach(n=>this.gpuDataManager.unregisterExternalBuffer(n[0])),this.sessionExternalDataMapping.delete(t))}getBuffer(t){let r=this.gpuDataManager.get(t);if(!r)throw new Error(`no GPU data for buffer: ${t}`);return r.buffer}createDownloader(t,r,n){return async()=>{let o=await to(this,t,r);return Lr(o.buffer,n)}}writeTimestamp(t){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,t)}setQueryType(){this.queryType="none",(this.env.webgpu.profiling?.mode==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof 
this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){pe("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){pe("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){pe("info","replay"),this.sessionStatus="replaying";let t=this.capturedCommandList.get(this.currentSessionId),r=this.capturedPendingKernels.get(this.currentSessionId),n=t.length;this.pendingKernels=[];for(let o=0;o<n;o++){let i=this.getComputePassEncoder(),a=t[o];this.writeTimestamp(this.pendingDispatchNumber*2),i.setPipeline(a.computePipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(...a.dispatchGroup),this.writeTimestamp(this.pendingDispatchNumber*2+1),this.pendingDispatchNumber++,this.queryType!=="none"&&this.pendingKernels.push(r[o]),(this.pendingDispatchNumber>=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onCreateSession(){this.gpuDataManager.onCreateSession()}onReleaseSession(t){this.unregisterBuffers(t),this.capturedCommandList.has(t)&&this.capturedCommandList.delete(t),this.capturedPendingKernels.has(t)&&this.capturedPendingKernels.delete(t),this.gpuDataManager.onReleaseSession(t)}onRunStart(t){this.currentSessionId=t,this.setQueryType()}}});var Vh,Rl,un,dn,ko,Ul,Vl=U(()=>{"use strict";Xe();Vh=1,Rl=()=>Vh++,un=class{constructor(t){this.sessionId=t.sessionId,this.mlContext=t.context,this.mlTensor=t.tensor,this.dataType=t.dataType,this.tensorShape=t.shape}get 
tensor(){return this.mlTensor}get type(){return this.dataType}get shape(){return this.tensorShape}destroy(){pe("verbose",()=>"[WebNN] TensorWrapper.destroy"),this.mlTensor.destroy()}write(t){this.mlContext.writeTensor(this.mlTensor,t)}async read(t){return t?this.mlContext.readTensor(this.mlTensor,t):this.mlContext.readTensor(this.mlTensor)}sameTypeAndShape(t,r){return this.dataType===t&&this.tensorShape.every((n,o)=>n===r[o])}},dn=class{constructor(t,r){this.tensorManager=t;this.wrapper=r}get tensorWrapper(){return this.wrapper}releaseTensor(){this.tensorWrapper&&this.tensorManager.releaseTensor(this.tensorWrapper)}async ensureTensor(t,r,n){if(this.wrapper){if(this.wrapper.sameTypeAndShape(t,r))return this.wrapper.tensor;n&&(this.activeUpload=new Uint8Array(await this.wrapper.read())),this.tensorManager.releaseTensor(this.wrapper)}let o=MLTensorUsage.READ|MLTensorUsage.WRITE;return this.wrapper=await this.tensorManager.getCachedTensor(t,r,o,!0,!0),n&&this.activeUpload&&(this.wrapper.write(this.activeUpload),this.activeUpload=void 0),this.wrapper.tensor}upload(t){if(this.wrapper){this.wrapper.write(t);return}this.activeUpload?this.activeUpload.set(t):this.activeUpload=new Uint8Array(t)}async download(t){if(this.activeUpload)if(t){t instanceof ArrayBuffer?new Uint8Array(t).set(this.activeUpload):new Uint8Array(t.buffer,t.byteOffset,t.byteLength).set(this.activeUpload);return}else return this.activeUpload.buffer;if(!this.wrapper)throw new Error("Tensor has not been created.");return t?this.wrapper.read(t):this.wrapper.read()}},ko=class{constructor(t){this.backend=t;this.tensorTrackersById=new Map;this.freeTensors=[];this.externalTensors=new Set}reserveTensorId(){let t=Rl();return this.tensorTrackersById.set(t,new dn(this)),t}releaseTensorId(t){let r=this.tensorTrackersById.get(t);r&&(this.tensorTrackersById.delete(t),r.tensorWrapper&&this.releaseTensor(r.tensorWrapper))}async ensureTensor(t,r,n,o){pe("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${t}, 
dataType: ${r}, shape: ${n}, copyOld: ${o}}`);let i=this.tensorTrackersById.get(t);if(!i)throw new Error("Tensor not found.");return i.ensureTensor(r,n,o)}upload(t,r){let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");n.upload(r)}async download(t,r){pe("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${t}, dstBuffer: ${r?.byteLength}}`);let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");return n.download(r)}releaseTensorsForSession(t){for(let r of this.freeTensors)r.sessionId===t&&r.destroy();this.freeTensors=this.freeTensors.filter(r=>r.sessionId!==t)}registerTensor(t,r,n,o){let i=Rl(),a=new un({sessionId:this.backend.currentSessionId,context:t,tensor:r,dataType:n,shape:o});return this.tensorTrackersById.set(i,new dn(this,a)),this.externalTensors.add(a),i}async getCachedTensor(t,r,n,o,i){let a=this.backend.currentSessionId;for(let[c,m]of this.freeTensors.entries())if(m.sameTypeAndShape(t,r)){let u=this.freeTensors.splice(c,1)[0];return u.sessionId=a,u}let d=this.backend.currentContext;pe("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${t}, shape: ${r}}`);let l=await d.createTensor({dataType:t,shape:r,dimensions:r,usage:n,writable:o,readable:i});return new un({sessionId:a,context:d,tensor:l,dataType:t,shape:r})}releaseTensor(t){this.externalTensors.has(t)&&this.externalTensors.delete(t),this.freeTensors.push(t)}},Ul=(...e)=>new ko(...e)});var Wl,ln,Nl=U(()=>{"use strict";J();gt();Qn();Vl();Xe();Wl=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),ln=class{constructor(t){this.tensorManager=Ul(this);this.mlContextBySessionId=new Map;this.sessionIdsByMLContext=new Map;Nr(t.logLevel,!!t.debug)}get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(t){this.activeSessionId=t}get currentContext(){let 
t=this.getMLContext(this.currentSessionId);if(!t)throw new Error(`No MLContext found for session ${this.currentSessionId}`);return t}registerMLContext(t,r){this.mlContextBySessionId.set(t,r);let n=this.sessionIdsByMLContext.get(r);n||(n=new Set,this.sessionIdsByMLContext.set(r,n)),n.add(t)}onReleaseSession(t){let r=this.mlContextBySessionId.get(t);if(!r)return;this.tensorManager.releaseTensorsForSession(t),this.mlContextBySessionId.delete(t);let n=this.sessionIdsByMLContext.get(r);n.delete(t),n.size===0&&this.sessionIdsByMLContext.delete(r)}getMLContext(t){return this.mlContextBySessionId.get(t)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(t){pe("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${t}}`),this.tensorManager.releaseTensorId(t)}async ensureTensor(t,r,n,o){let i=Wl.get(r);if(!i)throw new Error(`Unsupported ONNX data type: ${r}`);return this.tensorManager.ensureTensor(t,i,n,o)}uploadTensor(t,r){if(!Te().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");pe("verbose",()=>`[WebNN] uploadTensor {tensorId: ${t}, data: ${r.byteLength}}`),this.tensorManager.upload(t,r)}async downloadTensor(t,r){return this.tensorManager.download(t,r)}createMLTensorDownloader(t,r){return async()=>{let n=await this.tensorManager.download(t);return Lr(n,r)}}registerMLTensor(t,r,n){let o=Wl.get(r);if(!o)throw new Error(`Unsupported ONNX data type: ${r}`);let i=this.tensorManager.registerTensor(this.currentContext,t,o,n);return pe("verbose",()=>`[WebNN] registerMLTensor {tensor: ${t}, dataType: ${o}, dimensions: ${n}} -> {tensorId: ${i}}`),i}registerMLConstant(t,r,n,o,i,a){if(!a)throw new Error("External mounted files are not available.");let d=t;t.startsWith("./")&&(d=t.substring(2));let l=a.get(d);if(!l)throw new Error(`File with name ${d} not found in preloaded files.`);if(r+n>l.byteLength)throw new Error("Out of bounds: data offset and length exceed the external file data size.");let 
c=l.slice(r,r+n).buffer,m;switch(i.dataType){case"float32":m=new Float32Array(c);break;case"float16":m=new Uint16Array(c);break;case"int32":m=new Int32Array(c);break;case"uint32":m=new Uint32Array(c);break;case"int64":m=new BigInt64Array(c);break;case"uint64":m=new BigUint64Array(c);break;case"int8":m=new Int8Array(c);break;case"uint8":m=new Uint8Array(c);break;default:throw new Error(`Unsupported data type: ${i.dataType} in creating WebNN Constant from external data.`)}return pe("verbose",()=>`[WebNN] registerMLConstant {dataType: ${i.dataType}, shape: ${i.shape}}}`),o.constant(i,m)}flush(){}}});var Ll={};Gt(Ll,{init:()=>Wh});var or,Eo,Wh,Hl=U(()=>{"use strict";J();Ml();Xe();ae();Nl();or=class e{constructor(t,r,n,o){this.module=t;this.dataType=r;this.data=n;this.dims=o}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(k.size(t)!==k.size(this.dims))throw new Error("Invalid new shape");return new e(this.module,this.dataType,this.data,t)}},Eo=class{constructor(t,r,n){this.module=t;this.backend=r;this.customDataOffset=0;this.customDataSize=0;this.adapterInfo=r.adapterInfo;let o=t.HEAPU32,i=n>>>2;this.opKernelContext=o[i++];let a=o[i++];this.outputCount=o[i++],this.customDataOffset=o[i++],this.customDataSize=o[i++];let d=[];for(let l=0;l<a;l++){let 
c=o[i++],m=o[i++],u=o[i++],h=[];for(let w=0;w<u;w++)h.push(o[i++]);d.push(new or(t,c,m,h))}this.inputs=d}get kernelCustomData(){return this.backend.currentKernelCustomData}get customDataBuffer(){return this.module.HEAPU8.subarray(this.customDataOffset,this.customDataOffset+this.customDataSize)}getMaxComputeWorkgroupSizes(){return[this.backend.device.limits.maxComputeWorkgroupSizeX,this.backend.device.limits.maxComputeWorkgroupSizeY,this.backend.device.limits.maxComputeWorkgroupSizeZ]}getMaxComputeWorkgroupStoragesize(){return this.backend.device.limits.maxComputeWorkgroupStorageSize}compute(t,r){let n=r?.inputs?.map(d=>typeof d=="number"?this.inputs[d]:d)??this.inputs,o=r?.outputs??[],i=(d,l,c)=>new or(this.module,l,this.output(d,c),c),a=(d,l)=>{let c=It(d,l);if(!c)throw new Error(`Unsupported data type: ${d}`);let m=c>0?this.backend.gpuDataManager.create(c).id:0;return new or(this.module,d,m,l)};return this.backend.run(t,n,o,i,a,this.outputCount)}output(t,r){let n=this.module.stackSave();try{let o=this.module.stackAlloc((1+r.length)*4),i=o>>2;this.module.HEAPU32[i++]=r.length;for(let a=0;a<r.length;a++)this.module.HEAPU32[i++]=r[a];return this.module._JsepOutput(this.opKernelContext,t,o)}catch(o){throw new Error(`Failed to generate kernel's output[${t}] with dims [${r}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${o}`)}finally{this.module.stackRestore(n)}}},Wh=async(e,t,r,n)=>{let o=t.jsepInit;if(!o)throw new Error("Failed to initialize JSEP. 
The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let i=new sn;await i.initialize(r,n),o("webgpu",[i,a=>i.alloc(a),a=>i.free(a),(a,d,l,c=!1)=>{if(c)pe("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${a}, dst=${d}, size=${l}`),i.memcpy(a,d);else{pe("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${a}, gpuDataId=${d}, size=${l}`);let m=t.HEAPU8.subarray(a>>>0,(a>>>0)+l);i.upload(d,m)}},async(a,d,l)=>{pe("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${a}, dataOffset=${d}, size=${l}`),await i.download(a,()=>t.HEAPU8.subarray(d>>>0,(d>>>0)+l))},(a,d,l)=>i.createKernel(a,d,l,t.UTF8ToString(t._JsepGetNodeName(d))),a=>i.releaseKernel(a),(a,d,l,c)=>{pe("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${l}, kernel=${a}, contextDataOffset=${d}`);let m=new Eo(t,i,d);return i.computeKernel(a,m,c)},()=>i.captureBegin(),()=>i.captureEnd(),()=>i.replay()])}else{let i=new ln(r);o("webnn",[i,()=>i.reserveTensorId(),a=>i.releaseTensorId(a),async(a,d,l,c)=>i.ensureTensor(a,d,l,c),(a,d)=>{i.uploadTensor(a,d)},async(a,d)=>i.downloadTensor(a,d)])}}});var Nh,kr,Er,Et,Lh,jt,Pr,zr,Gl,Or,Dr,Br,Fn=U(()=>{"use strict";Na();Ha();J();gt();Rr();Xn();Nh=(e,t)=>{Te()._OrtInit(e,t)!==0&&ve("Can't initialize onnxruntime.")},kr=async e=>{Nh(e.wasm.numThreads,Xt(e.logLevel))},Er=async(e,t)=>{{let r=(Hl(),br(Ll)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. 
It must be a GPUAdapter object.")}else{let o=e.webgpu.powerPreference;if(o!==void 0&&o!=="low-power"&&o!=="high-performance")throw new Error(`Invalid powerPreference setting: "${o}"`);let i=e.webgpu.forceFallbackAdapter;if(i!==void 0&&typeof i!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${i}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:o,forceFallbackAdapter:i}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await r("webgpu",Te(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await r("webnn",Te(),e)}}},Et=new Map,Lh=e=>{let t=Te(),r=t.stackSave();try{let n=t.stackAlloc(8);return t._OrtGetInputOutputCount(e,n,n+4)!==0&&ve("Can't get session input/output count."),[t.HEAP32[n/4],t.HEAP32[n/4+1]]}finally{t.stackRestore(r)}},jt=e=>{let t=Te(),r=t._malloc(e.byteLength);if(r===0)throw new Error(`Can't create a session. 
failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},Pr=async(e,t)=>{let r,n,o=Te();Array.isArray(e)?[r,n]=e:e.buffer===o.HEAPU8.buffer?[r,n]=[e.byteOffset,e.byteLength]:[r,n]=jt(e);let i=0,a=0,d=0,l=[],c=[],m=[];try{if([a,l]=La(t),t?.externalData&&o.mountExternalData){let v=[];for(let x of t.externalData){let T=typeof x=="string"?x:x.path;v.push(Qt(typeof x=="string"?x:x.data).then(C=>{o.mountExternalData(T,C)}))}await Promise.all(v)}for(let v of t?.executionProviders??[])if((typeof v=="string"?v:v.name)==="webnn"){if(o.shouldTransferToMLTensor=!1,o.currentContext)throw new Error("WebNN execution provider is already set.");if(typeof v!="string"){let T=v,C=T?.context,A=T?.gpuDevice,P=T?.deviceType,D=T?.powerPreference;C?o.currentContext=C:A?o.currentContext=await navigator.ml.createContext(A):o.currentContext=await navigator.ml.createContext({deviceType:P,powerPreference:D})}else o.currentContext=await navigator.ml.createContext();break}i=await o._OrtCreateSession(r,n,a),i===0&&ve("Can't create a session."),o.jsepOnCreateSession?.(),o.currentContext&&(o.jsepRegisterMLContext(i,o.currentContext),o.currentContext=void 0,o.shouldTransferToMLTensor=!0);let[u,h]=Lh(i),w=!!t?.enableGraphCapture,g=[],y=[],S=[];for(let v=0;v<u;v++){let x=o._OrtGetInputName(i,v);x===0&&ve("Can't get an input name."),c.push(x),g.push(o.UTF8ToString(x))}for(let v=0;v<h;v++){let x=o._OrtGetOutputName(i,v);x===0&&ve("Can't get an output name."),m.push(x);let T=o.UTF8ToString(x);y.push(T);{if(w&&t?.preferredOutputLocation===void 0){S.push("gpu-buffer");continue}let C=typeof t?.preferredOutputLocation=="string"?t.preferredOutputLocation:t?.preferredOutputLocation?.[T]??"cpu";if(C!=="cpu"&&C!=="cpu-pinned"&&C!=="gpu-buffer"&&C!=="ml-tensor")throw new Error(`Not supported preferred output location: ${C}.`);if(w&&C!=="gpu-buffer")throw new Error(`Not supported preferred output location: ${C}. 
Only 'gpu-buffer' location is supported when enableGraphCapture is true.`);S.push(C)}}let $=null;return S.some(v=>v==="gpu-buffer"||v==="ml-tensor")&&(d=o._OrtCreateBinding(i),d===0&&ve("Can't create IO binding."),$={handle:d,outputPreferredLocations:S,outputPreferredLocationsEncoded:S.map(v=>Yn(v))}),Et.set(i,[i,c,m,$,w,!1]),[i,g,y]}catch(u){throw c.forEach(h=>o._OrtFree(h)),m.forEach(h=>o._OrtFree(h)),d!==0&&o._OrtReleaseBinding(d),i!==0&&o._OrtReleaseSession(i),u}finally{o._free(r),a!==0&&o._OrtReleaseSessionOptions(a),l.forEach(u=>o._free(u)),o.unmountExternalData?.()}},zr=e=>{let t=Te(),r=Et.get(e);if(!r)throw new Error(`cannot release session. invalid session id: ${e}`);let[n,o,i,a,d]=r;a&&(d&&t._OrtClearBoundOutputs(a.handle),t._OrtReleaseBinding(a.handle)),t.jsepOnReleaseSession?.(e),o.forEach(l=>t._OrtFree(l)),i.forEach(l=>t._OrtFree(l)),t._OrtReleaseSession(n),Et.delete(e)},Gl=(e,t,r,n,o,i=!1)=>{if(!e){t.push(0);return}let a=Te(),d=e[0],l=e[1],c=e[3],m,u;if(d==="string"&&(c==="gpu-buffer"||c==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(i&&c!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${o} when enableGraphCapture is true.`);if(c==="gpu-buffer"){let g=e[2].gpuBuffer;u=It(Yt(d),l);let y=a.jsepRegisterBuffer;if(!y)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');m=y(n,o,g,u)}else if(c==="ml-tensor"){let g=e[2].mlTensor;u=It(Yt(d),l);let y=a.jsepRegisterMLTensor;if(!y)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');m=y(g,Yt(d),l)}else{let g=e[2];if(Array.isArray(g)){u=4*g.length,m=a._malloc(u),r.push(m);let y=m/4;for(let S=0;S<g.length;S++){if(typeof g[S]!="string")throw new TypeError(`tensor data at index ${S} is not a string`);a.HEAPU32[y++]=ke(g[S],r)}}else u=g.byteLength,m=a._malloc(u),r.push(m),a.HEAPU8.set(new Uint8Array(g.buffer,g.byteOffset,u),m)}let h=a.stackSave(),w=a.stackAlloc(4*l.length);try{let 
g=w/4;l.forEach(S=>a.HEAP32[g++]=S);let y=a._OrtCreateTensor(Yt(d),m,u,w,l.length,Yn(c));y===0&&ve(`Can't create tensor for input/output. session=${n}, index=${o}.`),t.push(y)}finally{a.stackRestore(h)}},Or=async(e,t,r,n,o,i)=>{let a=Te(),d=Et.get(e);if(!d)throw new Error(`cannot run inference. invalid session id: ${e}`);let l=d[0],c=d[1],m=d[2],u=d[3],h=d[4],w=d[5],g=t.length,y=n.length,S=0,$=[],v=[],x=[],T=[],C=a.stackSave(),A=a.stackAlloc(g*4),P=a.stackAlloc(g*4),D=a.stackAlloc(y*4),W=a.stackAlloc(y*4);try{a.jsepOnRunStart?.(l),[S,$]=Wa(i);for(let K=0;K<g;K++)Gl(r[K],v,T,e,t[K],h);for(let K=0;K<y;K++)Gl(o[K],x,T,e,g+n[K],h);let N=A/4,j=P/4,Y=D/4,Z=W/4;for(let K=0;K<g;K++)a.HEAPU32[N++]=v[K],a.HEAPU32[j++]=c[t[K]];for(let K=0;K<y;K++)a.HEAPU32[Y++]=x[K],a.HEAPU32[Z++]=m[n[K]];if(u&&!w){let{handle:K,outputPreferredLocations:de,outputPreferredLocationsEncoded:ce}=u;if(c.length!==g)throw new Error(`input count from feeds (${g}) is expected to be always equal to model's input count (${c.length}).`);for(let q=0;q<g;q++){let le=t[q];await a._OrtBindInput(K,c[le],v[q])!==0&&ve(`Can't bind input[${q}] for session=${e}.`)}for(let q=0;q<y;q++){let le=n[q];o[q]?.[3]?a._OrtBindOutput(K,m[le],x[q],0)!==0&&ve(`Can't bind pre-allocated output[${q}] for session=${e}.`):a._OrtBindOutput(K,m[le],0,ce[le])!==0&&ve(`Can't bind output[${q}] to ${de[q]} for session=${e}.`)}Et.set(e,[l,c,m,u,h,!0])}let te;u?te=await a._OrtRunWithBinding(l,u.handle,y,D,S):te=await a._OrtRun(l,P,A,g,W,y,D,S),te!==0&&ve("failed to call OrtRun().");let ue=[];for(let K=0;K<y;K++){let de=a.HEAPU32[D/4+K];if(de===x[K]){ue.push(o[K]);continue}let ce=a.stackSave(),q=a.stackAlloc(4*4),le=!1,re,ne=0;try{a._OrtGetTensorData(de,q,q+4,q+8,q+12)!==0&&ve(`Can't access output tensor data on index ${K}.`);let R=q/4,G=a.HEAPU32[R++];ne=a.HEAPU32[R++];let ye=a.HEAPU32[R++],Re=a.HEAPU32[R++],$e=[];for(let Ae=0;Ae<Re;Ae++)$e.push(a.HEAPU32[ye/4+Ae]);a._OrtFree(ye);let Ce=$e.reduce((Ae,Me)=>Ae*Me,1);re=yt(G);let 
bt=u?.outputPreferredLocations[n[K]];if(re==="string"){if(bt==="gpu-buffer"||bt==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let Ae=[],Me=ne/4;for(let Ue=0;Ue<Ce;Ue++){let zt=a.HEAPU32[Me++],wt=Ue===Ce-1?void 0:a.HEAPU32[Me]-zt;Ae.push(a.UTF8ToString(zt,wt))}ue.push([re,$e,Ae,"cpu"])}else if(bt==="gpu-buffer"&&Ce>0){let Ae=a.jsepGetBuffer;if(!Ae)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let Me=Ae(ne),Ue=It(G,Ce);if(Ue===void 0||!Vr(re))throw new Error(`Unsupported data type: ${re}`);le=!0,ue.push([re,$e,{gpuBuffer:Me,download:a.jsepCreateDownloader(Me,Ue,re),dispose:()=>{a._OrtReleaseTensor(de)}},"gpu-buffer"])}else if(bt==="ml-tensor"&&Ce>0){let Ae=a.jsepEnsureTensor;if(!Ae)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(It(G,Ce)===void 0||!Wr(re))throw new Error(`Unsupported data type: ${re}`);let Ue=await Ae(ne,G,$e,!1);le=!0,ue.push([re,$e,{mlTensor:Ue,download:a.jsepCreateMLTensorDownloader(ne,re),dispose:()=>{a.jsepReleaseTensorId(ne),a._OrtReleaseTensor(de)}},"ml-tensor"])}else{let Ae=Ur(re),Me=new Ae(Ce);new Uint8Array(Me.buffer,Me.byteOffset,Me.byteLength).set(a.HEAPU8.subarray(ne,ne+Me.byteLength)),ue.push([re,$e,Me,"cpu"])}}finally{a.stackRestore(ce),re==="string"&&ne&&a._free(ne),le||a._OrtReleaseTensor(de)}}return u&&!h&&(a._OrtClearBoundOutputs(u.handle),Et.set(e,[l,c,m,u,h,!1])),ue}finally{a.stackRestore(C),v.forEach(N=>a._OrtReleaseTensor(N)),x.forEach(N=>a._OrtReleaseTensor(N)),T.forEach(N=>a._free(N)),S!==0&&a._OrtReleaseRunOptions(S),$.forEach(N=>a._free(N))}},Dr=e=>{let t=Te(),r=Et.get(e);if(!r)throw new Error("invalid session id");let n=r[0],o=t._OrtEndProfiling(n);o===0&&ve("Can't get an profile file name."),t._OrtFree(o)},Br=e=>{let t=[];for(let r of e){let n=r[2];!Array.isArray(n)&&"buffer"in n&&t.push(n.buffer)}return t}});var Pt,Ye,ir,pn,mn,cn,Po,zo,Lt,Ht,Gh,Fl,ql,jl,Kl,Yl,Xl,Ql,Oo=U(()=>{"use 
strict";Ke();Fn();gt();qt();Pt=()=>!!_e.wasm.proxy&&typeof document<"u",ir=!1,pn=!1,mn=!1,zo=new Map,Lt=(e,t)=>{let r=zo.get(e);r?r.push(t):zo.set(e,[t])},Ht=()=>{if(ir||!pn||mn||!Ye)throw new Error("worker not ready")},Gh=e=>{switch(e.data.type){case"init-wasm":ir=!1,e.data.err?(mn=!0,Po[1](e.data.err)):(pn=!0,Po[0]()),cn&&(URL.revokeObjectURL(cn),cn=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=zo.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}default:}},Fl=async()=>{if(!pn){if(ir)throw new Error("multiple calls to 'initWasm()' detected.");if(mn)throw new Error("previous call to 'initWasm()' failed.");if(ir=!0,Pt())return new Promise((e,t)=>{Ye?.terminate(),Ra().then(([r,n])=>{try{Ye=n,Ye.onerror=i=>t(i),Ye.onmessage=Gh,Po=[e,t];let o={type:"init-wasm",in:_e};Ye.postMessage(o),cn=r}catch(o){t(o)}},t)});try{await Ar(_e.wasm),await kr(_e),pn=!0}catch(e){throw mn=!0,e}finally{ir=!1}}},ql=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("init-ep",[t,r]);let n={type:"init-ep",in:{epName:e,env:_e}};Ye.postMessage(n)});await Er(_e,e)},jl=async e=>Pt()?(Ht(),new Promise((t,r)=>{Lt("copy-from",[t,r]);let n={type:"copy-from",in:{buffer:e}};Ye.postMessage(n,[e.buffer])})):jt(e),Kl=async(e,t)=>{if(Pt()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return Ht(),new Promise((r,n)=>{Lt("create",[r,n]);let o={type:"create",in:{model:e,options:{...t}}},i=[];e instanceof Uint8Array&&i.push(e.buffer),Ye.postMessage(o,i)})}else return Pr(e,t)},Yl=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("release",[t,r]);let n={type:"release",in:e};Ye.postMessage(n)});zr(e)},Xl=async(e,t,r,n,o,i)=>{if(Pt()){if(r.some(a=>a[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(o.some(a=>a))throw new Error("pre-allocated output tensor is not supported for proxy.");return Ht(),new 
Promise((a,d)=>{Lt("run",[a,d]);let l=r,c={type:"run",in:{sessionId:e,inputIndices:t,inputs:l,outputIndices:n,options:i}};Ye.postMessage(c,Br(l))})}else return Or(e,t,r,n,o,i)},Ql=async e=>{if(Pt())return Ht(),new Promise((t,r)=>{Lt("end-profiling",[t,r]);let n={type:"end-profiling",in:e};Ye.postMessage(n)});Dr(e)}});var Zl,Fh,fn,Jl=U(()=>{"use strict";Ke();Oo();J();Cr();Xn();Zl=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},Fh=e=>{switch(e[3]){case"cpu":return new Be(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Vr(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:n,dispose:o}=e[2];return Be.fromGpuBuffer(r,{dataType:t,dims:e[1],download:n,dispose:o})}case"ml-tensor":{let t=e[0];if(!Wr(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:n,dispose:o}=e[2];return Be.fromMLTensor(r,{dataType:t,dims:e[1],download:n,dispose:o})}default:throw new Error(`invalid data location: ${e[3]}`)}},fn=class{async fetchModelAndCopyToWasmMemory(t){return jl(await Qt(t))}async loadModel(t,r){Le();let n;typeof t=="string"? 
false?0:n=await this.fetchModelAndCopyToWasmMemory(t):n=t,[this.sessionId,this.inputNames,this.outputNames]=await Kl(n,r),Ve()}async dispose(){return Yl(this.sessionId)}async run(t,r,n){Le();let o=[],i=[];Object.entries(t).forEach(h=>{let w=h[0],g=h[1],y=this.inputNames.indexOf(w);if(y===-1)throw new Error(`invalid input '${w}'`);o.push(g),i.push(y)});let a=[],d=[];Object.entries(r).forEach(h=>{let w=h[0],g=h[1],y=this.outputNames.indexOf(w);if(y===-1)throw new Error(`invalid output '${w}'`);a.push(g),d.push(y)});let l=o.map((h,w)=>Zl(h,()=>`input "${this.inputNames[i[w]]}"`)),c=a.map((h,w)=>h?Zl(h,()=>`output "${this.outputNames[d[w]]}"`):null),m=await Xl(this.sessionId,i,l,d,c,n),u={};for(let h=0;h<m.length;h++)u[this.outputNames[d[h]]]=a[h]??Fh(m[h]);return Ve(),u}startProfiling(){}endProfiling(){Ql(this.sessionId)}}});var tc={};Gt(tc,{OnnxruntimeWebAssemblyBackend:()=>hn,initializeFlags:()=>ec,wasmBackend:()=>qh});var ec,hn,qh,rc=U(()=>{"use strict";Ke();Oo();Jl();qt();ec=()=>{if((typeof _e.wasm.initTimeout!="number"||_e.wasm.initTimeout<0)&&(_e.wasm.initTimeout=0),_e.wasm.simd===!1&&console.warn('Deprecated property "env.wasm.simd" is set to false. 
non-SIMD build is no longer provided, and this setting will be ignored.'),typeof _e.wasm.proxy!="boolean"&&(_e.wasm.proxy=!1),typeof _e.wasm.trace!="boolean"&&(_e.wasm.trace=!1),typeof _e.wasm.numThreads!="number"||!Number.isInteger(_e.wasm.numThreads)||_e.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)_e.wasm.numThreads=1;else{let e=typeof navigator>"u"?Wn("node:os").cpus().length:navigator.hardwareConcurrency;_e.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},hn=class{async init(t){ec(),await Fl(),await ql(t)}async createInferenceSessionHandler(t,r){let n=new fn;return await n.loadModel(t,r),Promise.resolve(n)}},qh=new hn});Ke();Ke();Ke();var Aa="1.21.0-dev.20241024-d9ca84ef96";var Vx=Gn;{let e=(rc(),br(tc)).wasmBackend;St("webgpu",e,5),St("webnn",e,5),St("cpu",e,10),St("wasm",e,10)}Object.defineProperty(_e.versions,"web",{value:Aa,enumerable:!0});
6400
6389
  /**
6401
6390
  * @license
6402
6391
  * Copyright 2021 Google LLC. All Rights Reserved.
@@ -7122,7 +7111,7 @@ __webpack_require__.r(__webpack_exports__);
7122
7111
 
7123
7112
 
7124
7113
 
7125
- const VERSION = '3.0.0';
7114
+ const VERSION = '3.0.1';
7126
7115
 
7127
7116
  // Check if various APIs are available (depends on environment)
7128
7117
  const IS_BROWSER_ENV = typeof self !== 'undefined';
@@ -9837,7 +9826,7 @@ function replaceTensors(obj) {
9837
9826
 
9838
9827
  /**
9839
9828
  * Converts an array or Tensor of integers to an int64 Tensor.
9840
- * @param {Array|Tensor} items The input integers to be converted.
9829
+ * @param {any[]|Tensor} items The input integers to be converted.
9841
9830
  * @returns {Tensor} The int64 Tensor with the converted values.
9842
9831
  * @throws {Error} If the input array is empty or the input is a batched Tensor and not all sequences have the same length.
9843
9832
  * @private
@@ -10760,35 +10749,37 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
10760
10749
  let { decoder_input_ids, ...model_inputs } = model_kwargs;
10761
10750
 
10762
10751
  // Prepare input ids if the user has not defined `decoder_input_ids` manually.
10763
- if (!decoder_input_ids) {
10764
- decoder_start_token_id ??= bos_token_id;
10765
-
10766
- if (this.config.model_type === 'musicgen') {
10767
- // Custom logic (TODO: move to Musicgen class)
10768
- decoder_input_ids = Array.from({
10769
- length: batch_size * this.config.decoder.num_codebooks
10770
- }, () => [decoder_start_token_id]);
10771
-
10772
- } else if (Array.isArray(decoder_start_token_id)) {
10773
- if (decoder_start_token_id.length !== batch_size) {
10774
- throw new Error(
10775
- `\`decoder_start_token_id\` expcted to have length ${batch_size} but got ${decoder_start_token_id.length}`
10776
- )
10752
+ if (!(decoder_input_ids instanceof _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor)) {
10753
+ if (!decoder_input_ids) {
10754
+ decoder_start_token_id ??= bos_token_id;
10755
+
10756
+ if (this.config.model_type === 'musicgen') {
10757
+ // Custom logic (TODO: move to Musicgen class)
10758
+ decoder_input_ids = Array.from({
10759
+ length: batch_size * this.config.decoder.num_codebooks
10760
+ }, () => [decoder_start_token_id]);
10761
+
10762
+ } else if (Array.isArray(decoder_start_token_id)) {
10763
+ if (decoder_start_token_id.length !== batch_size) {
10764
+ throw new Error(
10765
+ `\`decoder_start_token_id\` expcted to have length ${batch_size} but got ${decoder_start_token_id.length}`
10766
+ )
10767
+ }
10768
+ decoder_input_ids = decoder_start_token_id;
10769
+ } else {
10770
+ decoder_input_ids = Array.from({
10771
+ length: batch_size,
10772
+ }, () => [decoder_start_token_id]);
10777
10773
  }
10778
- decoder_input_ids = decoder_start_token_id;
10779
- } else {
10774
+ } else if (!Array.isArray(decoder_input_ids[0])) {
10775
+ // Correct batch size
10780
10776
  decoder_input_ids = Array.from({
10781
10777
  length: batch_size,
10782
- }, () => [decoder_start_token_id]);
10778
+ }, () => decoder_input_ids);
10783
10779
  }
10784
- } else if (!Array.isArray(decoder_input_ids[0])) {
10785
- // Correct batch size
10786
- decoder_input_ids = Array.from({
10787
- length: batch_size,
10788
- }, () => decoder_input_ids);
10780
+ decoder_input_ids = toI64Tensor(decoder_input_ids);
10789
10781
  }
10790
10782
 
10791
- decoder_input_ids = toI64Tensor(decoder_input_ids);
10792
10783
  model_kwargs['decoder_attention_mask'] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.ones_like)(decoder_input_ids);
10793
10784
 
10794
10785
  return { input_ids: decoder_input_ids, model_inputs };
@@ -12611,8 +12602,11 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
12611
12602
  class VisionEncoderDecoderModel extends PreTrainedModel {
12612
12603
  main_input_name = 'pixel_values';
12613
12604
  forward_params = [
12605
+ // Encoder inputs
12614
12606
  'pixel_values',
12615
- 'input_ids',
12607
+
12608
+ // Decoder inputs
12609
+ 'decoder_input_ids',
12616
12610
  'encoder_hidden_states',
12617
12611
  'past_key_values',
12618
12612
  ];
@@ -19224,7 +19218,6 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
19224
19218
 
19225
19219
  /** @type {DocumentQuestionAnsweringPipelineCallback} */
19226
19220
  async _call(image, question, generate_kwargs = {}) {
19227
- throw new Error('This pipeline is not yet supported in Transformers.js v3.'); // TODO: Remove when implemented
19228
19221
 
19229
19222
  // NOTE: For now, we only support a batch size of 1
19230
19223
 
@@ -20024,6 +20017,7 @@ __webpack_require__.r(__webpack_exports__);
20024
20017
  /* harmony export */ DeiTFeatureExtractor: () => (/* binding */ DeiTFeatureExtractor),
20025
20018
  /* harmony export */ DetrFeatureExtractor: () => (/* binding */ DetrFeatureExtractor),
20026
20019
  /* harmony export */ DonutFeatureExtractor: () => (/* binding */ DonutFeatureExtractor),
20020
+ /* harmony export */ DonutImageProcessor: () => (/* binding */ DonutImageProcessor),
20027
20021
  /* harmony export */ EfficientNetImageProcessor: () => (/* binding */ EfficientNetImageProcessor),
20028
20022
  /* harmony export */ FeatureExtractor: () => (/* binding */ FeatureExtractor),
20029
20023
  /* harmony export */ Florence2Processor: () => (/* binding */ Florence2Processor),
@@ -21266,6 +21260,7 @@ class DonutFeatureExtractor extends ImageFeatureExtractor {
21266
21260
  });
21267
21261
  }
21268
21262
  }
21263
+ class DonutImageProcessor extends DonutFeatureExtractor { } // NOTE extends DonutFeatureExtractor
21269
21264
  class NougatImageProcessor extends DonutFeatureExtractor { } // NOTE extends DonutFeatureExtractor
21270
21265
 
21271
21266
  /**
@@ -22626,6 +22621,7 @@ class AutoProcessor {
22626
22621
  MaskFormerFeatureExtractor,
22627
22622
  YolosFeatureExtractor,
22628
22623
  DonutFeatureExtractor,
22624
+ DonutImageProcessor,
22629
22625
  NougatImageProcessor,
22630
22626
  EfficientNetImageProcessor,
22631
22627
 
@@ -32978,6 +32974,7 @@ __webpack_require__.r(__webpack_exports__);
32978
32974
  /* harmony export */ DistilBertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.DistilBertTokenizer),
32979
32975
  /* harmony export */ DocumentQuestionAnsweringPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.DocumentQuestionAnsweringPipeline),
32980
32976
  /* harmony export */ DonutFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DonutFeatureExtractor),
32977
+ /* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DonutImageProcessor),
32981
32978
  /* harmony export */ DonutSwinModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DonutSwinModel),
32982
32979
  /* harmony export */ DonutSwinPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DonutSwinPreTrainedModel),
32983
32980
  /* harmony export */ EfficientNetForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EfficientNetForImageClassification),
@@ -33631,6 +33628,7 @@ var __webpack_exports__DistilBertPreTrainedModel = __webpack_exports__.DistilBer
33631
33628
  var __webpack_exports__DistilBertTokenizer = __webpack_exports__.DistilBertTokenizer;
33632
33629
  var __webpack_exports__DocumentQuestionAnsweringPipeline = __webpack_exports__.DocumentQuestionAnsweringPipeline;
33633
33630
  var __webpack_exports__DonutFeatureExtractor = __webpack_exports__.DonutFeatureExtractor;
33631
+ var __webpack_exports__DonutImageProcessor = __webpack_exports__.DonutImageProcessor;
33634
33632
  var __webpack_exports__DonutSwinModel = __webpack_exports__.DonutSwinModel;
33635
33633
  var __webpack_exports__DonutSwinPreTrainedModel = __webpack_exports__.DonutSwinPreTrainedModel;
33636
33634
  var __webpack_exports__EfficientNetForImageClassification = __webpack_exports__.EfficientNetForImageClassification;
@@ -34066,6 +34064,6 @@ var __webpack_exports__topk = __webpack_exports__.topk;
34066
34064
  var __webpack_exports__window_function = __webpack_exports__.window_function;
34067
34065
  var __webpack_exports__zeros = __webpack_exports__.zeros;
34068
34066
  var __webpack_exports__zeros_like = __webpack_exports__.zeros_like;
34069
- export { __webpack_exports__ASTFeatureExtractor as ASTFeatureExtractor, __webpack_exports__ASTForAudioClassification as ASTForAudioClassification, __webpack_exports__ASTModel as ASTModel, __webpack_exports__ASTPreTrainedModel as ASTPreTrainedModel, __webpack_exports__AlbertForMaskedLM as AlbertForMaskedLM, __webpack_exports__AlbertForQuestionAnswering as AlbertForQuestionAnswering, __webpack_exports__AlbertForSequenceClassification as AlbertForSequenceClassification, __webpack_exports__AlbertModel as AlbertModel, __webpack_exports__AlbertPreTrainedModel as AlbertPreTrainedModel, __webpack_exports__AlbertTokenizer as AlbertTokenizer, __webpack_exports__AudioClassificationPipeline as AudioClassificationPipeline, __webpack_exports__AutoConfig as AutoConfig, __webpack_exports__AutoModel as AutoModel, __webpack_exports__AutoModelForAudioClassification as AutoModelForAudioClassification, __webpack_exports__AutoModelForAudioFrameClassification as AutoModelForAudioFrameClassification, __webpack_exports__AutoModelForCTC as AutoModelForCTC, __webpack_exports__AutoModelForCausalLM as AutoModelForCausalLM, __webpack_exports__AutoModelForDepthEstimation as AutoModelForDepthEstimation, __webpack_exports__AutoModelForDocumentQuestionAnswering as AutoModelForDocumentQuestionAnswering, __webpack_exports__AutoModelForImageClassification as AutoModelForImageClassification, __webpack_exports__AutoModelForImageFeatureExtraction as AutoModelForImageFeatureExtraction, __webpack_exports__AutoModelForImageMatting as AutoModelForImageMatting, __webpack_exports__AutoModelForImageSegmentation as AutoModelForImageSegmentation, __webpack_exports__AutoModelForImageToImage as AutoModelForImageToImage, __webpack_exports__AutoModelForMaskGeneration as AutoModelForMaskGeneration, __webpack_exports__AutoModelForMaskedLM as AutoModelForMaskedLM, __webpack_exports__AutoModelForNormalEstimation as AutoModelForNormalEstimation, __webpack_exports__AutoModelForObjectDetection as 
AutoModelForObjectDetection, __webpack_exports__AutoModelForQuestionAnswering as AutoModelForQuestionAnswering, __webpack_exports__AutoModelForSemanticSegmentation as AutoModelForSemanticSegmentation, __webpack_exports__AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLM, __webpack_exports__AutoModelForSequenceClassification as AutoModelForSequenceClassification, __webpack_exports__AutoModelForSpeechSeq2Seq as AutoModelForSpeechSeq2Seq, __webpack_exports__AutoModelForTextToSpectrogram as AutoModelForTextToSpectrogram, __webpack_exports__AutoModelForTextToWaveform as AutoModelForTextToWaveform, __webpack_exports__AutoModelForTokenClassification as AutoModelForTokenClassification, __webpack_exports__AutoModelForUniversalSegmentation as AutoModelForUniversalSegmentation, __webpack_exports__AutoModelForVision2Seq as AutoModelForVision2Seq, __webpack_exports__AutoModelForXVector as AutoModelForXVector, __webpack_exports__AutoModelForZeroShotObjectDetection as AutoModelForZeroShotObjectDetection, __webpack_exports__AutoProcessor as AutoProcessor, __webpack_exports__AutoTokenizer as AutoTokenizer, __webpack_exports__AutomaticSpeechRecognitionPipeline as AutomaticSpeechRecognitionPipeline, __webpack_exports__BartForConditionalGeneration as BartForConditionalGeneration, __webpack_exports__BartForSequenceClassification as BartForSequenceClassification, __webpack_exports__BartModel as BartModel, __webpack_exports__BartPretrainedModel as BartPretrainedModel, __webpack_exports__BartTokenizer as BartTokenizer, __webpack_exports__BaseModelOutput as BaseModelOutput, __webpack_exports__BaseStreamer as BaseStreamer, __webpack_exports__BeitFeatureExtractor as BeitFeatureExtractor, __webpack_exports__BeitForImageClassification as BeitForImageClassification, __webpack_exports__BeitModel as BeitModel, __webpack_exports__BeitPreTrainedModel as BeitPreTrainedModel, __webpack_exports__BertForMaskedLM as BertForMaskedLM, __webpack_exports__BertForQuestionAnswering as BertForQuestionAnswering, 
__webpack_exports__BertForSequenceClassification as BertForSequenceClassification, __webpack_exports__BertForTokenClassification as BertForTokenClassification, __webpack_exports__BertModel as BertModel, __webpack_exports__BertPreTrainedModel as BertPreTrainedModel, __webpack_exports__BertTokenizer as BertTokenizer, __webpack_exports__BitImageProcessor as BitImageProcessor, __webpack_exports__BlenderbotForConditionalGeneration as BlenderbotForConditionalGeneration, __webpack_exports__BlenderbotModel as BlenderbotModel, __webpack_exports__BlenderbotPreTrainedModel as BlenderbotPreTrainedModel, __webpack_exports__BlenderbotSmallForConditionalGeneration as BlenderbotSmallForConditionalGeneration, __webpack_exports__BlenderbotSmallModel as BlenderbotSmallModel, __webpack_exports__BlenderbotSmallPreTrainedModel as BlenderbotSmallPreTrainedModel, __webpack_exports__BlenderbotSmallTokenizer as BlenderbotSmallTokenizer, __webpack_exports__BlenderbotTokenizer as BlenderbotTokenizer, __webpack_exports__BloomForCausalLM as BloomForCausalLM, __webpack_exports__BloomModel as BloomModel, __webpack_exports__BloomPreTrainedModel as BloomPreTrainedModel, __webpack_exports__BloomTokenizer as BloomTokenizer, __webpack_exports__CLIPFeatureExtractor as CLIPFeatureExtractor, __webpack_exports__CLIPImageProcessor as CLIPImageProcessor, __webpack_exports__CLIPModel as CLIPModel, __webpack_exports__CLIPPreTrainedModel as CLIPPreTrainedModel, __webpack_exports__CLIPSegForImageSegmentation as CLIPSegForImageSegmentation, __webpack_exports__CLIPSegModel as CLIPSegModel, __webpack_exports__CLIPSegPreTrainedModel as CLIPSegPreTrainedModel, __webpack_exports__CLIPTextModel as CLIPTextModel, __webpack_exports__CLIPTextModelWithProjection as CLIPTextModelWithProjection, __webpack_exports__CLIPTokenizer as CLIPTokenizer, __webpack_exports__CLIPVisionModel as CLIPVisionModel, __webpack_exports__CLIPVisionModelWithProjection as CLIPVisionModelWithProjection, __webpack_exports__CamembertForMaskedLM as 
CamembertForMaskedLM, __webpack_exports__CamembertForQuestionAnswering as CamembertForQuestionAnswering, __webpack_exports__CamembertForSequenceClassification as CamembertForSequenceClassification, __webpack_exports__CamembertForTokenClassification as CamembertForTokenClassification, __webpack_exports__CamembertModel as CamembertModel, __webpack_exports__CamembertPreTrainedModel as CamembertPreTrainedModel, __webpack_exports__CamembertTokenizer as CamembertTokenizer, __webpack_exports__CausalLMOutput as CausalLMOutput, __webpack_exports__CausalLMOutputWithPast as CausalLMOutputWithPast, __webpack_exports__ChineseCLIPFeatureExtractor as ChineseCLIPFeatureExtractor, __webpack_exports__ChineseCLIPModel as ChineseCLIPModel, __webpack_exports__ChineseCLIPPreTrainedModel as ChineseCLIPPreTrainedModel, __webpack_exports__ClapAudioModelWithProjection as ClapAudioModelWithProjection, __webpack_exports__ClapFeatureExtractor as ClapFeatureExtractor, __webpack_exports__ClapModel as ClapModel, __webpack_exports__ClapPreTrainedModel as ClapPreTrainedModel, __webpack_exports__ClapTextModelWithProjection as ClapTextModelWithProjection, __webpack_exports__CodeGenForCausalLM as CodeGenForCausalLM, __webpack_exports__CodeGenModel as CodeGenModel, __webpack_exports__CodeGenPreTrainedModel as CodeGenPreTrainedModel, __webpack_exports__CodeGenTokenizer as CodeGenTokenizer, __webpack_exports__CodeLlamaTokenizer as CodeLlamaTokenizer, __webpack_exports__CohereForCausalLM as CohereForCausalLM, __webpack_exports__CohereModel as CohereModel, __webpack_exports__CoherePreTrainedModel as CoherePreTrainedModel, __webpack_exports__CohereTokenizer as CohereTokenizer, __webpack_exports__ConvBertForMaskedLM as ConvBertForMaskedLM, __webpack_exports__ConvBertForQuestionAnswering as ConvBertForQuestionAnswering, __webpack_exports__ConvBertForSequenceClassification as ConvBertForSequenceClassification, __webpack_exports__ConvBertForTokenClassification as ConvBertForTokenClassification, 
__webpack_exports__ConvBertModel as ConvBertModel, __webpack_exports__ConvBertPreTrainedModel as ConvBertPreTrainedModel, __webpack_exports__ConvBertTokenizer as ConvBertTokenizer, __webpack_exports__ConvNextFeatureExtractor as ConvNextFeatureExtractor, __webpack_exports__ConvNextForImageClassification as ConvNextForImageClassification, __webpack_exports__ConvNextImageProcessor as ConvNextImageProcessor, __webpack_exports__ConvNextModel as ConvNextModel, __webpack_exports__ConvNextPreTrainedModel as ConvNextPreTrainedModel, __webpack_exports__ConvNextV2ForImageClassification as ConvNextV2ForImageClassification, __webpack_exports__ConvNextV2Model as ConvNextV2Model, __webpack_exports__ConvNextV2PreTrainedModel as ConvNextV2PreTrainedModel, __webpack_exports__DPTFeatureExtractor as DPTFeatureExtractor, __webpack_exports__DPTForDepthEstimation as DPTForDepthEstimation, __webpack_exports__DPTImageProcessor as DPTImageProcessor, __webpack_exports__DPTModel as DPTModel, __webpack_exports__DPTPreTrainedModel as DPTPreTrainedModel, __webpack_exports__DebertaForMaskedLM as DebertaForMaskedLM, __webpack_exports__DebertaForQuestionAnswering as DebertaForQuestionAnswering, __webpack_exports__DebertaForSequenceClassification as DebertaForSequenceClassification, __webpack_exports__DebertaForTokenClassification as DebertaForTokenClassification, __webpack_exports__DebertaModel as DebertaModel, __webpack_exports__DebertaPreTrainedModel as DebertaPreTrainedModel, __webpack_exports__DebertaTokenizer as DebertaTokenizer, __webpack_exports__DebertaV2ForMaskedLM as DebertaV2ForMaskedLM, __webpack_exports__DebertaV2ForQuestionAnswering as DebertaV2ForQuestionAnswering, __webpack_exports__DebertaV2ForSequenceClassification as DebertaV2ForSequenceClassification, __webpack_exports__DebertaV2ForTokenClassification as DebertaV2ForTokenClassification, __webpack_exports__DebertaV2Model as DebertaV2Model, __webpack_exports__DebertaV2PreTrainedModel as DebertaV2PreTrainedModel, 
__webpack_exports__DebertaV2Tokenizer as DebertaV2Tokenizer, __webpack_exports__DecisionTransformerModel as DecisionTransformerModel, __webpack_exports__DecisionTransformerPreTrainedModel as DecisionTransformerPreTrainedModel, __webpack_exports__DeiTFeatureExtractor as DeiTFeatureExtractor, __webpack_exports__DeiTForImageClassification as DeiTForImageClassification, __webpack_exports__DeiTModel as DeiTModel, __webpack_exports__DeiTPreTrainedModel as DeiTPreTrainedModel, __webpack_exports__DepthAnythingForDepthEstimation as DepthAnythingForDepthEstimation, __webpack_exports__DepthAnythingPreTrainedModel as DepthAnythingPreTrainedModel, __webpack_exports__DepthEstimationPipeline as DepthEstimationPipeline, __webpack_exports__DepthProForDepthEstimation as DepthProForDepthEstimation, __webpack_exports__DepthProPreTrainedModel as DepthProPreTrainedModel, __webpack_exports__DetrFeatureExtractor as DetrFeatureExtractor, __webpack_exports__DetrForObjectDetection as DetrForObjectDetection, __webpack_exports__DetrForSegmentation as DetrForSegmentation, __webpack_exports__DetrModel as DetrModel, __webpack_exports__DetrObjectDetectionOutput as DetrObjectDetectionOutput, __webpack_exports__DetrPreTrainedModel as DetrPreTrainedModel, __webpack_exports__DetrSegmentationOutput as DetrSegmentationOutput, __webpack_exports__Dinov2ForImageClassification as Dinov2ForImageClassification, __webpack_exports__Dinov2Model as Dinov2Model, __webpack_exports__Dinov2PreTrainedModel as Dinov2PreTrainedModel, __webpack_exports__DistilBertForMaskedLM as DistilBertForMaskedLM, __webpack_exports__DistilBertForQuestionAnswering as DistilBertForQuestionAnswering, __webpack_exports__DistilBertForSequenceClassification as DistilBertForSequenceClassification, __webpack_exports__DistilBertForTokenClassification as DistilBertForTokenClassification, __webpack_exports__DistilBertModel as DistilBertModel, __webpack_exports__DistilBertPreTrainedModel as DistilBertPreTrainedModel, 
__webpack_exports__DistilBertTokenizer as DistilBertTokenizer, __webpack_exports__DocumentQuestionAnsweringPipeline as DocumentQuestionAnsweringPipeline, __webpack_exports__DonutFeatureExtractor as DonutFeatureExtractor, __webpack_exports__DonutSwinModel as DonutSwinModel, __webpack_exports__DonutSwinPreTrainedModel as DonutSwinPreTrainedModel, __webpack_exports__EfficientNetForImageClassification as EfficientNetForImageClassification, __webpack_exports__EfficientNetImageProcessor as EfficientNetImageProcessor, __webpack_exports__EfficientNetModel as EfficientNetModel, __webpack_exports__EfficientNetPreTrainedModel as EfficientNetPreTrainedModel, __webpack_exports__ElectraForMaskedLM as ElectraForMaskedLM, __webpack_exports__ElectraForQuestionAnswering as ElectraForQuestionAnswering, __webpack_exports__ElectraForSequenceClassification as ElectraForSequenceClassification, __webpack_exports__ElectraForTokenClassification as ElectraForTokenClassification, __webpack_exports__ElectraModel as ElectraModel, __webpack_exports__ElectraPreTrainedModel as ElectraPreTrainedModel, __webpack_exports__ElectraTokenizer as ElectraTokenizer, __webpack_exports__EosTokenCriteria as EosTokenCriteria, __webpack_exports__EsmForMaskedLM as EsmForMaskedLM, __webpack_exports__EsmForSequenceClassification as EsmForSequenceClassification, __webpack_exports__EsmForTokenClassification as EsmForTokenClassification, __webpack_exports__EsmModel as EsmModel, __webpack_exports__EsmPreTrainedModel as EsmPreTrainedModel, __webpack_exports__EsmTokenizer as EsmTokenizer, __webpack_exports__FFT as FFT, __webpack_exports__FalconForCausalLM as FalconForCausalLM, __webpack_exports__FalconModel as FalconModel, __webpack_exports__FalconPreTrainedModel as FalconPreTrainedModel, __webpack_exports__FalconTokenizer as FalconTokenizer, __webpack_exports__FastViTForImageClassification as FastViTForImageClassification, __webpack_exports__FastViTModel as FastViTModel, __webpack_exports__FastViTPreTrainedModel as 
FastViTPreTrainedModel, __webpack_exports__FeatureExtractionPipeline as FeatureExtractionPipeline, __webpack_exports__FeatureExtractor as FeatureExtractor, __webpack_exports__FillMaskPipeline as FillMaskPipeline, __webpack_exports__Florence2ForConditionalGeneration as Florence2ForConditionalGeneration, __webpack_exports__Florence2PreTrainedModel as Florence2PreTrainedModel, __webpack_exports__Florence2Processor as Florence2Processor, __webpack_exports__GLPNFeatureExtractor as GLPNFeatureExtractor, __webpack_exports__GLPNForDepthEstimation as GLPNForDepthEstimation, __webpack_exports__GLPNModel as GLPNModel, __webpack_exports__GLPNPreTrainedModel as GLPNPreTrainedModel, __webpack_exports__GPT2LMHeadModel as GPT2LMHeadModel, __webpack_exports__GPT2Model as GPT2Model, __webpack_exports__GPT2PreTrainedModel as GPT2PreTrainedModel, __webpack_exports__GPT2Tokenizer as GPT2Tokenizer, __webpack_exports__GPTBigCodeForCausalLM as GPTBigCodeForCausalLM, __webpack_exports__GPTBigCodeModel as GPTBigCodeModel, __webpack_exports__GPTBigCodePreTrainedModel as GPTBigCodePreTrainedModel, __webpack_exports__GPTJForCausalLM as GPTJForCausalLM, __webpack_exports__GPTJModel as GPTJModel, __webpack_exports__GPTJPreTrainedModel as GPTJPreTrainedModel, __webpack_exports__GPTNeoForCausalLM as GPTNeoForCausalLM, __webpack_exports__GPTNeoModel as GPTNeoModel, __webpack_exports__GPTNeoPreTrainedModel as GPTNeoPreTrainedModel, __webpack_exports__GPTNeoXForCausalLM as GPTNeoXForCausalLM, __webpack_exports__GPTNeoXModel as GPTNeoXModel, __webpack_exports__GPTNeoXPreTrainedModel as GPTNeoXPreTrainedModel, __webpack_exports__GPTNeoXTokenizer as GPTNeoXTokenizer, __webpack_exports__Gemma2ForCausalLM as Gemma2ForCausalLM, __webpack_exports__Gemma2Model as Gemma2Model, __webpack_exports__Gemma2PreTrainedModel as Gemma2PreTrainedModel, __webpack_exports__GemmaForCausalLM as GemmaForCausalLM, __webpack_exports__GemmaModel as GemmaModel, __webpack_exports__GemmaPreTrainedModel as GemmaPreTrainedModel, 
__webpack_exports__GemmaTokenizer as GemmaTokenizer, __webpack_exports__GraniteForCausalLM as GraniteForCausalLM, __webpack_exports__GraniteModel as GraniteModel, __webpack_exports__GranitePreTrainedModel as GranitePreTrainedModel, __webpack_exports__Grok1Tokenizer as Grok1Tokenizer, __webpack_exports__GroupViTModel as GroupViTModel, __webpack_exports__GroupViTPreTrainedModel as GroupViTPreTrainedModel, __webpack_exports__HerbertTokenizer as HerbertTokenizer, __webpack_exports__HieraForImageClassification as HieraForImageClassification, __webpack_exports__HieraModel as HieraModel, __webpack_exports__HieraPreTrainedModel as HieraPreTrainedModel, __webpack_exports__HubertForCTC as HubertForCTC, __webpack_exports__HubertForSequenceClassification as HubertForSequenceClassification, __webpack_exports__HubertModel as HubertModel, __webpack_exports__HubertPreTrainedModel as HubertPreTrainedModel, __webpack_exports__ImageClassificationPipeline as ImageClassificationPipeline, __webpack_exports__ImageFeatureExtractionPipeline as ImageFeatureExtractionPipeline, __webpack_exports__ImageFeatureExtractor as ImageFeatureExtractor, __webpack_exports__ImageMattingOutput as ImageMattingOutput, __webpack_exports__ImageSegmentationPipeline as ImageSegmentationPipeline, __webpack_exports__ImageToImagePipeline as ImageToImagePipeline, __webpack_exports__ImageToTextPipeline as ImageToTextPipeline, __webpack_exports__InterruptableStoppingCriteria as InterruptableStoppingCriteria, __webpack_exports__JAISLMHeadModel as JAISLMHeadModel, __webpack_exports__JAISModel as JAISModel, __webpack_exports__JAISPreTrainedModel as JAISPreTrainedModel, __webpack_exports__LlamaForCausalLM as LlamaForCausalLM, __webpack_exports__LlamaModel as LlamaModel, __webpack_exports__LlamaPreTrainedModel as LlamaPreTrainedModel, __webpack_exports__LlamaTokenizer as LlamaTokenizer, __webpack_exports__LlavaForConditionalGeneration as LlavaForConditionalGeneration, __webpack_exports__LlavaPreTrainedModel as 
LlavaPreTrainedModel, __webpack_exports__LongT5ForConditionalGeneration as LongT5ForConditionalGeneration, __webpack_exports__LongT5Model as LongT5Model, __webpack_exports__LongT5PreTrainedModel as LongT5PreTrainedModel, __webpack_exports__M2M100ForConditionalGeneration as M2M100ForConditionalGeneration, __webpack_exports__M2M100Model as M2M100Model, __webpack_exports__M2M100PreTrainedModel as M2M100PreTrainedModel, __webpack_exports__M2M100Tokenizer as M2M100Tokenizer, __webpack_exports__MBart50Tokenizer as MBart50Tokenizer, __webpack_exports__MBartForCausalLM as MBartForCausalLM, __webpack_exports__MBartForConditionalGeneration as MBartForConditionalGeneration, __webpack_exports__MBartForSequenceClassification as MBartForSequenceClassification, __webpack_exports__MBartModel as MBartModel, __webpack_exports__MBartPreTrainedModel as MBartPreTrainedModel, __webpack_exports__MBartTokenizer as MBartTokenizer, __webpack_exports__MPNetForMaskedLM as MPNetForMaskedLM, __webpack_exports__MPNetForQuestionAnswering as MPNetForQuestionAnswering, __webpack_exports__MPNetForSequenceClassification as MPNetForSequenceClassification, __webpack_exports__MPNetForTokenClassification as MPNetForTokenClassification, __webpack_exports__MPNetModel as MPNetModel, __webpack_exports__MPNetPreTrainedModel as MPNetPreTrainedModel, __webpack_exports__MPNetTokenizer as MPNetTokenizer, __webpack_exports__MT5ForConditionalGeneration as MT5ForConditionalGeneration, __webpack_exports__MT5Model as MT5Model, __webpack_exports__MT5PreTrainedModel as MT5PreTrainedModel, __webpack_exports__MarianMTModel as MarianMTModel, __webpack_exports__MarianModel as MarianModel, __webpack_exports__MarianPreTrainedModel as MarianPreTrainedModel, __webpack_exports__MarianTokenizer as MarianTokenizer, __webpack_exports__MaskFormerFeatureExtractor as MaskFormerFeatureExtractor, __webpack_exports__MaskFormerForInstanceSegmentation as MaskFormerForInstanceSegmentation, __webpack_exports__MaskFormerModel as 
MaskFormerModel, __webpack_exports__MaskFormerPreTrainedModel as MaskFormerPreTrainedModel, __webpack_exports__MaskedLMOutput as MaskedLMOutput, __webpack_exports__MaxLengthCriteria as MaxLengthCriteria, __webpack_exports__MistralForCausalLM as MistralForCausalLM, __webpack_exports__MistralModel as MistralModel, __webpack_exports__MistralPreTrainedModel as MistralPreTrainedModel, __webpack_exports__MobileBertForMaskedLM as MobileBertForMaskedLM, __webpack_exports__MobileBertForQuestionAnswering as MobileBertForQuestionAnswering, __webpack_exports__MobileBertForSequenceClassification as MobileBertForSequenceClassification, __webpack_exports__MobileBertModel as MobileBertModel, __webpack_exports__MobileBertPreTrainedModel as MobileBertPreTrainedModel, __webpack_exports__MobileBertTokenizer as MobileBertTokenizer, __webpack_exports__MobileNetV1FeatureExtractor as MobileNetV1FeatureExtractor, __webpack_exports__MobileNetV1ForImageClassification as MobileNetV1ForImageClassification, __webpack_exports__MobileNetV1Model as MobileNetV1Model, __webpack_exports__MobileNetV1PreTrainedModel as MobileNetV1PreTrainedModel, __webpack_exports__MobileNetV2FeatureExtractor as MobileNetV2FeatureExtractor, __webpack_exports__MobileNetV2ForImageClassification as MobileNetV2ForImageClassification, __webpack_exports__MobileNetV2Model as MobileNetV2Model, __webpack_exports__MobileNetV2PreTrainedModel as MobileNetV2PreTrainedModel, __webpack_exports__MobileNetV3FeatureExtractor as MobileNetV3FeatureExtractor, __webpack_exports__MobileNetV3ForImageClassification as MobileNetV3ForImageClassification, __webpack_exports__MobileNetV3Model as MobileNetV3Model, __webpack_exports__MobileNetV3PreTrainedModel as MobileNetV3PreTrainedModel, __webpack_exports__MobileNetV4FeatureExtractor as MobileNetV4FeatureExtractor, __webpack_exports__MobileNetV4ForImageClassification as MobileNetV4ForImageClassification, __webpack_exports__MobileNetV4Model as MobileNetV4Model, 
__webpack_exports__MobileNetV4PreTrainedModel as MobileNetV4PreTrainedModel, __webpack_exports__MobileViTFeatureExtractor as MobileViTFeatureExtractor, __webpack_exports__MobileViTForImageClassification as MobileViTForImageClassification, __webpack_exports__MobileViTImageProcessor as MobileViTImageProcessor, __webpack_exports__MobileViTModel as MobileViTModel, __webpack_exports__MobileViTPreTrainedModel as MobileViTPreTrainedModel, __webpack_exports__MobileViTV2ForImageClassification as MobileViTV2ForImageClassification, __webpack_exports__MobileViTV2Model as MobileViTV2Model, __webpack_exports__MobileViTV2PreTrainedModel as MobileViTV2PreTrainedModel, __webpack_exports__ModelOutput as ModelOutput, __webpack_exports__Moondream1ForConditionalGeneration as Moondream1ForConditionalGeneration, __webpack_exports__MptForCausalLM as MptForCausalLM, __webpack_exports__MptModel as MptModel, __webpack_exports__MptPreTrainedModel as MptPreTrainedModel, __webpack_exports__MusicgenForCausalLM as MusicgenForCausalLM, __webpack_exports__MusicgenForConditionalGeneration as MusicgenForConditionalGeneration, __webpack_exports__MusicgenModel as MusicgenModel, __webpack_exports__MusicgenPreTrainedModel as MusicgenPreTrainedModel, __webpack_exports__NllbTokenizer as NllbTokenizer, __webpack_exports__NomicBertModel as NomicBertModel, __webpack_exports__NomicBertPreTrainedModel as NomicBertPreTrainedModel, __webpack_exports__NougatImageProcessor as NougatImageProcessor, __webpack_exports__NougatTokenizer as NougatTokenizer, __webpack_exports__OPTForCausalLM as OPTForCausalLM, __webpack_exports__OPTModel as OPTModel, __webpack_exports__OPTPreTrainedModel as OPTPreTrainedModel, __webpack_exports__ObjectDetectionPipeline as ObjectDetectionPipeline, __webpack_exports__OpenELMForCausalLM as OpenELMForCausalLM, __webpack_exports__OpenELMModel as OpenELMModel, __webpack_exports__OpenELMPreTrainedModel as OpenELMPreTrainedModel, __webpack_exports__OwlViTFeatureExtractor as 
OwlViTFeatureExtractor, __webpack_exports__OwlViTForObjectDetection as OwlViTForObjectDetection, __webpack_exports__OwlViTModel as OwlViTModel, __webpack_exports__OwlViTPreTrainedModel as OwlViTPreTrainedModel, __webpack_exports__OwlViTProcessor as OwlViTProcessor, __webpack_exports__Owlv2ForObjectDetection as Owlv2ForObjectDetection, __webpack_exports__Owlv2ImageProcessor as Owlv2ImageProcessor, __webpack_exports__Owlv2Model as Owlv2Model, __webpack_exports__Owlv2PreTrainedModel as Owlv2PreTrainedModel, __webpack_exports__Phi3ForCausalLM as Phi3ForCausalLM, __webpack_exports__Phi3Model as Phi3Model, __webpack_exports__Phi3PreTrainedModel as Phi3PreTrainedModel, __webpack_exports__PhiForCausalLM as PhiForCausalLM, __webpack_exports__PhiModel as PhiModel, __webpack_exports__PhiPreTrainedModel as PhiPreTrainedModel, __webpack_exports__Pipeline as Pipeline, __webpack_exports__PreTrainedModel as PreTrainedModel, __webpack_exports__PreTrainedTokenizer as PreTrainedTokenizer, __webpack_exports__PretrainedConfig as PretrainedConfig, __webpack_exports__PretrainedMixin as PretrainedMixin, __webpack_exports__Processor as Processor, __webpack_exports__PvtForImageClassification as PvtForImageClassification, __webpack_exports__PvtImageProcessor as PvtImageProcessor, __webpack_exports__PvtModel as PvtModel, __webpack_exports__PvtPreTrainedModel as PvtPreTrainedModel, __webpack_exports__PyAnnoteFeatureExtractor as PyAnnoteFeatureExtractor, __webpack_exports__PyAnnoteForAudioFrameClassification as PyAnnoteForAudioFrameClassification, __webpack_exports__PyAnnoteModel as PyAnnoteModel, __webpack_exports__PyAnnotePreTrainedModel as PyAnnotePreTrainedModel, __webpack_exports__PyAnnoteProcessor as PyAnnoteProcessor, __webpack_exports__QuestionAnsweringModelOutput as QuestionAnsweringModelOutput, __webpack_exports__QuestionAnsweringPipeline as QuestionAnsweringPipeline, __webpack_exports__Qwen2ForCausalLM as Qwen2ForCausalLM, __webpack_exports__Qwen2Model as Qwen2Model, 
__webpack_exports__Qwen2PreTrainedModel as Qwen2PreTrainedModel, __webpack_exports__Qwen2Tokenizer as Qwen2Tokenizer, __webpack_exports__RTDetrForObjectDetection as RTDetrForObjectDetection, __webpack_exports__RTDetrImageProcessor as RTDetrImageProcessor, __webpack_exports__RTDetrModel as RTDetrModel, __webpack_exports__RTDetrObjectDetectionOutput as RTDetrObjectDetectionOutput, __webpack_exports__RTDetrPreTrainedModel as RTDetrPreTrainedModel, __webpack_exports__RawImage as RawImage, __webpack_exports__ResNetForImageClassification as ResNetForImageClassification, __webpack_exports__ResNetModel as ResNetModel, __webpack_exports__ResNetPreTrainedModel as ResNetPreTrainedModel, __webpack_exports__RoFormerForMaskedLM as RoFormerForMaskedLM, __webpack_exports__RoFormerForQuestionAnswering as RoFormerForQuestionAnswering, __webpack_exports__RoFormerForSequenceClassification as RoFormerForSequenceClassification, __webpack_exports__RoFormerForTokenClassification as RoFormerForTokenClassification, __webpack_exports__RoFormerModel as RoFormerModel, __webpack_exports__RoFormerPreTrainedModel as RoFormerPreTrainedModel, __webpack_exports__RoFormerTokenizer as RoFormerTokenizer, __webpack_exports__RobertaForMaskedLM as RobertaForMaskedLM, __webpack_exports__RobertaForQuestionAnswering as RobertaForQuestionAnswering, __webpack_exports__RobertaForSequenceClassification as RobertaForSequenceClassification, __webpack_exports__RobertaForTokenClassification as RobertaForTokenClassification, __webpack_exports__RobertaModel as RobertaModel, __webpack_exports__RobertaPreTrainedModel as RobertaPreTrainedModel, __webpack_exports__RobertaTokenizer as RobertaTokenizer, __webpack_exports__SamImageProcessor as SamImageProcessor, __webpack_exports__SamImageSegmentationOutput as SamImageSegmentationOutput, __webpack_exports__SamModel as SamModel, __webpack_exports__SamPreTrainedModel as SamPreTrainedModel, __webpack_exports__SamProcessor as SamProcessor, 
__webpack_exports__SapiensFeatureExtractor as SapiensFeatureExtractor, __webpack_exports__SapiensForDepthEstimation as SapiensForDepthEstimation, __webpack_exports__SapiensForNormalEstimation as SapiensForNormalEstimation, __webpack_exports__SapiensForSemanticSegmentation as SapiensForSemanticSegmentation, __webpack_exports__SapiensPreTrainedModel as SapiensPreTrainedModel, __webpack_exports__SeamlessM4TFeatureExtractor as SeamlessM4TFeatureExtractor, __webpack_exports__SegformerFeatureExtractor as SegformerFeatureExtractor, __webpack_exports__SegformerForImageClassification as SegformerForImageClassification, __webpack_exports__SegformerForSemanticSegmentation as SegformerForSemanticSegmentation, __webpack_exports__SegformerModel as SegformerModel, __webpack_exports__SegformerPreTrainedModel as SegformerPreTrainedModel, __webpack_exports__Seq2SeqLMOutput as Seq2SeqLMOutput, __webpack_exports__SequenceClassifierOutput as SequenceClassifierOutput, __webpack_exports__SiglipImageProcessor as SiglipImageProcessor, __webpack_exports__SiglipModel as SiglipModel, __webpack_exports__SiglipPreTrainedModel as SiglipPreTrainedModel, __webpack_exports__SiglipTextModel as SiglipTextModel, __webpack_exports__SiglipTokenizer as SiglipTokenizer, __webpack_exports__SiglipVisionModel as SiglipVisionModel, __webpack_exports__SpeechT5FeatureExtractor as SpeechT5FeatureExtractor, __webpack_exports__SpeechT5ForSpeechToText as SpeechT5ForSpeechToText, __webpack_exports__SpeechT5ForTextToSpeech as SpeechT5ForTextToSpeech, __webpack_exports__SpeechT5HifiGan as SpeechT5HifiGan, __webpack_exports__SpeechT5Model as SpeechT5Model, __webpack_exports__SpeechT5PreTrainedModel as SpeechT5PreTrainedModel, __webpack_exports__SpeechT5Processor as SpeechT5Processor, __webpack_exports__SpeechT5Tokenizer as SpeechT5Tokenizer, __webpack_exports__SqueezeBertForMaskedLM as SqueezeBertForMaskedLM, __webpack_exports__SqueezeBertForQuestionAnswering as SqueezeBertForQuestionAnswering, 
__webpack_exports__SqueezeBertForSequenceClassification as SqueezeBertForSequenceClassification, __webpack_exports__SqueezeBertModel as SqueezeBertModel, __webpack_exports__SqueezeBertPreTrainedModel as SqueezeBertPreTrainedModel, __webpack_exports__SqueezeBertTokenizer as SqueezeBertTokenizer, __webpack_exports__StableLmForCausalLM as StableLmForCausalLM, __webpack_exports__StableLmModel as StableLmModel, __webpack_exports__StableLmPreTrainedModel as StableLmPreTrainedModel, __webpack_exports__Starcoder2ForCausalLM as Starcoder2ForCausalLM, __webpack_exports__Starcoder2Model as Starcoder2Model, __webpack_exports__Starcoder2PreTrainedModel as Starcoder2PreTrainedModel, __webpack_exports__StoppingCriteria as StoppingCriteria, __webpack_exports__StoppingCriteriaList as StoppingCriteriaList, __webpack_exports__SummarizationPipeline as SummarizationPipeline, __webpack_exports__Swin2SRForImageSuperResolution as Swin2SRForImageSuperResolution, __webpack_exports__Swin2SRImageProcessor as Swin2SRImageProcessor, __webpack_exports__Swin2SRModel as Swin2SRModel, __webpack_exports__Swin2SRPreTrainedModel as Swin2SRPreTrainedModel, __webpack_exports__SwinForImageClassification as SwinForImageClassification, __webpack_exports__SwinModel as SwinModel, __webpack_exports__SwinPreTrainedModel as SwinPreTrainedModel, __webpack_exports__T5ForConditionalGeneration as T5ForConditionalGeneration, __webpack_exports__T5Model as T5Model, __webpack_exports__T5PreTrainedModel as T5PreTrainedModel, __webpack_exports__T5Tokenizer as T5Tokenizer, __webpack_exports__TableTransformerForObjectDetection as TableTransformerForObjectDetection, __webpack_exports__TableTransformerModel as TableTransformerModel, __webpack_exports__TableTransformerObjectDetectionOutput as TableTransformerObjectDetectionOutput, __webpack_exports__TableTransformerPreTrainedModel as TableTransformerPreTrainedModel, __webpack_exports__Tensor as Tensor, __webpack_exports__Text2TextGenerationPipeline as 
Text2TextGenerationPipeline, __webpack_exports__TextClassificationPipeline as TextClassificationPipeline, __webpack_exports__TextGenerationPipeline as TextGenerationPipeline, __webpack_exports__TextStreamer as TextStreamer, __webpack_exports__TextToAudioPipeline as TextToAudioPipeline, __webpack_exports__TokenClassificationPipeline as TokenClassificationPipeline, __webpack_exports__TokenClassifierOutput as TokenClassifierOutput, __webpack_exports__TokenizerModel as TokenizerModel, __webpack_exports__TrOCRForCausalLM as TrOCRForCausalLM, __webpack_exports__TrOCRPreTrainedModel as TrOCRPreTrainedModel, __webpack_exports__TranslationPipeline as TranslationPipeline, __webpack_exports__UniSpeechForCTC as UniSpeechForCTC, __webpack_exports__UniSpeechForSequenceClassification as UniSpeechForSequenceClassification, __webpack_exports__UniSpeechModel as UniSpeechModel, __webpack_exports__UniSpeechPreTrainedModel as UniSpeechPreTrainedModel, __webpack_exports__UniSpeechSatForAudioFrameClassification as UniSpeechSatForAudioFrameClassification, __webpack_exports__UniSpeechSatForCTC as UniSpeechSatForCTC, __webpack_exports__UniSpeechSatForSequenceClassification as UniSpeechSatForSequenceClassification, __webpack_exports__UniSpeechSatModel as UniSpeechSatModel, __webpack_exports__UniSpeechSatPreTrainedModel as UniSpeechSatPreTrainedModel, __webpack_exports__ViTFeatureExtractor as ViTFeatureExtractor, __webpack_exports__ViTForImageClassification as ViTForImageClassification, __webpack_exports__ViTImageProcessor as ViTImageProcessor, __webpack_exports__ViTMAEModel as ViTMAEModel, __webpack_exports__ViTMAEPreTrainedModel as ViTMAEPreTrainedModel, __webpack_exports__ViTMSNForImageClassification as ViTMSNForImageClassification, __webpack_exports__ViTMSNModel as ViTMSNModel, __webpack_exports__ViTMSNPreTrainedModel as ViTMSNPreTrainedModel, __webpack_exports__ViTModel as ViTModel, __webpack_exports__ViTPreTrainedModel as ViTPreTrainedModel, __webpack_exports__VisionEncoderDecoderModel 
as VisionEncoderDecoderModel, __webpack_exports__VitMatteForImageMatting as VitMatteForImageMatting, __webpack_exports__VitMatteImageProcessor as VitMatteImageProcessor, __webpack_exports__VitMattePreTrainedModel as VitMattePreTrainedModel, __webpack_exports__VitsModel as VitsModel, __webpack_exports__VitsModelOutput as VitsModelOutput, __webpack_exports__VitsPreTrainedModel as VitsPreTrainedModel, __webpack_exports__VitsTokenizer as VitsTokenizer, __webpack_exports__Wav2Vec2BertForCTC as Wav2Vec2BertForCTC, __webpack_exports__Wav2Vec2BertForSequenceClassification as Wav2Vec2BertForSequenceClassification, __webpack_exports__Wav2Vec2BertModel as Wav2Vec2BertModel, __webpack_exports__Wav2Vec2BertPreTrainedModel as Wav2Vec2BertPreTrainedModel, __webpack_exports__Wav2Vec2CTCTokenizer as Wav2Vec2CTCTokenizer, __webpack_exports__Wav2Vec2FeatureExtractor as Wav2Vec2FeatureExtractor, __webpack_exports__Wav2Vec2ForAudioFrameClassification as Wav2Vec2ForAudioFrameClassification, __webpack_exports__Wav2Vec2ForCTC as Wav2Vec2ForCTC, __webpack_exports__Wav2Vec2ForSequenceClassification as Wav2Vec2ForSequenceClassification, __webpack_exports__Wav2Vec2Model as Wav2Vec2Model, __webpack_exports__Wav2Vec2PreTrainedModel as Wav2Vec2PreTrainedModel, __webpack_exports__Wav2Vec2ProcessorWithLM as Wav2Vec2ProcessorWithLM, __webpack_exports__WavLMForAudioFrameClassification as WavLMForAudioFrameClassification, __webpack_exports__WavLMForCTC as WavLMForCTC, __webpack_exports__WavLMForSequenceClassification as WavLMForSequenceClassification, __webpack_exports__WavLMForXVector as WavLMForXVector, __webpack_exports__WavLMModel as WavLMModel, __webpack_exports__WavLMPreTrainedModel as WavLMPreTrainedModel, __webpack_exports__WeSpeakerFeatureExtractor as WeSpeakerFeatureExtractor, __webpack_exports__WeSpeakerResNetModel as WeSpeakerResNetModel, __webpack_exports__WeSpeakerResNetPreTrainedModel as WeSpeakerResNetPreTrainedModel, __webpack_exports__WhisperFeatureExtractor as 
WhisperFeatureExtractor, __webpack_exports__WhisperForConditionalGeneration as WhisperForConditionalGeneration, __webpack_exports__WhisperModel as WhisperModel, __webpack_exports__WhisperPreTrainedModel as WhisperPreTrainedModel, __webpack_exports__WhisperProcessor as WhisperProcessor, __webpack_exports__WhisperTextStreamer as WhisperTextStreamer, __webpack_exports__WhisperTokenizer as WhisperTokenizer, __webpack_exports__XLMForQuestionAnswering as XLMForQuestionAnswering, __webpack_exports__XLMForSequenceClassification as XLMForSequenceClassification, __webpack_exports__XLMForTokenClassification as XLMForTokenClassification, __webpack_exports__XLMModel as XLMModel, __webpack_exports__XLMPreTrainedModel as XLMPreTrainedModel, __webpack_exports__XLMRobertaForMaskedLM as XLMRobertaForMaskedLM, __webpack_exports__XLMRobertaForQuestionAnswering as XLMRobertaForQuestionAnswering, __webpack_exports__XLMRobertaForSequenceClassification as XLMRobertaForSequenceClassification, __webpack_exports__XLMRobertaForTokenClassification as XLMRobertaForTokenClassification, __webpack_exports__XLMRobertaModel as XLMRobertaModel, __webpack_exports__XLMRobertaPreTrainedModel as XLMRobertaPreTrainedModel, __webpack_exports__XLMRobertaTokenizer as XLMRobertaTokenizer, __webpack_exports__XLMTokenizer as XLMTokenizer, __webpack_exports__XLMWithLMHeadModel as XLMWithLMHeadModel, __webpack_exports__XVectorOutput as XVectorOutput, __webpack_exports__YolosFeatureExtractor as YolosFeatureExtractor, __webpack_exports__YolosForObjectDetection as YolosForObjectDetection, __webpack_exports__YolosModel as YolosModel, __webpack_exports__YolosObjectDetectionOutput as YolosObjectDetectionOutput, __webpack_exports__YolosPreTrainedModel as YolosPreTrainedModel, __webpack_exports__ZeroShotAudioClassificationPipeline as ZeroShotAudioClassificationPipeline, __webpack_exports__ZeroShotClassificationPipeline as ZeroShotClassificationPipeline, __webpack_exports__ZeroShotImageClassificationPipeline as 
ZeroShotImageClassificationPipeline, __webpack_exports__ZeroShotObjectDetectionPipeline as ZeroShotObjectDetectionPipeline, __webpack_exports__bankers_round as bankers_round, __webpack_exports__cat as cat, __webpack_exports__cos_sim as cos_sim, __webpack_exports__dot as dot, __webpack_exports__dynamic_time_warping as dynamic_time_warping, __webpack_exports__env as env, __webpack_exports__full as full, __webpack_exports__full_like as full_like, __webpack_exports__getKeyValueShapes as getKeyValueShapes, __webpack_exports__hamming as hamming, __webpack_exports__hanning as hanning, __webpack_exports__interpolate as interpolate, __webpack_exports__interpolate_4d as interpolate_4d, __webpack_exports__interpolate_data as interpolate_data, __webpack_exports__is_chinese_char as is_chinese_char, __webpack_exports__layer_norm as layer_norm, __webpack_exports__log_softmax as log_softmax, __webpack_exports__magnitude as magnitude, __webpack_exports__matmul as matmul, __webpack_exports__max as max, __webpack_exports__mean as mean, __webpack_exports__mean_pooling as mean_pooling, __webpack_exports__medianFilter as medianFilter, __webpack_exports__mel_filter_bank as mel_filter_bank, __webpack_exports__min as min, __webpack_exports__ones as ones, __webpack_exports__ones_like as ones_like, __webpack_exports__permute as permute, __webpack_exports__permute_data as permute_data, __webpack_exports__pipeline as pipeline, __webpack_exports__quantize_embeddings as quantize_embeddings, __webpack_exports__read_audio as read_audio, __webpack_exports__rfft as rfft, __webpack_exports__round as round, __webpack_exports__softmax as softmax, __webpack_exports__spectrogram as spectrogram, __webpack_exports__stack as stack, __webpack_exports__std_mean as std_mean, __webpack_exports__topk as topk, __webpack_exports__window_function as window_function, __webpack_exports__zeros as zeros, __webpack_exports__zeros_like as zeros_like };
34067
+ export { __webpack_exports__ASTFeatureExtractor as ASTFeatureExtractor, __webpack_exports__ASTForAudioClassification as ASTForAudioClassification, __webpack_exports__ASTModel as ASTModel, __webpack_exports__ASTPreTrainedModel as ASTPreTrainedModel, __webpack_exports__AlbertForMaskedLM as AlbertForMaskedLM, __webpack_exports__AlbertForQuestionAnswering as AlbertForQuestionAnswering, __webpack_exports__AlbertForSequenceClassification as AlbertForSequenceClassification, __webpack_exports__AlbertModel as AlbertModel, __webpack_exports__AlbertPreTrainedModel as AlbertPreTrainedModel, __webpack_exports__AlbertTokenizer as AlbertTokenizer, __webpack_exports__AudioClassificationPipeline as AudioClassificationPipeline, __webpack_exports__AutoConfig as AutoConfig, __webpack_exports__AutoModel as AutoModel, __webpack_exports__AutoModelForAudioClassification as AutoModelForAudioClassification, __webpack_exports__AutoModelForAudioFrameClassification as AutoModelForAudioFrameClassification, __webpack_exports__AutoModelForCTC as AutoModelForCTC, __webpack_exports__AutoModelForCausalLM as AutoModelForCausalLM, __webpack_exports__AutoModelForDepthEstimation as AutoModelForDepthEstimation, __webpack_exports__AutoModelForDocumentQuestionAnswering as AutoModelForDocumentQuestionAnswering, __webpack_exports__AutoModelForImageClassification as AutoModelForImageClassification, __webpack_exports__AutoModelForImageFeatureExtraction as AutoModelForImageFeatureExtraction, __webpack_exports__AutoModelForImageMatting as AutoModelForImageMatting, __webpack_exports__AutoModelForImageSegmentation as AutoModelForImageSegmentation, __webpack_exports__AutoModelForImageToImage as AutoModelForImageToImage, __webpack_exports__AutoModelForMaskGeneration as AutoModelForMaskGeneration, __webpack_exports__AutoModelForMaskedLM as AutoModelForMaskedLM, __webpack_exports__AutoModelForNormalEstimation as AutoModelForNormalEstimation, __webpack_exports__AutoModelForObjectDetection as 
AutoModelForObjectDetection, __webpack_exports__AutoModelForQuestionAnswering as AutoModelForQuestionAnswering, __webpack_exports__AutoModelForSemanticSegmentation as AutoModelForSemanticSegmentation, __webpack_exports__AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLM, __webpack_exports__AutoModelForSequenceClassification as AutoModelForSequenceClassification, __webpack_exports__AutoModelForSpeechSeq2Seq as AutoModelForSpeechSeq2Seq, __webpack_exports__AutoModelForTextToSpectrogram as AutoModelForTextToSpectrogram, __webpack_exports__AutoModelForTextToWaveform as AutoModelForTextToWaveform, __webpack_exports__AutoModelForTokenClassification as AutoModelForTokenClassification, __webpack_exports__AutoModelForUniversalSegmentation as AutoModelForUniversalSegmentation, __webpack_exports__AutoModelForVision2Seq as AutoModelForVision2Seq, __webpack_exports__AutoModelForXVector as AutoModelForXVector, __webpack_exports__AutoModelForZeroShotObjectDetection as AutoModelForZeroShotObjectDetection, __webpack_exports__AutoProcessor as AutoProcessor, __webpack_exports__AutoTokenizer as AutoTokenizer, __webpack_exports__AutomaticSpeechRecognitionPipeline as AutomaticSpeechRecognitionPipeline, __webpack_exports__BartForConditionalGeneration as BartForConditionalGeneration, __webpack_exports__BartForSequenceClassification as BartForSequenceClassification, __webpack_exports__BartModel as BartModel, __webpack_exports__BartPretrainedModel as BartPretrainedModel, __webpack_exports__BartTokenizer as BartTokenizer, __webpack_exports__BaseModelOutput as BaseModelOutput, __webpack_exports__BaseStreamer as BaseStreamer, __webpack_exports__BeitFeatureExtractor as BeitFeatureExtractor, __webpack_exports__BeitForImageClassification as BeitForImageClassification, __webpack_exports__BeitModel as BeitModel, __webpack_exports__BeitPreTrainedModel as BeitPreTrainedModel, __webpack_exports__BertForMaskedLM as BertForMaskedLM, __webpack_exports__BertForQuestionAnswering as BertForQuestionAnswering, 
__webpack_exports__BertForSequenceClassification as BertForSequenceClassification, __webpack_exports__BertForTokenClassification as BertForTokenClassification, __webpack_exports__BertModel as BertModel, __webpack_exports__BertPreTrainedModel as BertPreTrainedModel, __webpack_exports__BertTokenizer as BertTokenizer, __webpack_exports__BitImageProcessor as BitImageProcessor, __webpack_exports__BlenderbotForConditionalGeneration as BlenderbotForConditionalGeneration, __webpack_exports__BlenderbotModel as BlenderbotModel, __webpack_exports__BlenderbotPreTrainedModel as BlenderbotPreTrainedModel, __webpack_exports__BlenderbotSmallForConditionalGeneration as BlenderbotSmallForConditionalGeneration, __webpack_exports__BlenderbotSmallModel as BlenderbotSmallModel, __webpack_exports__BlenderbotSmallPreTrainedModel as BlenderbotSmallPreTrainedModel, __webpack_exports__BlenderbotSmallTokenizer as BlenderbotSmallTokenizer, __webpack_exports__BlenderbotTokenizer as BlenderbotTokenizer, __webpack_exports__BloomForCausalLM as BloomForCausalLM, __webpack_exports__BloomModel as BloomModel, __webpack_exports__BloomPreTrainedModel as BloomPreTrainedModel, __webpack_exports__BloomTokenizer as BloomTokenizer, __webpack_exports__CLIPFeatureExtractor as CLIPFeatureExtractor, __webpack_exports__CLIPImageProcessor as CLIPImageProcessor, __webpack_exports__CLIPModel as CLIPModel, __webpack_exports__CLIPPreTrainedModel as CLIPPreTrainedModel, __webpack_exports__CLIPSegForImageSegmentation as CLIPSegForImageSegmentation, __webpack_exports__CLIPSegModel as CLIPSegModel, __webpack_exports__CLIPSegPreTrainedModel as CLIPSegPreTrainedModel, __webpack_exports__CLIPTextModel as CLIPTextModel, __webpack_exports__CLIPTextModelWithProjection as CLIPTextModelWithProjection, __webpack_exports__CLIPTokenizer as CLIPTokenizer, __webpack_exports__CLIPVisionModel as CLIPVisionModel, __webpack_exports__CLIPVisionModelWithProjection as CLIPVisionModelWithProjection, __webpack_exports__CamembertForMaskedLM as 
CamembertForMaskedLM, __webpack_exports__CamembertForQuestionAnswering as CamembertForQuestionAnswering, __webpack_exports__CamembertForSequenceClassification as CamembertForSequenceClassification, __webpack_exports__CamembertForTokenClassification as CamembertForTokenClassification, __webpack_exports__CamembertModel as CamembertModel, __webpack_exports__CamembertPreTrainedModel as CamembertPreTrainedModel, __webpack_exports__CamembertTokenizer as CamembertTokenizer, __webpack_exports__CausalLMOutput as CausalLMOutput, __webpack_exports__CausalLMOutputWithPast as CausalLMOutputWithPast, __webpack_exports__ChineseCLIPFeatureExtractor as ChineseCLIPFeatureExtractor, __webpack_exports__ChineseCLIPModel as ChineseCLIPModel, __webpack_exports__ChineseCLIPPreTrainedModel as ChineseCLIPPreTrainedModel, __webpack_exports__ClapAudioModelWithProjection as ClapAudioModelWithProjection, __webpack_exports__ClapFeatureExtractor as ClapFeatureExtractor, __webpack_exports__ClapModel as ClapModel, __webpack_exports__ClapPreTrainedModel as ClapPreTrainedModel, __webpack_exports__ClapTextModelWithProjection as ClapTextModelWithProjection, __webpack_exports__CodeGenForCausalLM as CodeGenForCausalLM, __webpack_exports__CodeGenModel as CodeGenModel, __webpack_exports__CodeGenPreTrainedModel as CodeGenPreTrainedModel, __webpack_exports__CodeGenTokenizer as CodeGenTokenizer, __webpack_exports__CodeLlamaTokenizer as CodeLlamaTokenizer, __webpack_exports__CohereForCausalLM as CohereForCausalLM, __webpack_exports__CohereModel as CohereModel, __webpack_exports__CoherePreTrainedModel as CoherePreTrainedModel, __webpack_exports__CohereTokenizer as CohereTokenizer, __webpack_exports__ConvBertForMaskedLM as ConvBertForMaskedLM, __webpack_exports__ConvBertForQuestionAnswering as ConvBertForQuestionAnswering, __webpack_exports__ConvBertForSequenceClassification as ConvBertForSequenceClassification, __webpack_exports__ConvBertForTokenClassification as ConvBertForTokenClassification, 
__webpack_exports__ConvBertModel as ConvBertModel, __webpack_exports__ConvBertPreTrainedModel as ConvBertPreTrainedModel, __webpack_exports__ConvBertTokenizer as ConvBertTokenizer, __webpack_exports__ConvNextFeatureExtractor as ConvNextFeatureExtractor, __webpack_exports__ConvNextForImageClassification as ConvNextForImageClassification, __webpack_exports__ConvNextImageProcessor as ConvNextImageProcessor, __webpack_exports__ConvNextModel as ConvNextModel, __webpack_exports__ConvNextPreTrainedModel as ConvNextPreTrainedModel, __webpack_exports__ConvNextV2ForImageClassification as ConvNextV2ForImageClassification, __webpack_exports__ConvNextV2Model as ConvNextV2Model, __webpack_exports__ConvNextV2PreTrainedModel as ConvNextV2PreTrainedModel, __webpack_exports__DPTFeatureExtractor as DPTFeatureExtractor, __webpack_exports__DPTForDepthEstimation as DPTForDepthEstimation, __webpack_exports__DPTImageProcessor as DPTImageProcessor, __webpack_exports__DPTModel as DPTModel, __webpack_exports__DPTPreTrainedModel as DPTPreTrainedModel, __webpack_exports__DebertaForMaskedLM as DebertaForMaskedLM, __webpack_exports__DebertaForQuestionAnswering as DebertaForQuestionAnswering, __webpack_exports__DebertaForSequenceClassification as DebertaForSequenceClassification, __webpack_exports__DebertaForTokenClassification as DebertaForTokenClassification, __webpack_exports__DebertaModel as DebertaModel, __webpack_exports__DebertaPreTrainedModel as DebertaPreTrainedModel, __webpack_exports__DebertaTokenizer as DebertaTokenizer, __webpack_exports__DebertaV2ForMaskedLM as DebertaV2ForMaskedLM, __webpack_exports__DebertaV2ForQuestionAnswering as DebertaV2ForQuestionAnswering, __webpack_exports__DebertaV2ForSequenceClassification as DebertaV2ForSequenceClassification, __webpack_exports__DebertaV2ForTokenClassification as DebertaV2ForTokenClassification, __webpack_exports__DebertaV2Model as DebertaV2Model, __webpack_exports__DebertaV2PreTrainedModel as DebertaV2PreTrainedModel, 
__webpack_exports__DebertaV2Tokenizer as DebertaV2Tokenizer, __webpack_exports__DecisionTransformerModel as DecisionTransformerModel, __webpack_exports__DecisionTransformerPreTrainedModel as DecisionTransformerPreTrainedModel, __webpack_exports__DeiTFeatureExtractor as DeiTFeatureExtractor, __webpack_exports__DeiTForImageClassification as DeiTForImageClassification, __webpack_exports__DeiTModel as DeiTModel, __webpack_exports__DeiTPreTrainedModel as DeiTPreTrainedModel, __webpack_exports__DepthAnythingForDepthEstimation as DepthAnythingForDepthEstimation, __webpack_exports__DepthAnythingPreTrainedModel as DepthAnythingPreTrainedModel, __webpack_exports__DepthEstimationPipeline as DepthEstimationPipeline, __webpack_exports__DepthProForDepthEstimation as DepthProForDepthEstimation, __webpack_exports__DepthProPreTrainedModel as DepthProPreTrainedModel, __webpack_exports__DetrFeatureExtractor as DetrFeatureExtractor, __webpack_exports__DetrForObjectDetection as DetrForObjectDetection, __webpack_exports__DetrForSegmentation as DetrForSegmentation, __webpack_exports__DetrModel as DetrModel, __webpack_exports__DetrObjectDetectionOutput as DetrObjectDetectionOutput, __webpack_exports__DetrPreTrainedModel as DetrPreTrainedModel, __webpack_exports__DetrSegmentationOutput as DetrSegmentationOutput, __webpack_exports__Dinov2ForImageClassification as Dinov2ForImageClassification, __webpack_exports__Dinov2Model as Dinov2Model, __webpack_exports__Dinov2PreTrainedModel as Dinov2PreTrainedModel, __webpack_exports__DistilBertForMaskedLM as DistilBertForMaskedLM, __webpack_exports__DistilBertForQuestionAnswering as DistilBertForQuestionAnswering, __webpack_exports__DistilBertForSequenceClassification as DistilBertForSequenceClassification, __webpack_exports__DistilBertForTokenClassification as DistilBertForTokenClassification, __webpack_exports__DistilBertModel as DistilBertModel, __webpack_exports__DistilBertPreTrainedModel as DistilBertPreTrainedModel, 
__webpack_exports__DistilBertTokenizer as DistilBertTokenizer, __webpack_exports__DocumentQuestionAnsweringPipeline as DocumentQuestionAnsweringPipeline, __webpack_exports__DonutFeatureExtractor as DonutFeatureExtractor, __webpack_exports__DonutImageProcessor as DonutImageProcessor, __webpack_exports__DonutSwinModel as DonutSwinModel, __webpack_exports__DonutSwinPreTrainedModel as DonutSwinPreTrainedModel, __webpack_exports__EfficientNetForImageClassification as EfficientNetForImageClassification, __webpack_exports__EfficientNetImageProcessor as EfficientNetImageProcessor, __webpack_exports__EfficientNetModel as EfficientNetModel, __webpack_exports__EfficientNetPreTrainedModel as EfficientNetPreTrainedModel, __webpack_exports__ElectraForMaskedLM as ElectraForMaskedLM, __webpack_exports__ElectraForQuestionAnswering as ElectraForQuestionAnswering, __webpack_exports__ElectraForSequenceClassification as ElectraForSequenceClassification, __webpack_exports__ElectraForTokenClassification as ElectraForTokenClassification, __webpack_exports__ElectraModel as ElectraModel, __webpack_exports__ElectraPreTrainedModel as ElectraPreTrainedModel, __webpack_exports__ElectraTokenizer as ElectraTokenizer, __webpack_exports__EosTokenCriteria as EosTokenCriteria, __webpack_exports__EsmForMaskedLM as EsmForMaskedLM, __webpack_exports__EsmForSequenceClassification as EsmForSequenceClassification, __webpack_exports__EsmForTokenClassification as EsmForTokenClassification, __webpack_exports__EsmModel as EsmModel, __webpack_exports__EsmPreTrainedModel as EsmPreTrainedModel, __webpack_exports__EsmTokenizer as EsmTokenizer, __webpack_exports__FFT as FFT, __webpack_exports__FalconForCausalLM as FalconForCausalLM, __webpack_exports__FalconModel as FalconModel, __webpack_exports__FalconPreTrainedModel as FalconPreTrainedModel, __webpack_exports__FalconTokenizer as FalconTokenizer, __webpack_exports__FastViTForImageClassification as FastViTForImageClassification, __webpack_exports__FastViTModel as 
FastViTModel, __webpack_exports__FastViTPreTrainedModel as FastViTPreTrainedModel, __webpack_exports__FeatureExtractionPipeline as FeatureExtractionPipeline, __webpack_exports__FeatureExtractor as FeatureExtractor, __webpack_exports__FillMaskPipeline as FillMaskPipeline, __webpack_exports__Florence2ForConditionalGeneration as Florence2ForConditionalGeneration, __webpack_exports__Florence2PreTrainedModel as Florence2PreTrainedModel, __webpack_exports__Florence2Processor as Florence2Processor, __webpack_exports__GLPNFeatureExtractor as GLPNFeatureExtractor, __webpack_exports__GLPNForDepthEstimation as GLPNForDepthEstimation, __webpack_exports__GLPNModel as GLPNModel, __webpack_exports__GLPNPreTrainedModel as GLPNPreTrainedModel, __webpack_exports__GPT2LMHeadModel as GPT2LMHeadModel, __webpack_exports__GPT2Model as GPT2Model, __webpack_exports__GPT2PreTrainedModel as GPT2PreTrainedModel, __webpack_exports__GPT2Tokenizer as GPT2Tokenizer, __webpack_exports__GPTBigCodeForCausalLM as GPTBigCodeForCausalLM, __webpack_exports__GPTBigCodeModel as GPTBigCodeModel, __webpack_exports__GPTBigCodePreTrainedModel as GPTBigCodePreTrainedModel, __webpack_exports__GPTJForCausalLM as GPTJForCausalLM, __webpack_exports__GPTJModel as GPTJModel, __webpack_exports__GPTJPreTrainedModel as GPTJPreTrainedModel, __webpack_exports__GPTNeoForCausalLM as GPTNeoForCausalLM, __webpack_exports__GPTNeoModel as GPTNeoModel, __webpack_exports__GPTNeoPreTrainedModel as GPTNeoPreTrainedModel, __webpack_exports__GPTNeoXForCausalLM as GPTNeoXForCausalLM, __webpack_exports__GPTNeoXModel as GPTNeoXModel, __webpack_exports__GPTNeoXPreTrainedModel as GPTNeoXPreTrainedModel, __webpack_exports__GPTNeoXTokenizer as GPTNeoXTokenizer, __webpack_exports__Gemma2ForCausalLM as Gemma2ForCausalLM, __webpack_exports__Gemma2Model as Gemma2Model, __webpack_exports__Gemma2PreTrainedModel as Gemma2PreTrainedModel, __webpack_exports__GemmaForCausalLM as GemmaForCausalLM, __webpack_exports__GemmaModel as GemmaModel, 
__webpack_exports__GemmaPreTrainedModel as GemmaPreTrainedModel, __webpack_exports__GemmaTokenizer as GemmaTokenizer, __webpack_exports__GraniteForCausalLM as GraniteForCausalLM, __webpack_exports__GraniteModel as GraniteModel, __webpack_exports__GranitePreTrainedModel as GranitePreTrainedModel, __webpack_exports__Grok1Tokenizer as Grok1Tokenizer, __webpack_exports__GroupViTModel as GroupViTModel, __webpack_exports__GroupViTPreTrainedModel as GroupViTPreTrainedModel, __webpack_exports__HerbertTokenizer as HerbertTokenizer, __webpack_exports__HieraForImageClassification as HieraForImageClassification, __webpack_exports__HieraModel as HieraModel, __webpack_exports__HieraPreTrainedModel as HieraPreTrainedModel, __webpack_exports__HubertForCTC as HubertForCTC, __webpack_exports__HubertForSequenceClassification as HubertForSequenceClassification, __webpack_exports__HubertModel as HubertModel, __webpack_exports__HubertPreTrainedModel as HubertPreTrainedModel, __webpack_exports__ImageClassificationPipeline as ImageClassificationPipeline, __webpack_exports__ImageFeatureExtractionPipeline as ImageFeatureExtractionPipeline, __webpack_exports__ImageFeatureExtractor as ImageFeatureExtractor, __webpack_exports__ImageMattingOutput as ImageMattingOutput, __webpack_exports__ImageSegmentationPipeline as ImageSegmentationPipeline, __webpack_exports__ImageToImagePipeline as ImageToImagePipeline, __webpack_exports__ImageToTextPipeline as ImageToTextPipeline, __webpack_exports__InterruptableStoppingCriteria as InterruptableStoppingCriteria, __webpack_exports__JAISLMHeadModel as JAISLMHeadModel, __webpack_exports__JAISModel as JAISModel, __webpack_exports__JAISPreTrainedModel as JAISPreTrainedModel, __webpack_exports__LlamaForCausalLM as LlamaForCausalLM, __webpack_exports__LlamaModel as LlamaModel, __webpack_exports__LlamaPreTrainedModel as LlamaPreTrainedModel, __webpack_exports__LlamaTokenizer as LlamaTokenizer, __webpack_exports__LlavaForConditionalGeneration as 
LlavaForConditionalGeneration, __webpack_exports__LlavaPreTrainedModel as LlavaPreTrainedModel, __webpack_exports__LongT5ForConditionalGeneration as LongT5ForConditionalGeneration, __webpack_exports__LongT5Model as LongT5Model, __webpack_exports__LongT5PreTrainedModel as LongT5PreTrainedModel, __webpack_exports__M2M100ForConditionalGeneration as M2M100ForConditionalGeneration, __webpack_exports__M2M100Model as M2M100Model, __webpack_exports__M2M100PreTrainedModel as M2M100PreTrainedModel, __webpack_exports__M2M100Tokenizer as M2M100Tokenizer, __webpack_exports__MBart50Tokenizer as MBart50Tokenizer, __webpack_exports__MBartForCausalLM as MBartForCausalLM, __webpack_exports__MBartForConditionalGeneration as MBartForConditionalGeneration, __webpack_exports__MBartForSequenceClassification as MBartForSequenceClassification, __webpack_exports__MBartModel as MBartModel, __webpack_exports__MBartPreTrainedModel as MBartPreTrainedModel, __webpack_exports__MBartTokenizer as MBartTokenizer, __webpack_exports__MPNetForMaskedLM as MPNetForMaskedLM, __webpack_exports__MPNetForQuestionAnswering as MPNetForQuestionAnswering, __webpack_exports__MPNetForSequenceClassification as MPNetForSequenceClassification, __webpack_exports__MPNetForTokenClassification as MPNetForTokenClassification, __webpack_exports__MPNetModel as MPNetModel, __webpack_exports__MPNetPreTrainedModel as MPNetPreTrainedModel, __webpack_exports__MPNetTokenizer as MPNetTokenizer, __webpack_exports__MT5ForConditionalGeneration as MT5ForConditionalGeneration, __webpack_exports__MT5Model as MT5Model, __webpack_exports__MT5PreTrainedModel as MT5PreTrainedModel, __webpack_exports__MarianMTModel as MarianMTModel, __webpack_exports__MarianModel as MarianModel, __webpack_exports__MarianPreTrainedModel as MarianPreTrainedModel, __webpack_exports__MarianTokenizer as MarianTokenizer, __webpack_exports__MaskFormerFeatureExtractor as MaskFormerFeatureExtractor, __webpack_exports__MaskFormerForInstanceSegmentation as 
MaskFormerForInstanceSegmentation, __webpack_exports__MaskFormerModel as MaskFormerModel, __webpack_exports__MaskFormerPreTrainedModel as MaskFormerPreTrainedModel, __webpack_exports__MaskedLMOutput as MaskedLMOutput, __webpack_exports__MaxLengthCriteria as MaxLengthCriteria, __webpack_exports__MistralForCausalLM as MistralForCausalLM, __webpack_exports__MistralModel as MistralModel, __webpack_exports__MistralPreTrainedModel as MistralPreTrainedModel, __webpack_exports__MobileBertForMaskedLM as MobileBertForMaskedLM, __webpack_exports__MobileBertForQuestionAnswering as MobileBertForQuestionAnswering, __webpack_exports__MobileBertForSequenceClassification as MobileBertForSequenceClassification, __webpack_exports__MobileBertModel as MobileBertModel, __webpack_exports__MobileBertPreTrainedModel as MobileBertPreTrainedModel, __webpack_exports__MobileBertTokenizer as MobileBertTokenizer, __webpack_exports__MobileNetV1FeatureExtractor as MobileNetV1FeatureExtractor, __webpack_exports__MobileNetV1ForImageClassification as MobileNetV1ForImageClassification, __webpack_exports__MobileNetV1Model as MobileNetV1Model, __webpack_exports__MobileNetV1PreTrainedModel as MobileNetV1PreTrainedModel, __webpack_exports__MobileNetV2FeatureExtractor as MobileNetV2FeatureExtractor, __webpack_exports__MobileNetV2ForImageClassification as MobileNetV2ForImageClassification, __webpack_exports__MobileNetV2Model as MobileNetV2Model, __webpack_exports__MobileNetV2PreTrainedModel as MobileNetV2PreTrainedModel, __webpack_exports__MobileNetV3FeatureExtractor as MobileNetV3FeatureExtractor, __webpack_exports__MobileNetV3ForImageClassification as MobileNetV3ForImageClassification, __webpack_exports__MobileNetV3Model as MobileNetV3Model, __webpack_exports__MobileNetV3PreTrainedModel as MobileNetV3PreTrainedModel, __webpack_exports__MobileNetV4FeatureExtractor as MobileNetV4FeatureExtractor, __webpack_exports__MobileNetV4ForImageClassification as MobileNetV4ForImageClassification, 
__webpack_exports__MobileNetV4Model as MobileNetV4Model, __webpack_exports__MobileNetV4PreTrainedModel as MobileNetV4PreTrainedModel, __webpack_exports__MobileViTFeatureExtractor as MobileViTFeatureExtractor, __webpack_exports__MobileViTForImageClassification as MobileViTForImageClassification, __webpack_exports__MobileViTImageProcessor as MobileViTImageProcessor, __webpack_exports__MobileViTModel as MobileViTModel, __webpack_exports__MobileViTPreTrainedModel as MobileViTPreTrainedModel, __webpack_exports__MobileViTV2ForImageClassification as MobileViTV2ForImageClassification, __webpack_exports__MobileViTV2Model as MobileViTV2Model, __webpack_exports__MobileViTV2PreTrainedModel as MobileViTV2PreTrainedModel, __webpack_exports__ModelOutput as ModelOutput, __webpack_exports__Moondream1ForConditionalGeneration as Moondream1ForConditionalGeneration, __webpack_exports__MptForCausalLM as MptForCausalLM, __webpack_exports__MptModel as MptModel, __webpack_exports__MptPreTrainedModel as MptPreTrainedModel, __webpack_exports__MusicgenForCausalLM as MusicgenForCausalLM, __webpack_exports__MusicgenForConditionalGeneration as MusicgenForConditionalGeneration, __webpack_exports__MusicgenModel as MusicgenModel, __webpack_exports__MusicgenPreTrainedModel as MusicgenPreTrainedModel, __webpack_exports__NllbTokenizer as NllbTokenizer, __webpack_exports__NomicBertModel as NomicBertModel, __webpack_exports__NomicBertPreTrainedModel as NomicBertPreTrainedModel, __webpack_exports__NougatImageProcessor as NougatImageProcessor, __webpack_exports__NougatTokenizer as NougatTokenizer, __webpack_exports__OPTForCausalLM as OPTForCausalLM, __webpack_exports__OPTModel as OPTModel, __webpack_exports__OPTPreTrainedModel as OPTPreTrainedModel, __webpack_exports__ObjectDetectionPipeline as ObjectDetectionPipeline, __webpack_exports__OpenELMForCausalLM as OpenELMForCausalLM, __webpack_exports__OpenELMModel as OpenELMModel, __webpack_exports__OpenELMPreTrainedModel as OpenELMPreTrainedModel, 
__webpack_exports__OwlViTFeatureExtractor as OwlViTFeatureExtractor, __webpack_exports__OwlViTForObjectDetection as OwlViTForObjectDetection, __webpack_exports__OwlViTModel as OwlViTModel, __webpack_exports__OwlViTPreTrainedModel as OwlViTPreTrainedModel, __webpack_exports__OwlViTProcessor as OwlViTProcessor, __webpack_exports__Owlv2ForObjectDetection as Owlv2ForObjectDetection, __webpack_exports__Owlv2ImageProcessor as Owlv2ImageProcessor, __webpack_exports__Owlv2Model as Owlv2Model, __webpack_exports__Owlv2PreTrainedModel as Owlv2PreTrainedModel, __webpack_exports__Phi3ForCausalLM as Phi3ForCausalLM, __webpack_exports__Phi3Model as Phi3Model, __webpack_exports__Phi3PreTrainedModel as Phi3PreTrainedModel, __webpack_exports__PhiForCausalLM as PhiForCausalLM, __webpack_exports__PhiModel as PhiModel, __webpack_exports__PhiPreTrainedModel as PhiPreTrainedModel, __webpack_exports__Pipeline as Pipeline, __webpack_exports__PreTrainedModel as PreTrainedModel, __webpack_exports__PreTrainedTokenizer as PreTrainedTokenizer, __webpack_exports__PretrainedConfig as PretrainedConfig, __webpack_exports__PretrainedMixin as PretrainedMixin, __webpack_exports__Processor as Processor, __webpack_exports__PvtForImageClassification as PvtForImageClassification, __webpack_exports__PvtImageProcessor as PvtImageProcessor, __webpack_exports__PvtModel as PvtModel, __webpack_exports__PvtPreTrainedModel as PvtPreTrainedModel, __webpack_exports__PyAnnoteFeatureExtractor as PyAnnoteFeatureExtractor, __webpack_exports__PyAnnoteForAudioFrameClassification as PyAnnoteForAudioFrameClassification, __webpack_exports__PyAnnoteModel as PyAnnoteModel, __webpack_exports__PyAnnotePreTrainedModel as PyAnnotePreTrainedModel, __webpack_exports__PyAnnoteProcessor as PyAnnoteProcessor, __webpack_exports__QuestionAnsweringModelOutput as QuestionAnsweringModelOutput, __webpack_exports__QuestionAnsweringPipeline as QuestionAnsweringPipeline, __webpack_exports__Qwen2ForCausalLM as Qwen2ForCausalLM, 
__webpack_exports__Qwen2Model as Qwen2Model, __webpack_exports__Qwen2PreTrainedModel as Qwen2PreTrainedModel, __webpack_exports__Qwen2Tokenizer as Qwen2Tokenizer, __webpack_exports__RTDetrForObjectDetection as RTDetrForObjectDetection, __webpack_exports__RTDetrImageProcessor as RTDetrImageProcessor, __webpack_exports__RTDetrModel as RTDetrModel, __webpack_exports__RTDetrObjectDetectionOutput as RTDetrObjectDetectionOutput, __webpack_exports__RTDetrPreTrainedModel as RTDetrPreTrainedModel, __webpack_exports__RawImage as RawImage, __webpack_exports__ResNetForImageClassification as ResNetForImageClassification, __webpack_exports__ResNetModel as ResNetModel, __webpack_exports__ResNetPreTrainedModel as ResNetPreTrainedModel, __webpack_exports__RoFormerForMaskedLM as RoFormerForMaskedLM, __webpack_exports__RoFormerForQuestionAnswering as RoFormerForQuestionAnswering, __webpack_exports__RoFormerForSequenceClassification as RoFormerForSequenceClassification, __webpack_exports__RoFormerForTokenClassification as RoFormerForTokenClassification, __webpack_exports__RoFormerModel as RoFormerModel, __webpack_exports__RoFormerPreTrainedModel as RoFormerPreTrainedModel, __webpack_exports__RoFormerTokenizer as RoFormerTokenizer, __webpack_exports__RobertaForMaskedLM as RobertaForMaskedLM, __webpack_exports__RobertaForQuestionAnswering as RobertaForQuestionAnswering, __webpack_exports__RobertaForSequenceClassification as RobertaForSequenceClassification, __webpack_exports__RobertaForTokenClassification as RobertaForTokenClassification, __webpack_exports__RobertaModel as RobertaModel, __webpack_exports__RobertaPreTrainedModel as RobertaPreTrainedModel, __webpack_exports__RobertaTokenizer as RobertaTokenizer, __webpack_exports__SamImageProcessor as SamImageProcessor, __webpack_exports__SamImageSegmentationOutput as SamImageSegmentationOutput, __webpack_exports__SamModel as SamModel, __webpack_exports__SamPreTrainedModel as SamPreTrainedModel, __webpack_exports__SamProcessor as 
SamProcessor, __webpack_exports__SapiensFeatureExtractor as SapiensFeatureExtractor, __webpack_exports__SapiensForDepthEstimation as SapiensForDepthEstimation, __webpack_exports__SapiensForNormalEstimation as SapiensForNormalEstimation, __webpack_exports__SapiensForSemanticSegmentation as SapiensForSemanticSegmentation, __webpack_exports__SapiensPreTrainedModel as SapiensPreTrainedModel, __webpack_exports__SeamlessM4TFeatureExtractor as SeamlessM4TFeatureExtractor, __webpack_exports__SegformerFeatureExtractor as SegformerFeatureExtractor, __webpack_exports__SegformerForImageClassification as SegformerForImageClassification, __webpack_exports__SegformerForSemanticSegmentation as SegformerForSemanticSegmentation, __webpack_exports__SegformerModel as SegformerModel, __webpack_exports__SegformerPreTrainedModel as SegformerPreTrainedModel, __webpack_exports__Seq2SeqLMOutput as Seq2SeqLMOutput, __webpack_exports__SequenceClassifierOutput as SequenceClassifierOutput, __webpack_exports__SiglipImageProcessor as SiglipImageProcessor, __webpack_exports__SiglipModel as SiglipModel, __webpack_exports__SiglipPreTrainedModel as SiglipPreTrainedModel, __webpack_exports__SiglipTextModel as SiglipTextModel, __webpack_exports__SiglipTokenizer as SiglipTokenizer, __webpack_exports__SiglipVisionModel as SiglipVisionModel, __webpack_exports__SpeechT5FeatureExtractor as SpeechT5FeatureExtractor, __webpack_exports__SpeechT5ForSpeechToText as SpeechT5ForSpeechToText, __webpack_exports__SpeechT5ForTextToSpeech as SpeechT5ForTextToSpeech, __webpack_exports__SpeechT5HifiGan as SpeechT5HifiGan, __webpack_exports__SpeechT5Model as SpeechT5Model, __webpack_exports__SpeechT5PreTrainedModel as SpeechT5PreTrainedModel, __webpack_exports__SpeechT5Processor as SpeechT5Processor, __webpack_exports__SpeechT5Tokenizer as SpeechT5Tokenizer, __webpack_exports__SqueezeBertForMaskedLM as SqueezeBertForMaskedLM, __webpack_exports__SqueezeBertForQuestionAnswering as SqueezeBertForQuestionAnswering, 
__webpack_exports__SqueezeBertForSequenceClassification as SqueezeBertForSequenceClassification, __webpack_exports__SqueezeBertModel as SqueezeBertModel, __webpack_exports__SqueezeBertPreTrainedModel as SqueezeBertPreTrainedModel, __webpack_exports__SqueezeBertTokenizer as SqueezeBertTokenizer, __webpack_exports__StableLmForCausalLM as StableLmForCausalLM, __webpack_exports__StableLmModel as StableLmModel, __webpack_exports__StableLmPreTrainedModel as StableLmPreTrainedModel, __webpack_exports__Starcoder2ForCausalLM as Starcoder2ForCausalLM, __webpack_exports__Starcoder2Model as Starcoder2Model, __webpack_exports__Starcoder2PreTrainedModel as Starcoder2PreTrainedModel, __webpack_exports__StoppingCriteria as StoppingCriteria, __webpack_exports__StoppingCriteriaList as StoppingCriteriaList, __webpack_exports__SummarizationPipeline as SummarizationPipeline, __webpack_exports__Swin2SRForImageSuperResolution as Swin2SRForImageSuperResolution, __webpack_exports__Swin2SRImageProcessor as Swin2SRImageProcessor, __webpack_exports__Swin2SRModel as Swin2SRModel, __webpack_exports__Swin2SRPreTrainedModel as Swin2SRPreTrainedModel, __webpack_exports__SwinForImageClassification as SwinForImageClassification, __webpack_exports__SwinModel as SwinModel, __webpack_exports__SwinPreTrainedModel as SwinPreTrainedModel, __webpack_exports__T5ForConditionalGeneration as T5ForConditionalGeneration, __webpack_exports__T5Model as T5Model, __webpack_exports__T5PreTrainedModel as T5PreTrainedModel, __webpack_exports__T5Tokenizer as T5Tokenizer, __webpack_exports__TableTransformerForObjectDetection as TableTransformerForObjectDetection, __webpack_exports__TableTransformerModel as TableTransformerModel, __webpack_exports__TableTransformerObjectDetectionOutput as TableTransformerObjectDetectionOutput, __webpack_exports__TableTransformerPreTrainedModel as TableTransformerPreTrainedModel, __webpack_exports__Tensor as Tensor, __webpack_exports__Text2TextGenerationPipeline as 
Text2TextGenerationPipeline, __webpack_exports__TextClassificationPipeline as TextClassificationPipeline, __webpack_exports__TextGenerationPipeline as TextGenerationPipeline, __webpack_exports__TextStreamer as TextStreamer, __webpack_exports__TextToAudioPipeline as TextToAudioPipeline, __webpack_exports__TokenClassificationPipeline as TokenClassificationPipeline, __webpack_exports__TokenClassifierOutput as TokenClassifierOutput, __webpack_exports__TokenizerModel as TokenizerModel, __webpack_exports__TrOCRForCausalLM as TrOCRForCausalLM, __webpack_exports__TrOCRPreTrainedModel as TrOCRPreTrainedModel, __webpack_exports__TranslationPipeline as TranslationPipeline, __webpack_exports__UniSpeechForCTC as UniSpeechForCTC, __webpack_exports__UniSpeechForSequenceClassification as UniSpeechForSequenceClassification, __webpack_exports__UniSpeechModel as UniSpeechModel, __webpack_exports__UniSpeechPreTrainedModel as UniSpeechPreTrainedModel, __webpack_exports__UniSpeechSatForAudioFrameClassification as UniSpeechSatForAudioFrameClassification, __webpack_exports__UniSpeechSatForCTC as UniSpeechSatForCTC, __webpack_exports__UniSpeechSatForSequenceClassification as UniSpeechSatForSequenceClassification, __webpack_exports__UniSpeechSatModel as UniSpeechSatModel, __webpack_exports__UniSpeechSatPreTrainedModel as UniSpeechSatPreTrainedModel, __webpack_exports__ViTFeatureExtractor as ViTFeatureExtractor, __webpack_exports__ViTForImageClassification as ViTForImageClassification, __webpack_exports__ViTImageProcessor as ViTImageProcessor, __webpack_exports__ViTMAEModel as ViTMAEModel, __webpack_exports__ViTMAEPreTrainedModel as ViTMAEPreTrainedModel, __webpack_exports__ViTMSNForImageClassification as ViTMSNForImageClassification, __webpack_exports__ViTMSNModel as ViTMSNModel, __webpack_exports__ViTMSNPreTrainedModel as ViTMSNPreTrainedModel, __webpack_exports__ViTModel as ViTModel, __webpack_exports__ViTPreTrainedModel as ViTPreTrainedModel, __webpack_exports__VisionEncoderDecoderModel 
as VisionEncoderDecoderModel, __webpack_exports__VitMatteForImageMatting as VitMatteForImageMatting, __webpack_exports__VitMatteImageProcessor as VitMatteImageProcessor, __webpack_exports__VitMattePreTrainedModel as VitMattePreTrainedModel, __webpack_exports__VitsModel as VitsModel, __webpack_exports__VitsModelOutput as VitsModelOutput, __webpack_exports__VitsPreTrainedModel as VitsPreTrainedModel, __webpack_exports__VitsTokenizer as VitsTokenizer, __webpack_exports__Wav2Vec2BertForCTC as Wav2Vec2BertForCTC, __webpack_exports__Wav2Vec2BertForSequenceClassification as Wav2Vec2BertForSequenceClassification, __webpack_exports__Wav2Vec2BertModel as Wav2Vec2BertModel, __webpack_exports__Wav2Vec2BertPreTrainedModel as Wav2Vec2BertPreTrainedModel, __webpack_exports__Wav2Vec2CTCTokenizer as Wav2Vec2CTCTokenizer, __webpack_exports__Wav2Vec2FeatureExtractor as Wav2Vec2FeatureExtractor, __webpack_exports__Wav2Vec2ForAudioFrameClassification as Wav2Vec2ForAudioFrameClassification, __webpack_exports__Wav2Vec2ForCTC as Wav2Vec2ForCTC, __webpack_exports__Wav2Vec2ForSequenceClassification as Wav2Vec2ForSequenceClassification, __webpack_exports__Wav2Vec2Model as Wav2Vec2Model, __webpack_exports__Wav2Vec2PreTrainedModel as Wav2Vec2PreTrainedModel, __webpack_exports__Wav2Vec2ProcessorWithLM as Wav2Vec2ProcessorWithLM, __webpack_exports__WavLMForAudioFrameClassification as WavLMForAudioFrameClassification, __webpack_exports__WavLMForCTC as WavLMForCTC, __webpack_exports__WavLMForSequenceClassification as WavLMForSequenceClassification, __webpack_exports__WavLMForXVector as WavLMForXVector, __webpack_exports__WavLMModel as WavLMModel, __webpack_exports__WavLMPreTrainedModel as WavLMPreTrainedModel, __webpack_exports__WeSpeakerFeatureExtractor as WeSpeakerFeatureExtractor, __webpack_exports__WeSpeakerResNetModel as WeSpeakerResNetModel, __webpack_exports__WeSpeakerResNetPreTrainedModel as WeSpeakerResNetPreTrainedModel, __webpack_exports__WhisperFeatureExtractor as 
WhisperFeatureExtractor, __webpack_exports__WhisperForConditionalGeneration as WhisperForConditionalGeneration, __webpack_exports__WhisperModel as WhisperModel, __webpack_exports__WhisperPreTrainedModel as WhisperPreTrainedModel, __webpack_exports__WhisperProcessor as WhisperProcessor, __webpack_exports__WhisperTextStreamer as WhisperTextStreamer, __webpack_exports__WhisperTokenizer as WhisperTokenizer, __webpack_exports__XLMForQuestionAnswering as XLMForQuestionAnswering, __webpack_exports__XLMForSequenceClassification as XLMForSequenceClassification, __webpack_exports__XLMForTokenClassification as XLMForTokenClassification, __webpack_exports__XLMModel as XLMModel, __webpack_exports__XLMPreTrainedModel as XLMPreTrainedModel, __webpack_exports__XLMRobertaForMaskedLM as XLMRobertaForMaskedLM, __webpack_exports__XLMRobertaForQuestionAnswering as XLMRobertaForQuestionAnswering, __webpack_exports__XLMRobertaForSequenceClassification as XLMRobertaForSequenceClassification, __webpack_exports__XLMRobertaForTokenClassification as XLMRobertaForTokenClassification, __webpack_exports__XLMRobertaModel as XLMRobertaModel, __webpack_exports__XLMRobertaPreTrainedModel as XLMRobertaPreTrainedModel, __webpack_exports__XLMRobertaTokenizer as XLMRobertaTokenizer, __webpack_exports__XLMTokenizer as XLMTokenizer, __webpack_exports__XLMWithLMHeadModel as XLMWithLMHeadModel, __webpack_exports__XVectorOutput as XVectorOutput, __webpack_exports__YolosFeatureExtractor as YolosFeatureExtractor, __webpack_exports__YolosForObjectDetection as YolosForObjectDetection, __webpack_exports__YolosModel as YolosModel, __webpack_exports__YolosObjectDetectionOutput as YolosObjectDetectionOutput, __webpack_exports__YolosPreTrainedModel as YolosPreTrainedModel, __webpack_exports__ZeroShotAudioClassificationPipeline as ZeroShotAudioClassificationPipeline, __webpack_exports__ZeroShotClassificationPipeline as ZeroShotClassificationPipeline, __webpack_exports__ZeroShotImageClassificationPipeline as 
ZeroShotImageClassificationPipeline, __webpack_exports__ZeroShotObjectDetectionPipeline as ZeroShotObjectDetectionPipeline, __webpack_exports__bankers_round as bankers_round, __webpack_exports__cat as cat, __webpack_exports__cos_sim as cos_sim, __webpack_exports__dot as dot, __webpack_exports__dynamic_time_warping as dynamic_time_warping, __webpack_exports__env as env, __webpack_exports__full as full, __webpack_exports__full_like as full_like, __webpack_exports__getKeyValueShapes as getKeyValueShapes, __webpack_exports__hamming as hamming, __webpack_exports__hanning as hanning, __webpack_exports__interpolate as interpolate, __webpack_exports__interpolate_4d as interpolate_4d, __webpack_exports__interpolate_data as interpolate_data, __webpack_exports__is_chinese_char as is_chinese_char, __webpack_exports__layer_norm as layer_norm, __webpack_exports__log_softmax as log_softmax, __webpack_exports__magnitude as magnitude, __webpack_exports__matmul as matmul, __webpack_exports__max as max, __webpack_exports__mean as mean, __webpack_exports__mean_pooling as mean_pooling, __webpack_exports__medianFilter as medianFilter, __webpack_exports__mel_filter_bank as mel_filter_bank, __webpack_exports__min as min, __webpack_exports__ones as ones, __webpack_exports__ones_like as ones_like, __webpack_exports__permute as permute, __webpack_exports__permute_data as permute_data, __webpack_exports__pipeline as pipeline, __webpack_exports__quantize_embeddings as quantize_embeddings, __webpack_exports__read_audio as read_audio, __webpack_exports__rfft as rfft, __webpack_exports__round as round, __webpack_exports__softmax as softmax, __webpack_exports__spectrogram as spectrogram, __webpack_exports__stack as stack, __webpack_exports__std_mean as std_mean, __webpack_exports__topk as topk, __webpack_exports__window_function as window_function, __webpack_exports__zeros as zeros, __webpack_exports__zeros_like as zeros_like };
34070
34068
 
34071
34069
  //# sourceMappingURL=transformers.js.map