@huggingface/transformers 3.3.2 → 3.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -775,18 +775,19 @@ function parse(tokens) {
775
775
  return left;
776
776
  }
777
777
  function parseCallMemberExpression() {
778
- const member = parseMemberExpression();
778
+ const member = parseMemberExpression(parsePrimaryExpression());
779
779
  if (is(TOKEN_TYPES.OpenParen)) {
780
780
  return parseCallExpression(member);
781
781
  }
782
782
  return member;
783
783
  }
784
784
  function parseCallExpression(callee) {
785
- let callExpression = new CallExpression(callee, parseArgs());
785
+ let expression = new CallExpression(callee, parseArgs());
786
+ expression = parseMemberExpression(expression);
786
787
  if (is(TOKEN_TYPES.OpenParen)) {
787
- callExpression = parseCallExpression(callExpression);
788
+ expression = parseCallExpression(expression);
788
789
  }
789
- return callExpression;
790
+ return expression;
790
791
  }
791
792
  function parseArgs() {
792
793
  expect(TOKEN_TYPES.OpenParen, "Expected opening parenthesis for arguments list");
@@ -840,8 +841,7 @@ function parse(tokens) {
840
841
  }
841
842
  return slices[0];
842
843
  }
843
- function parseMemberExpression() {
844
- let object = parsePrimaryExpression();
844
+ function parseMemberExpression(object) {
845
845
  while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) {
846
846
  const operator = tokens[current];
847
847
  ++current;
@@ -1066,6 +1066,41 @@ var StringValue = class extends RuntimeValue {
1066
1066
  new FunctionValue(() => {
1067
1067
  return new StringValue(this.value.trimStart());
1068
1068
  })
1069
+ ],
1070
+ [
1071
+ "split",
1072
+ // follows Python's `str.split(sep=None, maxsplit=-1)` function behavior
1073
+ // https://docs.python.org/3.13/library/stdtypes.html#str.split
1074
+ new FunctionValue((args) => {
1075
+ const sep = args[0] ?? new NullValue();
1076
+ if (!(sep instanceof StringValue || sep instanceof NullValue)) {
1077
+ throw new Error("sep argument must be a string or null");
1078
+ }
1079
+ const maxsplit = args[1] ?? new NumericValue(-1);
1080
+ if (!(maxsplit instanceof NumericValue)) {
1081
+ throw new Error("maxsplit argument must be a number");
1082
+ }
1083
+ let result = [];
1084
+ if (sep instanceof NullValue) {
1085
+ const text = this.value.trimStart();
1086
+ for (const { 0: match, index } of text.matchAll(/\S+/g)) {
1087
+ if (maxsplit.value !== -1 && result.length >= maxsplit.value && index !== void 0) {
1088
+ result.push(match + text.slice(index + match.length));
1089
+ break;
1090
+ }
1091
+ result.push(match);
1092
+ }
1093
+ } else {
1094
+ if (sep.value === "") {
1095
+ throw new Error("empty separator");
1096
+ }
1097
+ result = this.value.split(sep.value);
1098
+ if (maxsplit.value !== -1 && result.length > maxsplit.value) {
1099
+ result.push(result.splice(maxsplit.value).join(sep.value));
1100
+ }
1101
+ }
1102
+ return new ArrayValue(result.map((part) => new StringValue(part)));
1103
+ })
1069
1104
  ]
1070
1105
  ]);
1071
1106
  };
@@ -1402,6 +1437,8 @@ var Interpreter = class {
1402
1437
  }
1403
1438
  })
1404
1439
  );
1440
+ case "join":
1441
+ return new StringValue(operand.value.map((x) => x.value).join(""));
1405
1442
  default:
1406
1443
  throw new Error(`Unknown ArrayValue filter: ${filter.value}`);
1407
1444
  }
@@ -1428,6 +1465,7 @@ var Interpreter = class {
1428
1465
  )
1429
1466
  ).join("\n")
1430
1467
  );
1468
+ case "join":
1431
1469
  case "string":
1432
1470
  return operand;
1433
1471
  default:
@@ -1466,6 +1504,21 @@ var Interpreter = class {
1466
1504
  throw new Error("If set, indent must be a number");
1467
1505
  }
1468
1506
  return new StringValue(toJSON(operand, indent.value));
1507
+ } else if (filterName === "join") {
1508
+ let value;
1509
+ if (operand instanceof StringValue) {
1510
+ value = Array.from(operand.value);
1511
+ } else if (operand instanceof ArrayValue) {
1512
+ value = operand.value.map((x) => x.value);
1513
+ } else {
1514
+ throw new Error(`Cannot apply filter "${filterName}" to type: ${operand.type}`);
1515
+ }
1516
+ const [args, kwargs] = this.evaluateArguments(filter.args, environment);
1517
+ const separator = args.at(0) ?? kwargs.get("separator") ?? new StringValue("");
1518
+ if (!(separator instanceof StringValue)) {
1519
+ throw new Error("separator must be a string");
1520
+ }
1521
+ return new StringValue(value.join(separator.value));
1469
1522
  }
1470
1523
  if (operand instanceof ArrayValue) {
1471
1524
  switch (filterName) {
@@ -1927,8 +1980,10 @@ var Template = class {
1927
1980
  throw new Error(args);
1928
1981
  });
1929
1982
  env.set("range", range);
1930
- for (const [key, value] of Object.entries(items)) {
1931
- env.set(key, value);
1983
+ if (items) {
1984
+ for (const [key, value] of Object.entries(items)) {
1985
+ env.set(key, value);
1986
+ }
1932
1987
  }
1933
1988
  const interpreter = new Interpreter(env);
1934
1989
  const result = interpreter.run(this.parsed);
@@ -3899,32 +3954,32 @@ const version = '1.20.1';
3899
3954
  __webpack_require__.r(__webpack_exports__);
3900
3955
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
3901
3956
  /* harmony export */ InferenceSession: () => (/* binding */ Fp),
3902
- /* harmony export */ TRACE: () => (/* binding */ wr),
3903
- /* harmony export */ TRACE_FUNC_BEGIN: () => (/* binding */ Ue),
3904
- /* harmony export */ TRACE_FUNC_END: () => (/* binding */ De),
3905
- /* harmony export */ Tensor: () => (/* binding */ He),
3957
+ /* harmony export */ TRACE: () => (/* binding */ _r),
3958
+ /* harmony export */ TRACE_FUNC_BEGIN: () => (/* binding */ Ne),
3959
+ /* harmony export */ TRACE_FUNC_END: () => (/* binding */ Be),
3960
+ /* harmony export */ Tensor: () => (/* binding */ Fe),
3906
3961
  /* harmony export */ "default": () => (/* binding */ $1),
3907
- /* harmony export */ env: () => (/* binding */ ve),
3908
- /* harmony export */ registerBackend: () => (/* binding */ St)
3962
+ /* harmony export */ env: () => (/* binding */ _e),
3963
+ /* harmony export */ registerBackend: () => (/* binding */ Tt)
3909
3964
  /* harmony export */ });
3910
3965
  /*!
3911
- * ONNX Runtime Web v1.21.0-dev.20250114-228dd16893
3966
+ * ONNX Runtime Web v1.21.0-dev.20250206-d981b153d3
3912
3967
  * Copyright (c) Microsoft Corporation. All rights reserved.
3913
3968
  * Licensed under the MIT License.
3914
3969
  */
3915
- var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Object.getOwnPropertyNames;var Lp=Object.prototype.hasOwnProperty;var Nn=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,r)=>(typeof require<"u"?require:t)[r]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')});var U=(e,t)=>()=>(e&&(t=e(e=0)),t);var Ft=(e,t)=>{for(var r in t)Un(e,r,{get:t[r],enumerable:!0})},Gp=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of Wp(t))!Lp.call(e,o)&&o!==r&&Un(e,o,{get:()=>t[o],enumerable:!(n=Vp(t,o))||n.enumerable});return e};var br=e=>Gp(Un({},"__esModule",{value:!0}),e);var yr,xt,St,Hp,Ji,Vn=U(()=>{"use strict";yr=new Map,xt=[],St=(e,t,r)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let n=yr.get(e);if(n===void 0)yr.set(e,{backend:t,priority:r});else{if(n.priority>r)return;if(n.priority===r&&n.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${r}`)}if(r>=0){let o=xt.indexOf(e);o!==-1&&xt.splice(o,1);for(let i=0;i<xt.length;i++)if(yr.get(xt[i]).priority<=r){xt.splice(i,0,e);return}xt.push(e)}return}throw new TypeError("not a valid backend")},Hp=async e=>{let t=yr.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(n){return r||(t.error=`${n}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},Ji=async e=>{let t=e.executionProviders||[],r=t.map(l=>typeof l=="string"?l:l.name),n=r.length===0?xt:r,o,i=[],a=new Set;for(let l of n){let p=await Hp(l);typeof p=="string"?i.push({name:l,err:p}):(o||(o=p),o===p&&a.add(l))}if(!o)throw new Error(`no available backend found. ERR: ${i.map(l=>`[${l.name}] ${l.err}`).join(", ")}`);for(let{name:l,err:p}of i)r.includes(l)&&console.warn(`removing requested execution provider "${l}" from session options because it is not available: ${p}`);let d=t.filter(l=>a.has(typeof l=="string"?l:l.name));return[o,new Proxy(e,{get:(l,p)=>p==="executionProviders"?d:Reflect.get(l,p)})]}});var ea=U(()=>{"use strict";Vn()});var ta,ra=U(()=>{"use strict";ta="1.21.0-dev.20241212-1f88284f96"});var na,Re,Wn=U(()=>{"use strict";ra();na="warning",Re={wasm:{},webgl:{},webgpu:{},versions:{common:ta},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);na=e}},get logLevel(){return na}};Object.defineProperty(Re,"logLevel",{enumerable:!0})});var ve,oa=U(()=>{"use strict";Wn();ve=Re});var ia,aa,sa=U(()=>{"use strict";ia=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let n=r.getContext("2d");if(n!=null){let o,i;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[3]):(o=e.dims[3],i=e.dims[2]);let a=t?.format!==void 0?t.format:"RGB",d=t?.norm,l,p;d===void 0||d.mean===void 0?l=[255,255,255,255]:typeof d.mean=="number"?l=[d.mean,d.mean,d.mean,d.mean]:(l=[d.mean[0],d.mean[1],d.mean[2],0],d.mean[3]!==void 0&&(l[3]=d.mean[3])),d===void 0||d.bias===void 0?p=[0,0,0,0]:typeof d.bias=="number"?p=[d.bias,d.bias,d.bias,d.bias]:(p=[d.bias[0],d.bias[1],d.bias[2],0],d.bias[3]!==void 0&&(p[3]=d.bias[3]));let m=i*o,u=0,h=m,_=m*2,y=-1;a==="RGBA"?(u=0,h=m,_=m*2,y=m*3):a==="RGB"?(u=0,h=m,_=m*2):a==="RBG"&&(u=0,_=m,h=m*2);for(let g=0;g<i;g++)for(let x=0;x<o;x++){let $=(e.data[u++]-p[0])*l[0],v=(e.data[h++]-p[1])*l[1],S=(e.data[_++]-p[2])*l[2],T=y===-1?255:(e.data[y++]-p[3])*l[3];n.fillStyle="rgba("+$+","+v+","+S+","+T+")",n.fillRect(x,g,1,1)}if("toDataURL"in r)return r.toDataURL();throw new Error("toDataURL is not supported")}else throw new Error("Can not access image data")},aa=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),n;if(r!=null){let o,i,a;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[1],a=e.dims[3]):(o=e.dims[3],i=e.dims[2],a=e.dims[1]);let d=t!==void 0&&t.format!==void 0?t.format:"RGB",l=t?.norm,p,m;l===void 0||l.mean===void 0?p=[255,255,255,255]:typeof l.mean=="number"?p=[l.mean,l.mean,l.mean,l.mean]:(p=[l.mean[0],l.mean[1],l.mean[2],255],l.mean[3]!==void 0&&(p[3]=l.mean[3])),l===void 0||l.bias===void 0?m=[0,0,0,0]:typeof l.bias=="number"?m=[l.bias,l.bias,l.bias,l.bias]:(m=[l.bias[0],l.bias[1],l.bias[2],0],l.bias[3]!==void 0&&(m[3]=l.bias[3]));let u=i*o;if(t!==void 0&&(t.format!==void 0&&a===4&&t.format!=="RGBA"||a===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let h=4,_=0,y=1,g=2,x=3,$=0,v=u,S=u*2,T=-1;d==="RGBA"?($=0,v=u,S=u*2,T=u*3):d==="RGB"?($=0,v=u,S=u*2):d==="RBG"&&($=0,S=u,v=u*2),n=r.createImageData(o,i);for(let A=0;A<i*o;_+=h,y+=h,g+=h,x+=h,A++)n.data[_]=(e.data[$++]-m[0])*p[0],n.data[y]=(e.data[v++]-m[1])*p[1],n.data[g]=(e.data[S++]-m[2])*p[2],n.data[x]=T===-1?255:(e.data[T++]-m[3])*p[3]}else throw new Error("Can not access image data");return n}});var Ln,ua,da,la,ca,pa,ma=U(()=>{"use strict";_r();Ln=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:r,width:n}=t,o=t.norm??{mean:255,bias:0},i,a;typeof o.mean=="number"?i=[o.mean,o.mean,o.mean,o.mean]:i=[o.mean[0],o.mean[1],o.mean[2],o.mean[3]??255],typeof o.bias=="number"?a=[o.bias,o.bias,o.bias,o.bias]:a=[o.bias[0],o.bias[1],o.bias[2],o.bias[3]??0];let d=t.format!==void 0?t.format:"RGBA",l=t.tensorFormat!==void 0&&t.tensorFormat!==void 0?t.tensorFormat:"RGB",p=r*n,m=l==="RGBA"?new Float32Array(p*4):new Float32Array(p*3),u=4,h=0,_=1,y=2,g=3,x=0,$=p,v=p*2,S=-1;d==="RGB"&&(u=3,h=0,_=1,y=2,g=-1),l==="RGBA"?S=p*3:l==="RBG"?(x=0,v=p,$=p*2):l==="BGR"&&(v=0,$=p,x=p*2);for(let A=0;A<p;A++,h+=u,y+=u,_+=u,g+=u)m[x++]=(e[h]+a[0])/i[0],m[$++]=(e[_]+a[1])/i[1],m[v++]=(e[y]+a[2])/i[2],S!==-1&&g!==-1&&(m[S++]=(e[g]+a[3])/i[3]);return l==="RGBA"?new ze("float32",m,[1,4,r,n]):new ze("float32",m,[1,3,r,n])},ua=async(e,t)=>{let r=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,o=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,i=typeof e=="string",a,d=t??{},l=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},p=m=>typeof HTMLCanvasElement<"u"&&m instanceof HTMLCanvasElement||m instanceof OffscreenCanvas?m.getContext("2d"):null;if(r){let m=l();m.width=e.width,m.height=e.height;let u=p(m);if(u!=null){let h=e.height,_=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(h=t.resizedHeight,_=t.resizedWidth),t!==void 0){if(d=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");d.tensorFormat="RGBA",d.height=h,d.width=_}else d.tensorFormat="RGBA",d.height=h,d.width=_;u.drawImage(e,0,0),a=u.getImageData(0,0,_,h).data}else throw new Error("Can not access image data")}else if(n){let m,u;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(m=t.resizedHeight,u=t.resizedWidth):(m=e.height,u=e.width),t!==void 0&&(d=t),d.format="RGBA",d.height=m,d.width=u,t!==void 0){let h=l();h.width=u,h.height=m;let _=p(h);if(_!=null)_.putImageData(e,0,0),a=_.getImageData(0,0,u,m).data;else throw new Error("Can not access image data")}else a=e.data}else if(o){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let m=l();m.width=e.width,m.height=e.height;let u=p(m);if(u!=null){let h=e.height,_=e.width;return u.drawImage(e,0,0,_,h),a=u.getImageData(0,0,_,h).data,d.height=h,d.width=_,Ln(a,d)}else throw new Error("Can not access image data")}else{if(i)return new Promise((m,u)=>{let h=l(),_=p(h);if(!e||!_)return u();let y=new Image;y.crossOrigin="Anonymous",y.src=e,y.onload=()=>{h.width=y.width,h.height=y.height,_.drawImage(y,0,0,h.width,h.height);let g=_.getImageData(0,0,h.width,h.height);d.height=h.height,d.width=h.width,m(Ln(g.data,d))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(a!==void 0)return Ln(a,d);throw new Error("Input data provided is not supported - aborted tensor creation")},da=(e,t)=>{let{width:r,height:n,download:o,dispose:i}=t,a=[1,n,r,4];return new ze({location:"texture",type:"float32",texture:e,dims:a,download:o,dispose:i})},la=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new ze({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:n,download:o,dispose:i})},ca=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new ze({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:n,download:o,dispose:i})},pa=(e,t,r)=>new ze({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})});var Tt,qt,fa,ha,ga=U(()=>{"use strict";Tt=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),qt=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),fa=!1,ha=()=>{if(!fa){fa=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof Float16Array<"u"&&Float16Array.from;e&&(Tt.set("int64",BigInt64Array),qt.set(BigInt64Array,"int64")),t&&(Tt.set("uint64",BigUint64Array),qt.set(BigUint64Array,"uint64")),r?(Tt.set("float16",Float16Array),qt.set(Float16Array,"float16")):Tt.set("float16",Uint16Array)}}});var ba,ya,_a=U(()=>{"use strict";_r();ba=e=>{let t=1;for(let r=0;r<e.length;r++){let n=e[r];if(typeof n!="number"||!Number.isSafeInteger(n))throw new TypeError(`dims[${r}] must be an integer, got: ${n}`);if(n<0)throw new RangeError(`dims[${r}] must be a non-negative integer, got: ${n}`);t*=n}return t},ya=(e,t)=>{switch(e.location){case"cpu":return new ze(e.type,e.data,t);case"cpu-pinned":return new ze({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new ze({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new ze({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new ze({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}});var ze,_r=U(()=>{"use strict";sa();ma();ga();_a();ze=class{constructor(t,r,n){ha();let o,i;if(typeof t=="object"&&"location"in t)switch(this.dataLocation=t.location,o=t.type,i=t.dims,t.location){case"cpu-pinned":{let d=Tt.get(o);if(!d)throw new TypeError(`unsupported type "${o}" to create tensor from pinned buffer`);if(!(t.data instanceof d))throw new TypeError(`buffer should be of type ${d.name}`);this.cpuData=t.data;break}case"texture":{if(o!=="float32")throw new TypeError(`unsupported type "${o}" to create tensor from texture`);this.gpuTextureData=t.texture,this.downloader=t.download,this.disposer=t.dispose;break}case"gpu-buffer":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from gpu buffer`);this.gpuBufferData=t.gpuBuffer,this.downloader=t.download,this.disposer=t.dispose;break}case"ml-tensor":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint64"&&o!=="int8"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from MLTensor`);this.mlTensorData=t.mlTensor,this.downloader=t.download,this.disposer=t.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let d,l;if(typeof t=="string")if(o=t,l=n,t==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");d=r}else{let p=Tt.get(t);if(p===void 0)throw new TypeError(`Unsupported tensor type: ${t}.`);if(Array.isArray(r)){if(t==="float16"&&p===Uint16Array||t==="uint4"||t==="int4")throw new TypeError(`Creating a ${t} tensor from number array is not supported. Please use ${p.name} as data.`);t==="uint64"||t==="int64"?d=p.from(r,BigInt):d=p.from(r)}else if(r instanceof p)d=r;else if(r instanceof Uint8ClampedArray)if(t==="uint8")d=Uint8Array.from(r);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else throw new TypeError(`A ${o} tensor's data must be type of ${p}`)}else if(l=r,Array.isArray(t)){if(t.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let p=typeof t[0];if(p==="string")o="string",d=t;else if(p==="boolean")o="bool",d=Uint8Array.from(t);else throw new TypeError(`Invalid element type of data array: ${p}.`)}else if(t instanceof Uint8ClampedArray)o="uint8",d=Uint8Array.from(t);else{let p=qt.get(t.constructor);if(p===void 0)throw new TypeError(`Unsupported type for tensor data: ${t.constructor}.`);o=p,d=t}if(l===void 0)l=[d.length];else if(!Array.isArray(l))throw new TypeError("A tensor's dims must be a number array");i=l,this.cpuData=d,this.dataLocation="cpu"}let a=ba(i);if(this.cpuData&&a!==this.cpuData.length&&!((o==="uint4"||o==="int4")&&Math.ceil(a/2)===this.cpuData.length))throw new Error(`Tensor's size(${a}) does not match data length(${this.cpuData.length}).`);this.type=o,this.dims=i,this.size=a}static async fromImage(t,r){return ua(t,r)}static fromTexture(t,r){return da(t,r)}static fromGpuBuffer(t,r){return la(t,r)}static fromMLTensor(t,r){return ca(t,r)}static fromPinnedBuffer(t,r,n){return pa(t,r,n)}toDataURL(t){return ia(this,t)}toImageData(t){return aa(this,t)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(t){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,t&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(t){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return ya(this,t)}}});var He,Gn=U(()=>{"use strict";_r();He=ze});var wr,wa,Ue,De,Hn=U(()=>{"use strict";Wn();wr=(e,t)=>{(typeof Re.trace>"u"?!Re.wasm.trace:!Re.trace)||console.timeStamp(`${e}::ORT::${t}`)},wa=(e,t)=>{let r=new Error().stack?.split(/\r\n|\r|\n/g)||[],n=!1;for(let o=0;o<r.length;o++){if(n&&!r[o].includes("TRACE_FUNC")){let i=`FUNC_${e}::${r[o].trim().split(" ")[1]}`;t&&(i+=`::${t}`),wr("CPU",i);return}r[o].includes("TRACE_FUNC")&&(n=!0)}},Ue=e=>{(typeof Re.trace>"u"?!Re.wasm.trace:!Re.trace)||wa("BEGIN",e)},De=e=>{(typeof Re.trace>"u"?!Re.wasm.trace:!Re.trace)||wa("END",e)}});var vr,va=U(()=>{"use strict";Vn();Gn();Hn();vr=class e{constructor(t){this.handler=t}async run(t,r,n){Ue();let o={},i={};if(typeof t!="object"||t===null||t instanceof He||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let a=!0;if(typeof r=="object"){if(r===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(r instanceof He)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(r)){if(r.length===0)throw new TypeError("'fetches' cannot be an empty array.");a=!1;for(let p of r){if(typeof p!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(p)===-1)throw new RangeError(`'fetches' contains invalid output name: ${p}.`);o[p]=null}if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else{let p=!1,m=Object.getOwnPropertyNames(r);for(let u of this.outputNames)if(m.indexOf(u)!==-1){let h=r[u];(h===null||h instanceof He)&&(p=!0,a=!1,o[u]=h)}if(p){if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else i=r}}else if(typeof r<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let p of this.inputNames)if(typeof t[p]>"u")throw new Error(`input '${p}' is missing in 'feeds'.`);if(a)for(let p of this.outputNames)o[p]=null;let d=await this.handler.run(t,o,i),l={};for(let p in d)if(Object.hasOwnProperty.call(d,p)){let m=d[p];m instanceof He?l[p]=m:l[p]=new He(m.type,m.data,m.dims)}return De(),l}async release(){return this.handler.dispose()}static async create(t,r,n,o){Ue();let i,a={};if(typeof t=="string"){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let m=t,u=0,h=t.byteLength;if(typeof r=="object"&&r!==null)a=r;else if(typeof r=="number"){if(u=r,!Number.isSafeInteger(u))throw new RangeError("'byteOffset' must be an integer.");if(u<0||u>=m.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${m.byteLength}).`);if(h=t.byteLength-u,typeof n=="number"){if(h=n,!Number.isSafeInteger(h))throw new RangeError("'byteLength' must be an integer.");if(h<=0||u+h>m.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${m.byteLength-u}].`);if(typeof o=="object"&&o!==null)a=o;else if(typeof o<"u")throw new TypeError("'options' must be an object.")}else if(typeof n<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");i=new Uint8Array(m,u,h)}else throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");let[d,l]=await Ji(a),p=await d.createInferenceSessionHandler(i,l);return De(),new e(p)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}}});var Fp,$a=U(()=>{"use strict";va();Fp=vr});var xa=U(()=>{"use strict"});var Sa=U(()=>{"use strict"});var Ta=U(()=>{"use strict"});var Ia=U(()=>{"use strict"});var Fn={};Ft(Fn,{InferenceSession:()=>Fp,TRACE:()=>wr,TRACE_FUNC_BEGIN:()=>Ue,TRACE_FUNC_END:()=>De,Tensor:()=>He,env:()=>ve,registerBackend:()=>St});var We=U(()=>{"use strict";ea();oa();$a();Gn();xa();Sa();Hn();Ta();Ia()});var $r=U(()=>{"use strict"});var Ea={};Ft(Ea,{default:()=>qp});var Aa,ka,qp,Pa=U(()=>{"use strict";qn();gt();xr();Aa="ort-wasm-proxy-worker",ka=globalThis.self?.name===Aa;ka&&(self.onmessage=e=>{let{type:t,in:r}=e.data;try{switch(t){case"init-wasm":Sr(r.wasm).then(()=>{Tr(r).then(()=>{postMessage({type:t})},n=>{postMessage({type:t,err:n})})},n=>{postMessage({type:t,err:n})});break;case"init-ep":{let{epName:n,env:o}=r;Ir(o,n).then(()=>{postMessage({type:t})},i=>{postMessage({type:t,err:i})});break}case"copy-from":{let{buffer:n}=r,o=Kt(n);postMessage({type:t,out:o});break}case"create":{let{model:n,options:o}=r;Cr(n,o).then(i=>{postMessage({type:t,out:i})},i=>{postMessage({type:t,err:i})});break}case"release":Ar(r),postMessage({type:t});break;case"run":{let{sessionId:n,inputIndices:o,inputs:i,outputIndices:a,options:d}=r;kr(n,o,i,a,new Array(a.length).fill(null),d).then(l=>{l.some(p=>p[3]!=="cpu")?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:l},Pr([...i,...l]))},l=>{postMessage({type:t,err:l})});break}case"end-profiling":Er(r),postMessage({type:t});break;default:}}catch(n){postMessage({type:t,err:n})}});qp=ka?null:e=>new Worker(e??Ne,{type:"module",name:Aa})});var Oa={};Ft(Oa,{default:()=>Kp});var Kn,za,Kp,Da=U(()=>{"use strict";za=(Kn=import.meta.url,async function(e={}){function t(){return se.buffer!=J.buffer&&ye(),J}function r(){return se.buffer!=J.buffer&&ye(),ne}function n(){return se.buffer!=J.buffer&&ye(),be}function o(){return se.buffer!=J.buffer&&ye(),Oe}function i(){return se.buffer!=J.buffer&&ye(),$e}function a(){return se.buffer!=J.buffer&&ye(),le}function d(){return se.buffer!=J.buffer&&ye(),W}function l(){return se.buffer!=J.buffer&&ye(),Ge}var p,m,u=Object.assign({},e),h=new Promise((s,c)=>{p=s,m=c}),_=typeof window=="object",y=typeof importScripts=="function",g=y&&self.name=="em-pthread";u.mountExternalData=(s,c)=>{s.startsWith("./")&&(s=s.substring(2)),(u.Fb||(u.Fb=new Map)).set(s,c)},u.unmountExternalData=()=>{delete u.Fb};var x=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,shared:!0}).buffer.constructor;let $=()=>{let s=(f,b,w)=>(...I)=>{let O=Xe,B=b?.();I=f(...I);let L=b?.();return B!==L&&(f=L,w(B),b=w=null),Xe!=O?new Promise((H,X)=>{En={resolve:H,reject:X}}):I},c=f=>async(...b)=>{try{if(u.Gb)throw Error("Session already started");let w=u.Gb={hc:b[0],errors:[]},I=await f(...b);if(u.Gb!==w)throw Error("Session mismatch");u.Hb?.flush();let O=w.errors;if(0<O.length){let B=await Promise.all(O);if(B=B.filter(L=>L),0<B.length)throw Error(B.join(`
3916
- `))}return I}finally{u.Gb=null}};u._OrtCreateSession=s(u._OrtCreateSession,()=>u._OrtCreateSession,f=>u._OrtCreateSession=f),u._OrtRun=c(s(u._OrtRun,()=>u._OrtRun,f=>u._OrtRun=f)),u._OrtRunWithBinding=c(s(u._OrtRunWithBinding,()=>u._OrtRunWithBinding,f=>u._OrtRunWithBinding=f)),u._OrtBindInput=s(u._OrtBindInput,()=>u._OrtBindInput,f=>u._OrtBindInput=f),$=void 0};u.jsepInit=(s,c)=>{if($?.(),s==="webgpu"){[u.Hb,u.Vb,u.Zb,u.Ob,u.Yb,u.kb,u.$b,u.cc,u.Wb,u.Xb,u.ac]=c;let f=u.Hb;u.jsepRegisterBuffer=(b,w,I,O)=>f.registerBuffer(b,w,I,O),u.jsepGetBuffer=b=>f.getBuffer(b),u.jsepCreateDownloader=(b,w,I)=>f.createDownloader(b,w,I),u.jsepOnCreateSession=b=>{f.onCreateSession(b)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepOnRunStart=b=>f.onRunStart(b),u.dc=(b,w)=>{f.upload(b,w)}}else if(s==="webnn"){[u.Hb,u.bc,u.Pb,u.jsepEnsureTensor,u.ec,u.jsepDownloadTensor]=c,u.jsepReleaseTensorId=u.Pb;let f=u.Hb;u.jsepOnRunStart=b=>f.onRunStart(b),u.jsepRegisterMLContext=(b,w)=>{f.registerMLContext(b,w)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepCreateMLTensorDownloader=(b,w)=>f.createMLTensorDownloader(b,w),u.jsepRegisterMLTensor=(b,w,I)=>f.registerMLTensor(b,w,I),u.jsepCreateMLContext=b=>f.createMLContext(b),u.jsepRegisterMLConstant=(b,w,I,O,B)=>f.registerMLConstant(b,w,I,O,B,u.Fb)}};var v,S,T=Object.assign({},u),A="./this.program",k=(s,c)=>{throw c},P="";(_||y)&&(y?P=self.location.href:typeof document<"u"&&document.currentScript&&(P=document.currentScript.src),Kn&&(P=Kn),P=P.startsWith("blob:")?"":P.substr(0,P.replace(/[?#].*/,"").lastIndexOf("/")+1),y&&(S=s=>{var c=new XMLHttpRequest;return c.open("GET",s,!1),c.responseType="arraybuffer",c.send(null),new Uint8Array(c.response)}),v=(s,c,f)=>{var b=new XMLHttpRequest;b.open("GET",s,!0),b.responseType="arraybuffer",b.onload=()=>{b.status==200||b.status==0&&b.response?c(b.response):f()},b.onerror=f,b.send(null)});var D,R=console.log.bind(console),G=console.error.bind(console),K=R,j=G;if(Object.assign(u,T),T=null,g){let s=function(c){try{var f=c.data,b=f.cmd;if(b==="load"){let w=[];self.onmessage=I=>w.push(I),self.startWorker=()=>{postMessage({cmd:"loaded"});for(let I of w)s(I);self.onmessage=s};for(let I of f.handlers)u[I]&&!u[I].proxy||(u[I]=(...O)=>{postMessage({Nb:"callHandler",pc:I,args:O})},I=="print"&&(K=u[I]),I=="printErr"&&(j=u[I]));se=f.wasmMemory,ye(),V(f.wasmModule)}else if(b==="run"){Dn(f.pthread_ptr,0,0,1,0,0),An(f.pthread_ptr),xc(),Ho(),Q||(Wi(),Q=!0);try{Sc(f.start_routine,f.arg)}catch(w){if(w!="unwind")throw w}}else b==="cancel"?Mt()&&hr(-1):f.target!=="setimmediate"&&(b==="checkMailbox"?Q&&ir():b&&(j(`worker: received unknown command ${b}`),j(f)))}catch(w){throw Li(),w}};var wg=s,V,Q=!1;j=function(...c){c=c.join(" "),console.error(c)},self.alert=function(...c){postMessage({Nb:"alert",text:c.join(" "),rc:Mt()})},u.instantiateWasm=(c,f)=>new Promise(b=>{V=w=>{w=new WebAssembly.Instance(w,No()),f(w),b()}}),self.onunhandledrejection=c=>{throw c.reason||c},self.onmessage=s}u.wasmBinary&&(D=u.wasmBinary);var se,Y,ee,J,ne,be,Oe,$e,le,W,q,he,Ge,we=!1;function ye(){var s=se.buffer;u.HEAP8=J=new Int8Array(s),u.HEAP16=be=new Int16Array(s),u.HEAPU8=ne=new Uint8Array(s),u.HEAPU16=Oe=new Uint16Array(s),u.HEAP32=$e=new Int32Array(s),u.HEAPU32=le=new Uint32Array(s),u.HEAPF32=W=new Float32Array(s),u.HEAPF64=Ge=new Float64Array(s),u.HEAP64=q=new BigInt64Array(s),u.HEAPU64=he=new BigUint64Array(s)}if(!g){if(!((se=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0})).buffer instanceof x))throw j("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),Error("bad memory");ye()}var Ye=[],Lt=[],fn=[],Gt=0,hn=null,Ht=null;function Do(){if(--Gt==0&&(hn!==null&&(clearInterval(hn),hn=null),Ht)){var s=Ht;Ht=null,s()}}function ct(s){throw j(s="Aborted("+s+")"),we=!0,ee=1,s=new WebAssembly.RuntimeError(s+". Build with -sASSERTIONS for more info."),m(s),s}var gn,Bo=s=>s.startsWith("data:application/octet-stream;base64,"),Mo=s=>s.startsWith("file://");function Ro(s){if(s==gn&&D)return new Uint8Array(D);if(S)return S(s);throw"both async and sync fetching of the wasm failed"}function Uo(s,c,f){return function(b){if(!D&&(_||y)){if(typeof fetch=="function"&&!Mo(b))return fetch(b,{credentials:"same-origin"}).then(w=>{if(!w.ok)throw`failed to load wasm binary file at '${b}'`;return w.arrayBuffer()}).catch(()=>Ro(b));if(v)return new Promise((w,I)=>{v(b,O=>w(new Uint8Array(O)),I)})}return Promise.resolve().then(()=>Ro(b))}(s).then(b=>WebAssembly.instantiate(b,c)).then(f,b=>{j(`failed to asynchronously prepare wasm: ${b}`),ct(b)})}function No(){return{a:{O:$c,Aa:vc,b:Ic,aa:jo,B:Qo,qa:Xo,Y:ei,_:ti,ra:ri,oa:ni,ha:oi,na:ii,L:ai,Z:si,W:ui,pa:di,X:li,va:Cc,F:kc,Q:Ec,P:zc,E:Dc,u:Bc,q:Mc,G:Rc,A:Hc,R:Fc,ua:qc,ka:Kc,U:jc,ba:Yc,H:Zc,ja:An,ta:Qc,t:Xc,Ba:Jc,x:rp,n:np,l:ip,c:In,o:ap,j:dp,w:lp,p:cp,f:pp,s:mp,m:fp,e:hp,k:gp,i:bp,h:yp,d:_p,ea:wp,fa:vp,ga:$p,ca:Si,da:Ti,T:xp,g:Sp,D:Tp,I:Ip,M:Cp,y:Ap,sa:kp,V:Ep,v:Ci,z:Pp,N:zp,S:Op,za:Dp,ya:Bp,la:Ei,ma:Pi,$:vn,C:zi,K:Oi,ia:Di,J:Bi,a:se,xa:wn,wa:Ui,r:Up}}}var bn={913700:(s,c,f,b,w)=>{if(u===void 0||!u.Fb)return 1;if((s=Ce(Number(s>>>0))).startsWith("./")&&(s=s.substring(2)),!(s=u.Fb.get(s)))return 2;if(c=Number(c>>>0),f=Number(f>>>0),b=Number(b>>>0),c+f>s.byteLength)return 3;try{let I=s.subarray(c,c+f);switch(w){case 0:r().set(I,b>>>0);break;case 1:u.dc(b,I);break;default:return 4}return 0}catch{return 4}},914415:(s,c,f)=>{u.ec(s,r().subarray(c>>>0,c+f>>>0))},914478:()=>u.bc(),914519:s=>{u.Pb(s)},914555:()=>{u.Wb()},914586:()=>{u.Xb()},914615:()=>{u.ac()},914640:s=>u.Vb(s),914673:s=>u.Zb(s),914705:(s,c,f)=>{u.Ob(Number(s),Number(c),Number(f),!0)},914768:(s,c,f)=>{u.Ob(Number(s),Number(c),Number(f))},914825:()=>typeof wasmOffsetConverter<"u",914882:s=>{u.kb("Abs",s,void 0)},914933:s=>{u.kb("Neg",s,void 0)},914984:s=>{u.kb("Floor",s,void 0)},915037:s=>{u.kb("Ceil",s,void 0)},915089:s=>{u.kb("Reciprocal",s,void 0)},915147:s=>{u.kb("Sqrt",s,void 0)},915199:s=>{u.kb("Exp",s,void 0)},915250:s=>{u.kb("Erf",s,void 0)},915301:s=>{u.kb("Sigmoid",s,void 0)},915356:(s,c,f)=>{u.kb("HardSigmoid",s,{alpha:c,beta:f})},915435:s=>{u.kb("Log",s,void 0)},915486:s=>{u.kb("Sin",s,void 0)},915537:s=>{u.kb("Cos",s,void 0)},915588:s=>{u.kb("Tan",s,void 0)},915639:s=>{u.kb("Asin",s,void 0)},915691:s=>{u.kb("Acos",s,void 0)},915743:s=>{u.kb("Atan",s,void 0)},915795:s=>{u.kb("Sinh",s,void 0)},915847:s=>{u.kb("Cosh",s,void 0)},915899:s=>{u.kb("Asinh",s,void 0)},915952:s=>{u.kb("Acosh",s,void 0)},916005:s=>{u.kb("Atanh",s,void 0)},916058:s=>{u.kb("Tanh",s,void 0)},916110:s=>{u.kb("Not",s,void 0)},916161:(s,c,f)=>{u.kb("Clip",s,{min:c,max:f})},916230:s=>{u.kb("Clip",s,void 0)},916282:(s,c)=>{u.kb("Elu",s,{alpha:c})},916340:s=>{u.kb("Gelu",s,void 0)},916392:s=>{u.kb("Relu",s,void 0)},916444:(s,c)=>{u.kb("LeakyRelu",s,{alpha:c})},916508:(s,c)=>{u.kb("ThresholdedRelu",s,{alpha:c})},916578:(s,c)=>{u.kb("Cast",s,{to:c})},916636:s=>{u.kb("Add",s,void 0)},916687:s=>{u.kb("Sub",s,void 0)},916738:s=>{u.kb("Mul",s,void 0)},916789:s=>{u.kb("Div",s,void 0)},916840:s=>{u.kb("Pow",s,void 0)},916891:s=>{u.kb("Equal",s,void 0)},916944:s=>{u.kb("Greater",s,void 0)},916999:s=>{u.kb("GreaterOrEqual",s,void 0)},917061:s=>{u.kb("Less",s,void 0)},917113:s=>{u.kb("LessOrEqual",s,void 0)},917172:(s,c,f,b,w)=>{u.kb("ReduceMean",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},917347:(s,c,f,b,w)=>{u.kb("ReduceMax",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},917521:(s,c,f,b,w)=>{u.kb("ReduceMin",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},917695:(s,c,f,b,w)=>{u.kb("ReduceProd",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},917870:(s,c,f,b,w)=>{u.kb("ReduceSum",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918044:(s,c,f,b,w)=>{u.kb("ReduceL1",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918217:(s,c,f,b,w)=>{u.kb("ReduceL2",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918390:(s,c,f,b,w)=>{u.kb("ReduceLogSum",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918567:(s,c,f,b,w)=>{u.kb("ReduceSumSquare",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918747:(s,c,f,b,w)=>{u.kb("ReduceLogSumExp",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},918927:s=>{u.kb("Where",s,void 0)},918980:(s,c,f)=>{u.kb("Transpose",s,{perm:c?Array.from(i().subarray(Number(c)>>>0,Number(f)>>>0)):[]})},919104:(s,c,f,b)=>{u.kb("DepthToSpace",s,{blocksize:c,mode:Ce(f),format:b?"NHWC":"NCHW"})},919237:(s,c,f,b)=>{u.kb("DepthToSpace",s,{blocksize:c,mode:Ce(f),format:b?"NHWC":"NCHW"})},919370:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z,de)=>{u.kb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:c,dilations:[f],group:b,kernelShape:[w],pads:[I,O],strides:[B],wIsConst:()=>!!t()[H>>>0],outputPadding:X?Array.from(i().subarray(Number(X)>>>0,Number(ce)>>>0)):[],outputShape:ge?Array.from(i().subarray(Number(ge)>>>0,Number(z)>>>0)):[],activation:Ce(de)})},919803:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:c,dilations:Array.from(i().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(Number(w)>>>0,2+(Number(w)>>>0)>>>0)),pads:Array.from(i().subarray(Number(I)>>>0,4+(Number(I)>>>0)>>>0)),strides:Array.from(i().subarray(Number(O)>>>0,2+(Number(O)>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],outputShape:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[],activation:Ce(z)})},920464:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z,de)=>{u.kb("ConvTranspose",s,{format:L?"NHWC":"NCHW",autoPad:c,dilations:[f],group:b,kernelShape:[w],pads:[I,O],strides:[B],wIsConst:()=>!!t()[H>>>0],outputPadding:X?Array.from(i().subarray(Number(X)>>>0,Number(ce)>>>0)):[],outputShape:ge?Array.from(i().subarray(Number(ge)>>>0,Number(z)>>>0)):[],activation:Ce(de)})},920897:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:c,dilations:Array.from(i().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(Number(w)>>>0,2+(Number(w)>>>0)>>>0)),pads:Array.from(i().subarray(Number(I)>>>0,4+(Number(I)>>>0)>>>0)),strides:Array.from(i().subarray(Number(O)>>>0,2+(Number(O)>>>0)>>>0)),wIsConst:()=>!!t()[L>>>0],outputPadding:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],outputShape:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[],activation:Ce(z)})},921558:(s,c)=>{u.kb("GlobalAveragePool",s,{format:c?"NHWC":"NCHW"})},921649:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(L)>>>0)):[],pads:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],strides:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[]})},922128:(s,c)=>{u.kb("GlobalAveragePool",s,{format:c?"NHWC":"NCHW"})},922219:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(L)>>>0)):[],pads:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],strides:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[]})},922698:(s,c)=>{u.kb("GlobalMaxPool",s,{format:c?"NHWC":"NCHW"})},922785:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(L)>>>0)):[],pads:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],strides:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[]})},923260:(s,c)=>{u.kb("GlobalMaxPool",s,{format:c?"NHWC":"NCHW"})},923347:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z)=>{u.kb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(L)>>>0)):[],pads:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],strides:ce?Array.from(i().subarray(Number(ce)>>>0,Number(ge)>>>0)):[]})},923822:(s,c,f,b,w)=>{u.kb("Gemm",s,{alpha:c,beta:f,transA:b,transB:w})},923926:s=>{u.kb("MatMul",s,void 0)},923980:(s,c,f,b)=>{u.kb("ArgMax",s,{keepDims:!!c,selectLastIndex:!!f,axis:b})},924088:(s,c,f,b)=>{u.kb("ArgMin",s,{keepDims:!!c,selectLastIndex:!!f,axis:b})},924196:(s,c)=>{u.kb("Softmax",s,{axis:c})},924259:(s,c)=>{u.kb("Concat",s,{axis:c})},924319:(s,c,f,b,w)=>{u.kb("Split",s,{axis:c,numOutputs:f,splitSizes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},924475:s=>{u.kb("Expand",s,void 0)},924529:(s,c)=>{u.kb("Gather",s,{axis:Number(c)})},924600:(s,c)=>{u.kb("GatherElements",s,{axis:Number(c)})},924679:(s,c)=>{u.kb("GatherND",s,{batch_dims:Number(c)})},924758:(s,c,f,b,w,I,O,B,L,H,X)=>{u.kb("Resize",s,{antialias:c,axes:f?Array.from(i().subarray(Number(f)>>>0,Number(b)>>>0)):[],coordinateTransformMode:Ce(w),cubicCoeffA:I,excludeOutside:O,extrapolationValue:B,keepAspectRatioPolicy:Ce(L),mode:Ce(H),nearestMode:Ce(X)})},925120:(s,c,f,b,w,I,O)=>{u.kb("Slice",s,{starts:c?Array.from(i().subarray(Number(c)>>>0,Number(f)>>>0)):[],ends:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[],axes:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[]})},925384:s=>{u.kb("Tile",s,void 0)},925436:(s,c,f)=>{u.kb("InstanceNormalization",s,{epsilon:c,format:f?"NHWC":"NCHW"})},925550:(s,c,f)=>{u.kb("InstanceNormalization",s,{epsilon:c,format:f?"NHWC":"NCHW"})},925664:s=>{u.kb("Range",s,void 0)},925717:(s,c)=>{u.kb("Einsum",s,{equation:Ce(c)})},925798:(s,c,f,b,w)=>{u.kb("Pad",s,{mode:c,value:f,pads:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},925941:(s,c,f,b,w,I)=>{u.kb("BatchNormalization",s,{epsilon:c,momentum:f,spatial:!!w,trainingMode:!!b,format:I?"NHWC":"NCHW"})},926110:(s,c,f,b,w,I)=>{u.kb("BatchNormalization",s,{epsilon:c,momentum:f,spatial:!!w,trainingMode:!!b,format:I?"NHWC":"NCHW"})},926279:(s,c,f)=>{u.kb("CumSum",s,{exclusive:Number(c),reverse:Number(f)})},926376:(s,c,f)=>{u.kb("DequantizeLinear",s,{axis:c,blockSize:f})},926466:(s,c,f,b,w)=>{u.kb("GridSample",s,{align_corners:c,mode:Ce(f),padding_mode:Ce(b),format:w?"NHWC":"NCHW"})},926636:(s,c,f,b,w)=>{u.kb("GridSample",s,{align_corners:c,mode:Ce(f),padding_mode:Ce(b),format:w?"NHWC":"NCHW"})},926806:(s,c,f,b,w,I,O,B,L)=>{u.kb("Attention",s,{numHeads:c,isUnidirectional:f,maskFilterValue:b,scale:w,doRotary:I,qkvHiddenSizes:O?Array.from(i().subarray(Number(B)>>>0,Number(B)+O>>>0)):[],pastPresentShareBuffer:!!L})},927078:s=>{u.kb("BiasAdd",s,void 0)},927133:s=>{u.kb("BiasSplitGelu",s,void 0)},927194:s=>{u.kb("FastGelu",s,void 0)},927250:(s,c,f,b,w,I,O,B,L,H,X,ce,ge,z,de,Te)=>{u.kb("Conv",s,{format:ce?"NHWC":"NCHW",auto_pad:c,dilations:f?Array.from(i().subarray(Number(f)>>>0,Number(b)>>>0)):[],group:w,kernel_shape:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],pads:B?Array.from(i().subarray(Number(B)>>>0,Number(L)>>>0)):[],strides:H?Array.from(i().subarray(Number(H)>>>0,Number(X)>>>0)):[],w_is_const:()=>!!t()[Number(ge)>>>0],activation:Ce(z),activation_params:de?Array.from(d().subarray(Number(de)>>>0,Number(Te)>>>0)):[]})},927834:s=>{u.kb("Gelu",s,void 0)},927886:(s,c,f,b,w,I,O,B,L)=>{u.kb("GroupQueryAttention",s,{numHeads:c,kvNumHeads:f,scale:b,softcap:w,doRotary:I,rotaryInterleaved:O,smoothSoftmax:B,localWindowSize:L})},928103:(s,c,f,b)=>{u.kb("LayerNormalization",s,{axis:c,epsilon:f,simplified:!!b})},928214:(s,c,f,b)=>{u.kb("LayerNormalization",s,{axis:c,epsilon:f,simplified:!!b})},928325:(s,c,f,b,w,I)=>{u.kb("MatMulNBits",s,{k:c,n:f,accuracyLevel:b,bits:w,blockSize:I})},928452:(s,c,f,b,w,I)=>{u.kb("MultiHeadAttention",s,{numHeads:c,isUnidirectional:f,maskFilterValue:b,scale:w,doRotary:I})},928611:(s,c)=>{u.kb("QuickGelu",s,{alpha:c})},928675:(s,c,f,b,w)=>{u.kb("RotaryEmbedding",s,{interleaved:!!c,numHeads:f,rotaryEmbeddingDim:b,scale:w})},928814:(s,c,f)=>{u.kb("SkipLayerNormalization",s,{epsilon:c,simplified:!!f})},928916:(s,c,f)=>{u.kb("SkipLayerNormalization",s,{epsilon:c,simplified:!!f})},929018:(s,c,f,b)=>{u.kb("GatherBlockQuantized",s,{gatherAxis:c,quantizeAxis:f,blockSize:b})},929139:s=>{u.$b(s)},929173:(s,c)=>u.cc(Number(s),Number(c),u.Gb.hc,u.Gb.errors)};function vc(s,c,f){return _i(async()=>{await u.Yb(Number(s),Number(c),Number(f))})}function $c(){return typeof wasmOffsetConverter<"u"}function yn(s){this.name="ExitStatus",this.message=`Program terminated with exit(${s})`,this.status=s}var _n=s=>{s.terminate(),s.onmessage=()=>{}},Vo=s=>{pt.length==0&&(qo(),Fo(pt[0]));var c=pt.pop();if(!c)return 6;vt.push(c),Ze[s.Bb]=c,c.Bb=s.Bb;var f={cmd:"run",start_routine:s.ic,arg:s.Rb,pthread_ptr:s.Bb};return c.postMessage(f,s.nc),0},wt=0,xe=(s,c,...f)=>{for(var b=2*f.length,w=Rn(),I=Mn(8*b),O=I>>>3,B=0;B<f.length;B++){var L=f[B];typeof L=="bigint"?(q[O+2*B]=1n,q[O+2*B+1]=L):(q[O+2*B]=0n,l()[O+2*B+1>>>0]=L)}return s=Gi(s,0,b,I,c),gr(w),s};function wn(s){if(g)return xe(0,1,s);if(ee=s,!(0<wt)){for(var c of vt)_n(c);for(c of pt)_n(c);pt=[],vt=[],Ze=[],we=!0}k(s,new yn(s))}function Wo(s){if(g)return xe(1,0,s);vn(s)}var vn=s=>{if(ee=s,g)throw Wo(s),"unwind";wn(s)},pt=[],vt=[],Lo=[],Ze={},Go=s=>{var c=s.Bb;delete Ze[c],pt.push(s),vt.splice(vt.indexOf(s),1),s.Bb=0,Bn(c)};function Ho(){Lo.forEach(s=>s())}var Fo=s=>new Promise(c=>{s.onmessage=w=>{var I=(w=w.data).cmd;if(w.targetThread&&w.targetThread!=Mt()){var O=Ze[w.targetThread];O?O.postMessage(w,w.transferList):j(`Internal error! Worker sent a message "${I}" to target pthread ${w.targetThread}, but that thread no longer exists!`)}else I==="checkMailbox"?ir():I==="spawnThread"?Vo(w):I==="cleanupThread"?Go(Ze[w.thread]):I==="killThread"?(w=w.thread,I=Ze[w],delete Ze[w],_n(I),Bn(w),vt.splice(vt.indexOf(I),1),I.Bb=0):I==="cancelThread"?Ze[w.thread].postMessage({cmd:"cancel"}):I==="loaded"?(s.loaded=!0,c(s)):I==="alert"?alert(`Thread ${w.threadId}: ${w.text}`):w.target==="setimmediate"?s.postMessage(w):I==="callHandler"?u[w.handler](...w.args):I&&j(`worker sent an unknown command ${I}`)},s.onerror=w=>{throw j(`worker sent an error! ${w.filename}:${w.lineno}: ${w.message}`),w};var f,b=[];for(f of[])u.hasOwnProperty(f)&&b.push(f);s.postMessage({cmd:"load",handlers:b,wasmMemory:se,wasmModule:Y})});function qo(){var s=new Worker(import.meta.url.startsWith("file:")?new URL(/* asset import */ __webpack_require__(/*! ort.bundle.min.mjs */ "./node_modules/onnxruntime-web/dist/ort.bundle.min.mjs?46eb"), __webpack_require__.b):new URL(import.meta.url),{type:"module",workerData:"em-pthread",name:"em-pthread"});pt.push(s)}var or=s=>{for(;0<s.length;)s.shift()(u)},xc=()=>{var s=Mt(),c=a()[s+52>>>2>>>0];s=a()[s+56>>>2>>>0],Fi(c,c-s),gr(c)},Sc=(s,c)=>{wt=0,s=qi(s,c),0<wt?ee=s:hr(s)};class Tc{constructor(c){this.Kb=c-24}}function Ic(s,c,f){var b=new Tc(s>>>=0);throw c>>>=0,f>>>=0,a()[b.Kb+16>>>2>>>0]=0,a()[b.Kb+4>>>2>>>0]=c,a()[b.Kb+8>>>2>>>0]=f,s}function Ko(s,c,f,b){return g?xe(2,1,s,c,f,b):jo(s,c,f,b)}function jo(s,c,f,b){if(s>>>=0,c>>>=0,f>>>=0,b>>>=0,x===void 0)return j("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var w=[];return g&&w.length===0?Ko(s,c,f,b):(s={ic:f,Bb:s,Rb:b,nc:w},g?(s.Nb="spawnThread",postMessage(s,w),0):Vo(s))}var Yo=typeof TextDecoder<"u"?new TextDecoder("utf8"):void 0,Zo=(s,c,f)=>{var b=(c>>>=0)+f;for(f=c;s[f]&&!(f>=b);)++f;if(16<f-c&&s.buffer&&Yo)return Yo.decode(s.buffer instanceof x?s.slice(c,f):s.subarray(c,f));for(b="";c<f;){var w=s[c++];if(128&w){var I=63&s[c++];if((224&w)==192)b+=String.fromCharCode((31&w)<<6|I);else{var O=63&s[c++];65536>(w=(240&w)==224?(15&w)<<12|I<<6|O:(7&w)<<18|I<<12|O<<6|63&s[c++])?b+=String.fromCharCode(w):(w-=65536,b+=String.fromCharCode(55296|w>>10,56320|1023&w))}}else b+=String.fromCharCode(w)}return b},Ce=(s,c)=>(s>>>=0)?Zo(r(),s,c):"";function Qo(s,c,f){return g?xe(3,1,s,c,f):0}function Xo(s,c){if(g)return xe(4,1,s,c)}var $n=s=>{for(var c=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);127>=b?c++:2047>=b?c+=2:55296<=b&&57343>=b?(c+=4,++f):c+=3}return c},Jo=(s,c,f,b)=>{if(!(0<b))return 0;var w=f>>>=0;b=f+b-1;for(var I=0;I<s.length;++I){var O=s.charCodeAt(I);if(55296<=O&&57343>=O&&(O=65536+((1023&O)<<10)|1023&s.charCodeAt(++I)),127>=O){if(f>=b)break;c[f++>>>0]=O}else{if(2047>=O){if(f+1>=b)break;c[f++>>>0]=192|O>>6}else{if(65535>=O){if(f+2>=b)break;c[f++>>>0]=224|O>>12}else{if(f+3>=b)break;c[f++>>>0]=240|O>>18,c[f++>>>0]=128|O>>12&63}c[f++>>>0]=128|O>>6&63}c[f++>>>0]=128|63&O}}return c[f>>>0]=0,f-w},Ot=(s,c,f)=>Jo(s,r(),c,f);function ei(s,c){if(g)return xe(5,1,s,c)}function ti(s,c,f){if(g)return xe(6,1,s,c,f)}function ri(s,c,f){return g?xe(7,1,s,c,f):0}function ni(s,c){if(g)return xe(8,1,s,c)}function oi(s,c,f){if(g)return xe(9,1,s,c,f)}function ii(s,c,f,b){if(g)return xe(10,1,s,c,f,b)}function ai(s,c,f,b){if(g)return xe(11,1,s,c,f,b)}function si(s,c,f,b){if(g)return xe(12,1,s,c,f,b)}function ui(s){if(g)return xe(13,1,s)}function di(s,c){if(g)return xe(14,1,s,c)}function li(s,c,f){if(g)return xe(15,1,s,c,f)}var ci,mt,Cc=()=>{ct("")},Qe=s=>{for(var c="";r()[s>>>0];)c+=ci[r()[s++>>>0]];return c},xn={},Sn={},Ac={};function st(s,c,f={}){if(!("argPackAdvance"in c))throw new TypeError("registerType registeredInstance requires argPackAdvance");return function(b,w,I={}){var O=w.name;if(!b)throw new mt(`type "${O}" must have a positive integer typeid pointer`);if(Sn.hasOwnProperty(b)){if(I.Tb)return;throw new mt(`Cannot register type '${O}' twice`)}Sn[b]=w,delete Ac[b],xn.hasOwnProperty(b)&&(w=xn[b],delete xn[b],w.forEach(B=>B()))}(s,c,f)}var pi=(s,c,f)=>{switch(c){case 1:return f?b=>t()[b>>>0]:b=>r()[b>>>0];case 2:return f?b=>n()[b>>>1>>>0]:b=>o()[b>>>1>>>0];case 4:return f?b=>i()[b>>>2>>>0]:b=>a()[b>>>2>>>0];case 8:return f?b=>q[b>>>3]:b=>he[b>>>3];default:throw new TypeError(`invalid integer width (${c}): ${s}`)}};function kc(s,c,f){f>>>=0,st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:b=>b,toWireType:function(b,w){if(typeof w!="bigint"&&typeof w!="number")throw w=w===null?"null":(b=typeof w)=="object"||b==="array"||b==="function"?w.toString():""+w,new TypeError(`Cannot convert "${w}" to ${this.name}`);return typeof w=="number"&&(w=BigInt(w)),w},argPackAdvance:ft,readValueFromPointer:pi(c,f,c.indexOf("u")==-1),Eb:null})}var ft=8;function Ec(s,c,f,b){st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:function(w){return!!w},toWireType:function(w,I){return I?f:b},argPackAdvance:ft,readValueFromPointer:function(w){return this.fromWireType(r()[w>>>0])},Eb:null})}var Tn=[],ut=[];function In(s){9<(s>>>=0)&&--ut[s+1]==0&&(ut[s]=void 0,Tn.push(s))}var Me=s=>{if(!s)throw new mt("Cannot use deleted val. handle = "+s);return ut[s]},Ve=s=>{switch(s){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:let c=Tn.pop()||ut.length;return ut[c]=s,ut[c+1]=1,c}};function Cn(s){return this.fromWireType(a()[s>>>2>>>0])}var Pc={name:"emscripten::val",fromWireType:s=>{var c=Me(s);return In(s),c},toWireType:(s,c)=>Ve(c),argPackAdvance:ft,readValueFromPointer:Cn,Eb:null};function zc(s){return st(s>>>0,Pc)}var Oc=(s,c)=>{switch(c){case 4:return function(f){return this.fromWireType(d()[f>>>2>>>0])};case 8:return function(f){return this.fromWireType(l()[f>>>3>>>0])};default:throw new TypeError(`invalid float width (${c}): ${s}`)}};function Dc(s,c,f){f>>>=0,st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:b=>b,toWireType:(b,w)=>w,argPackAdvance:ft,readValueFromPointer:Oc(c,f),Eb:null})}function Bc(s,c,f,b,w){if(s>>>=0,f>>>=0,c=Qe(c>>>0),w===-1&&(w=4294967295),w=B=>B,b===0){var I=32-8*f;w=B=>B<<I>>>I}var O=c.includes("unsigned")?function(B,L){return L>>>0}:function(B,L){return L};st(s,{name:c,fromWireType:w,toWireType:O,argPackAdvance:ft,readValueFromPointer:pi(c,f,b!==0),Eb:null})}function Mc(s,c,f){function b(I){var O=a()[I>>>2>>>0];return I=a()[I+4>>>2>>>0],new w(t().buffer,I,O)}var w=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][c];st(s>>>=0,{name:f=Qe(f>>>0),fromWireType:b,argPackAdvance:ft,readValueFromPointer:b},{Tb:!0})}function Rc(s,c){s>>>=0;var f=(c=Qe(c>>>0))==="std::string";st(s,{name:c,fromWireType:function(b){var w=a()[b>>>2>>>0],I=b+4;if(f)for(var O=I,B=0;B<=w;++B){var L=I+B;if(B==w||r()[L>>>0]==0){if(O=Ce(O,L-O),H===void 0)var H=O;else H+=String.fromCharCode(0),H+=O;O=L+1}}else{for(H=Array(w),B=0;B<w;++B)H[B]=String.fromCharCode(r()[I+B>>>0]);H=H.join("")}return Je(b),H},toWireType:function(b,w){w instanceof ArrayBuffer&&(w=new Uint8Array(w));var I=typeof w=="string";if(!(I||w instanceof Uint8Array||w instanceof Uint8ClampedArray||w instanceof Int8Array))throw new mt("Cannot pass non-string to std::string");var O=f&&I?$n(w):w.length,B=fr(4+O+1),L=B+4;if(a()[B>>>2>>>0]=O,f&&I)Ot(w,L,O+1);else if(I)for(I=0;I<O;++I){var H=w.charCodeAt(I);if(255<H)throw Je(L),new mt("String has UTF-16 code units that do not fit in 8 bits");r()[L+I>>>0]=H}else for(I=0;I<O;++I)r()[L+I>>>0]=w[I];return b!==null&&b.push(Je,B),B},argPackAdvance:ft,readValueFromPointer:Cn,Eb(b){Je(b)}})}var mi=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,Uc=(s,c)=>{for(var f=s>>1,b=f+c/2;!(f>=b)&&o()[f>>>0];)++f;if(32<(f<<=1)-s&&mi)return mi.decode(r().slice(s,f));for(f="",b=0;!(b>=c/2);++b){var w=n()[s+2*b>>>1>>>0];if(w==0)break;f+=String.fromCharCode(w)}return f},Nc=(s,c,f)=>{if(f??=2147483647,2>f)return 0;var b=c;f=(f-=2)<2*s.length?f/2:s.length;for(var w=0;w<f;++w){var I=s.charCodeAt(w);n()[c>>>1>>>0]=I,c+=2}return n()[c>>>1>>>0]=0,c-b},Vc=s=>2*s.length,Wc=(s,c)=>{for(var f=0,b="";!(f>=c/4);){var w=i()[s+4*f>>>2>>>0];if(w==0)break;++f,65536<=w?(w-=65536,b+=String.fromCharCode(55296|w>>10,56320|1023&w)):b+=String.fromCharCode(w)}return b},Lc=(s,c,f)=>{if(c>>>=0,f??=2147483647,4>f)return 0;var b=c;f=b+f-4;for(var w=0;w<s.length;++w){var I=s.charCodeAt(w);if(55296<=I&&57343>=I&&(I=65536+((1023&I)<<10)|1023&s.charCodeAt(++w)),i()[c>>>2>>>0]=I,(c+=4)+4>f)break}return i()[c>>>2>>>0]=0,c-b},Gc=s=>{for(var c=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);55296<=b&&57343>=b&&++f,c+=4}return c};function Hc(s,c,f){if(s>>>=0,c>>>=0,f=Qe(f>>>=0),c===2)var b=Uc,w=Nc,I=Vc,O=B=>o()[B>>>1>>>0];else c===4&&(b=Wc,w=Lc,I=Gc,O=B=>a()[B>>>2>>>0]);st(s,{name:f,fromWireType:B=>{for(var L,H=a()[B>>>2>>>0],X=B+4,ce=0;ce<=H;++ce){var ge=B+4+ce*c;ce!=H&&O(ge)!=0||(X=b(X,ge-X),L===void 0?L=X:(L+=String.fromCharCode(0),L+=X),X=ge+c)}return Je(B),L},toWireType:(B,L)=>{if(typeof L!="string")throw new mt(`Cannot pass non-string to C++ string type ${f}`);var H=I(L),X=fr(4+H+c);return a()[X>>>2>>>0]=H/c,w(L,X+4,H+c),B!==null&&B.push(Je,X),X},argPackAdvance:ft,readValueFromPointer:Cn,Eb(B){Je(B)}})}function Fc(s,c){st(s>>>=0,{Ub:!0,name:c=Qe(c>>>0),argPackAdvance:0,fromWireType:()=>{},toWireType:()=>{}})}var qc=()=>1;function Kc(s){Dn(s>>>0,!y,1,!_,131072,!1),Ho()}var fi=s=>{if(!we)try{if(s(),!(0<wt))try{g?hr(ee):vn(ee)}catch(c){c instanceof yn||c=="unwind"||k(1,c)}}catch(c){c instanceof yn||c=="unwind"||k(1,c)}};function An(s){s>>>=0,typeof Atomics.oc=="function"&&(Atomics.oc(i(),s>>>2,s).value.then(ir),s+=128,Atomics.store(i(),s>>>2,1))}var ir=()=>{var s=Mt();s&&(An(s),fi(Hi))};function jc(s,c){(s>>>=0)==c>>>0?setTimeout(ir):g?postMessage({targetThread:s,cmd:"checkMailbox"}):(s=Ze[s])&&s.postMessage({cmd:"checkMailbox"})}var kn=[];function Yc(s,c,f,b,w){for(c>>>=0,b/=2,kn.length=b,f=w>>>0>>>3,w=0;w<b;w++)kn[w]=q[f+2*w]?q[f+2*w+1]:l()[f+2*w+1>>>0];return(c?bn[c]:Np[s])(...kn)}function Zc(s){s>>>=0,g?postMessage({cmd:"cleanupThread",thread:s}):Go(Ze[s])}function Qc(s){}var ar=(s,c)=>{var f=Sn[s];if(f===void 0)throw s=Vi(s),f=Qe(s),Je(s),new mt(`${c} has unknown type ${f}`);return f},hi=(s,c,f)=>{var b=[];return s=s.toWireType(b,f),b.length&&(a()[c>>>2>>>0]=Ve(b)),s};function Xc(s,c,f){return c>>>=0,f>>>=0,s=Me(s>>>0),c=ar(c,"emval::as"),hi(c,f,s)}function Jc(s,c){return c>>>=0,s=Me(s>>>0),(c=ar(c,"emval::as")).toWireType(null,s)}var sr=s=>{try{s()}catch(c){ct(c)}},ht=0,Xe=null,gi=0,ur=[],bi={},yi={},ep=0,En=null,tp=[];function _i(s){return function(c){if(!we){if(ht===0){var f=!1,b=!1;c((w=0)=>{if(!we&&(gi=w,f=!0,b)){ht=2,sr(()=>Yi(Xe)),typeof Browser<"u"&&Browser.Lb.Sb&&Browser.Lb.resume(),w=!1;try{var I=function(){var L=i()[Xe+8>>>2>>>0];return L=Z[yi[L]],--wt,L()}()}catch(L){I=L,w=!0}var O=!1;if(!Xe){var B=En;B&&(En=null,(w?B.reject:B.resolve)(I),O=!0)}if(w&&!O)throw I}}),b=!0,f||(ht=1,Xe=function(){var w=fr(65548),I=w+12;a()[w>>>2>>>0]=I,a()[w+4>>>2>>>0]=I+65536,I=ur[0];var O=bi[I];return O===void 0&&(O=ep++,bi[I]=O,yi[O]=I),I=O,i()[w+8>>>2>>>0]=I,w}(),typeof Browser<"u"&&Browser.Lb.Sb&&Browser.Lb.pause(),sr(()=>Ki(Xe)))}else ht===2?(ht=0,sr(Zi),Je(Xe),Xe=null,tp.forEach(fi)):ct(`invalid state: ${ht}`);return gi}}(c=>{s().then(c)})}function rp(s){return s>>>=0,_i(()=>(s=Me(s)).then(Ve))}var dr=[];function np(s,c,f,b){return f>>>=0,b>>>=0,(s=dr[s>>>0])(null,c=Me(c>>>0),f,b)}var op={},lr=s=>{var c=op[s];return c===void 0?Qe(s):c};function ip(s,c,f,b,w){return f>>>=0,b>>>=0,w>>>=0,(s=dr[s>>>0])(c=Me(c>>>0),c[f=lr(f)],b,w)}var wi=()=>typeof globalThis=="object"?globalThis:Function("return this")();function ap(s){return(s>>>=0)==0?Ve(wi()):(s=lr(s),Ve(wi()[s]))}var sp=s=>{var c=dr.length;return dr.push(s),c},up=(s,c)=>{for(var f=Array(s),b=0;b<s;++b)f[b]=ar(a()[c+4*b>>>2>>>0],"parameter "+b);return f},vi=(s,c)=>Object.defineProperty(c,"name",{value:s});function dp(s,c,f){var b=(c=up(s,c>>>0)).shift();s--;var w=`return function (obj, func, destructorsRef, args) {
3917
- `,I=0,O=[];f===0&&O.push("obj");for(var B=["retType"],L=[b],H=0;H<s;++H)O.push("arg"+H),B.push("argType"+H),L.push(c[H]),w+=` var arg${H} = argType${H}.readValueFromPointer(args${I?"+"+I:""});
3918
- `,I+=c[H].argPackAdvance;return w+=` var rv = ${f===1?"new func":"func.call"}(${O.join(", ")});
3919
- `,b.Ub||(B.push("emval_returnValue"),L.push(hi),w+=` return emval_returnValue(retType, destructorsRef, rv);
3970
+ var Rn=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Object.getOwnPropertyNames;var Lp=Object.prototype.hasOwnProperty;var Un=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,r)=>(typeof require<"u"?require:t)[r]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')});var U=(e,t)=>()=>(e&&(t=e(e=0)),t);var Ht=(e,t)=>{for(var r in t)Rn(e,r,{get:t[r],enumerable:!0})},Gp=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of Wp(t))!Lp.call(e,o)&&o!==r&&Rn(e,o,{get:()=>t[o],enumerable:!(n=Vp(t,o))||n.enumerable});return e};var gr=e=>Gp(Rn({},"__esModule",{value:!0}),e);var br,St,Tt,Hp,Xi,Nn=U(()=>{"use strict";br=new Map,St=[],Tt=(e,t,r)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let n=br.get(e);if(n===void 0)br.set(e,{backend:t,priority:r});else{if(n.priority>r)return;if(n.priority===r&&n.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${r}`)}if(r>=0){let o=St.indexOf(e);o!==-1&&St.splice(o,1);for(let i=0;i<St.length;i++)if(br.get(St[i]).priority<=r){St.splice(i,0,e);return}St.push(e)}return}throw new TypeError("not a valid backend")},Hp=async e=>{let t=br.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let r=!!t.initPromise;try{return r||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(n){return r||(t.error=`${n}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},Xi=async e=>{let t=e.executionProviders||[],r=t.map(l=>typeof l=="string"?l:l.name),n=r.length===0?St:r,o,i=[],a=new Set;for(let l of n){let p=await Hp(l);typeof p=="string"?i.push({name:l,err:p}):(o||(o=p),o===p&&a.add(l))}if(!o)throw new Error(`no available backend found. ERR: ${i.map(l=>`[${l.name}] ${l.err}`).join(", ")}`);for(let{name:l,err:p}of i)r.includes(l)&&console.warn(`removing requested execution provider "${l}" from session options because it is not available: ${p}`);let d=t.filter(l=>a.has(typeof l=="string"?l:l.name));return[o,new Proxy(e,{get:(l,p)=>p==="executionProviders"?d:Reflect.get(l,p)})]}});var Ji=U(()=>{"use strict";Nn()});var ea,ta=U(()=>{"use strict";ea="1.21.0-dev.20250206-d981b153d3"});var ra,Ue,Vn=U(()=>{"use strict";ta();ra="warning",Ue={wasm:{},webgl:{},webgpu:{},versions:{common:ea},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);ra=e}},get logLevel(){return ra}};Object.defineProperty(Ue,"logLevel",{enumerable:!0})});var _e,na=U(()=>{"use strict";Vn();_e=Ue});var oa,ia,aa=U(()=>{"use strict";oa=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=e.dims[3],r.height=e.dims[2];let n=r.getContext("2d");if(n!=null){let o,i;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[3]):(o=e.dims[3],i=e.dims[2]);let a=t?.format!==void 0?t.format:"RGB",d=t?.norm,l,p;d===void 0||d.mean===void 0?l=[255,255,255,255]:typeof d.mean=="number"?l=[d.mean,d.mean,d.mean,d.mean]:(l=[d.mean[0],d.mean[1],d.mean[2],0],d.mean[3]!==void 0&&(l[3]=d.mean[3])),d===void 0||d.bias===void 0?p=[0,0,0,0]:typeof d.bias=="number"?p=[d.bias,d.bias,d.bias,d.bias]:(p=[d.bias[0],d.bias[1],d.bias[2],0],d.bias[3]!==void 0&&(p[3]=d.bias[3]));let m=i*o,u=0,h=m,_=m*2,y=-1;a==="RGBA"?(u=0,h=m,_=m*2,y=m*3):a==="RGB"?(u=0,h=m,_=m*2):a==="RBG"&&(u=0,_=m,h=m*2);for(let g=0;g<i;g++)for(let x=0;x<o;x++){let $=(e.data[u++]-p[0])*l[0],v=(e.data[h++]-p[1])*l[1],S=(e.data[_++]-p[2])*l[2],T=y===-1?255:(e.data[y++]-p[3])*l[3];n.fillStyle="rgba("+$+","+v+","+S+","+T+")",n.fillRect(x,g,1,1)}if("toDataURL"in r)return r.toDataURL();throw new Error("toDataURL is not supported")}else throw new Error("Can not access image data")},ia=(e,t)=>{let r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),n;if(r!=null){let o,i,a;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(o=e.dims[2],i=e.dims[1],a=e.dims[3]):(o=e.dims[3],i=e.dims[2],a=e.dims[1]);let d=t!==void 0&&t.format!==void 0?t.format:"RGB",l=t?.norm,p,m;l===void 0||l.mean===void 0?p=[255,255,255,255]:typeof l.mean=="number"?p=[l.mean,l.mean,l.mean,l.mean]:(p=[l.mean[0],l.mean[1],l.mean[2],255],l.mean[3]!==void 0&&(p[3]=l.mean[3])),l===void 0||l.bias===void 0?m=[0,0,0,0]:typeof l.bias=="number"?m=[l.bias,l.bias,l.bias,l.bias]:(m=[l.bias[0],l.bias[1],l.bias[2],0],l.bias[3]!==void 0&&(m[3]=l.bias[3]));let u=i*o;if(t!==void 0&&(t.format!==void 0&&a===4&&t.format!=="RGBA"||a===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let h=4,_=0,y=1,g=2,x=3,$=0,v=u,S=u*2,T=-1;d==="RGBA"?($=0,v=u,S=u*2,T=u*3):d==="RGB"?($=0,v=u,S=u*2):d==="RBG"&&($=0,S=u,v=u*2),n=r.createImageData(o,i);for(let A=0;A<i*o;_+=h,y+=h,g+=h,x+=h,A++)n.data[_]=(e.data[$++]-m[0])*p[0],n.data[y]=(e.data[v++]-m[1])*p[1],n.data[g]=(e.data[S++]-m[2])*p[2],n.data[x]=T===-1?255:(e.data[T++]-m[3])*p[3]}else throw new Error("Can not access image data");return n}});var Wn,sa,ua,da,la,ca,pa=U(()=>{"use strict";yr();Wn=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:r,width:n}=t,o=t.norm??{mean:255,bias:0},i,a;typeof o.mean=="number"?i=[o.mean,o.mean,o.mean,o.mean]:i=[o.mean[0],o.mean[1],o.mean[2],o.mean[3]??255],typeof o.bias=="number"?a=[o.bias,o.bias,o.bias,o.bias]:a=[o.bias[0],o.bias[1],o.bias[2],o.bias[3]??0];let d=t.format!==void 0?t.format:"RGBA",l=t.tensorFormat!==void 0&&t.tensorFormat!==void 0?t.tensorFormat:"RGB",p=r*n,m=l==="RGBA"?new Float32Array(p*4):new Float32Array(p*3),u=4,h=0,_=1,y=2,g=3,x=0,$=p,v=p*2,S=-1;d==="RGB"&&(u=3,h=0,_=1,y=2,g=-1),l==="RGBA"?S=p*3:l==="RBG"?(x=0,v=p,$=p*2):l==="BGR"&&(v=0,$=p,x=p*2);for(let A=0;A<p;A++,h+=u,y+=u,_+=u,g+=u)m[x++]=(e[h]+a[0])/i[0],m[$++]=(e[_]+a[1])/i[1],m[v++]=(e[y]+a[2])/i[2],S!==-1&&g!==-1&&(m[S++]=(e[g]+a[3])/i[3]);return l==="RGBA"?new ze("float32",m,[1,4,r,n]):new ze("float32",m,[1,3,r,n])},sa=async(e,t)=>{let r=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,o=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,i=typeof e=="string",a,d=t??{},l=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},p=m=>typeof HTMLCanvasElement<"u"&&m instanceof HTMLCanvasElement||m instanceof OffscreenCanvas?m.getContext("2d"):null;if(r){let m=l();m.width=e.width,m.height=e.height;let u=p(m);if(u!=null){let h=e.height,_=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(h=t.resizedHeight,_=t.resizedWidth),t!==void 0){if(d=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");d.tensorFormat="RGBA",d.height=h,d.width=_}else d.tensorFormat="RGBA",d.height=h,d.width=_;u.drawImage(e,0,0),a=u.getImageData(0,0,_,h).data}else throw new Error("Can not access image data")}else if(n){let m,u;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(m=t.resizedHeight,u=t.resizedWidth):(m=e.height,u=e.width),t!==void 0&&(d=t),d.format="RGBA",d.height=m,d.width=u,t!==void 0){let h=l();h.width=u,h.height=m;let _=p(h);if(_!=null)_.putImageData(e,0,0),a=_.getImageData(0,0,u,m).data;else throw new Error("Can not access image data")}else a=e.data}else if(o){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let m=l();m.width=e.width,m.height=e.height;let u=p(m);if(u!=null){let h=e.height,_=e.width;return u.drawImage(e,0,0,_,h),a=u.getImageData(0,0,_,h).data,d.height=h,d.width=_,Wn(a,d)}else throw new Error("Can not access image data")}else{if(i)return new Promise((m,u)=>{let h=l(),_=p(h);if(!e||!_)return u();let y=new Image;y.crossOrigin="Anonymous",y.src=e,y.onload=()=>{h.width=y.width,h.height=y.height,_.drawImage(y,0,0,h.width,h.height);let g=_.getImageData(0,0,h.width,h.height);d.height=h.height,d.width=h.width,m(Wn(g.data,d))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(a!==void 0)return Wn(a,d);throw new Error("Input data provided is not supported - aborted tensor creation")},ua=(e,t)=>{let{width:r,height:n,download:o,dispose:i}=t,a=[1,n,r,4];return new ze({location:"texture",type:"float32",texture:e,dims:a,download:o,dispose:i})},da=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new ze({location:"gpu-buffer",type:r??"float32",gpuBuffer:e,dims:n,download:o,dispose:i})},la=(e,t)=>{let{dataType:r,dims:n,download:o,dispose:i}=t;return new ze({location:"ml-tensor",type:r??"float32",mlTensor:e,dims:n,download:o,dispose:i})},ca=(e,t,r)=>new ze({location:"cpu-pinned",type:e,data:t,dims:r??[t.length]})});var It,Ft,ma,fa,ha=U(()=>{"use strict";It=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),Ft=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),ma=!1,fa=()=>{if(!ma){ma=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof Float16Array<"u"&&Float16Array.from;e&&(It.set("int64",BigInt64Array),Ft.set(BigInt64Array,"int64")),t&&(It.set("uint64",BigUint64Array),Ft.set(BigUint64Array,"uint64")),r?(It.set("float16",Float16Array),Ft.set(Float16Array,"float16")):It.set("float16",Uint16Array)}}});var ga,ba,ya=U(()=>{"use strict";yr();ga=e=>{let t=1;for(let r=0;r<e.length;r++){let n=e[r];if(typeof n!="number"||!Number.isSafeInteger(n))throw new TypeError(`dims[${r}] must be an integer, got: ${n}`);if(n<0)throw new RangeError(`dims[${r}] must be a non-negative integer, got: ${n}`);t*=n}return t},ba=(e,t)=>{switch(e.location){case"cpu":return new ze(e.type,e.data,t);case"cpu-pinned":return new ze({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new ze({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new ze({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new ze({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}});var ze,yr=U(()=>{"use strict";aa();pa();ha();ya();ze=class{constructor(t,r,n){fa();let o,i;if(typeof t=="object"&&"location"in t)switch(this.dataLocation=t.location,o=t.type,i=t.dims,t.location){case"cpu-pinned":{let d=It.get(o);if(!d)throw new TypeError(`unsupported type "${o}" to create tensor from pinned buffer`);if(!(t.data instanceof d))throw new TypeError(`buffer should be of type ${d.name}`);this.cpuData=t.data;break}case"texture":{if(o!=="float32")throw new TypeError(`unsupported type "${o}" to create tensor from texture`);this.gpuTextureData=t.texture,this.downloader=t.download,this.disposer=t.dispose;break}case"gpu-buffer":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from gpu buffer`);this.gpuBufferData=t.gpuBuffer,this.downloader=t.download,this.disposer=t.dispose;break}case"ml-tensor":{if(o!=="float32"&&o!=="float16"&&o!=="int32"&&o!=="int64"&&o!=="uint32"&&o!=="uint64"&&o!=="int8"&&o!=="uint8"&&o!=="bool"&&o!=="uint4"&&o!=="int4")throw new TypeError(`unsupported type "${o}" to create tensor from MLTensor`);this.mlTensorData=t.mlTensor,this.downloader=t.download,this.disposer=t.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let d,l;if(typeof t=="string")if(o=t,l=n,t==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");d=r}else{let p=It.get(t);if(p===void 0)throw new TypeError(`Unsupported tensor type: ${t}.`);if(Array.isArray(r)){if(t==="float16"&&p===Uint16Array||t==="uint4"||t==="int4")throw new TypeError(`Creating a ${t} tensor from number array is not supported. Please use ${p.name} as data.`);t==="uint64"||t==="int64"?d=p.from(r,BigInt):d=p.from(r)}else if(r instanceof p)d=r;else if(r instanceof Uint8ClampedArray)if(t==="uint8")d=Uint8Array.from(r);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else throw new TypeError(`A ${o} tensor's data must be type of ${p}`)}else if(l=r,Array.isArray(t)){if(t.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let p=typeof t[0];if(p==="string")o="string",d=t;else if(p==="boolean")o="bool",d=Uint8Array.from(t);else throw new TypeError(`Invalid element type of data array: ${p}.`)}else if(t instanceof Uint8ClampedArray)o="uint8",d=Uint8Array.from(t);else{let p=Ft.get(t.constructor);if(p===void 0)throw new TypeError(`Unsupported type for tensor data: ${t.constructor}.`);o=p,d=t}if(l===void 0)l=[d.length];else if(!Array.isArray(l))throw new TypeError("A tensor's dims must be a number array");i=l,this.cpuData=d,this.dataLocation="cpu"}let a=ga(i);if(this.cpuData&&a!==this.cpuData.length&&!((o==="uint4"||o==="int4")&&Math.ceil(a/2)===this.cpuData.length))throw new Error(`Tensor's size(${a}) does not match data length(${this.cpuData.length}).`);this.type=o,this.dims=i,this.size=a}static async fromImage(t,r){return sa(t,r)}static fromTexture(t,r){return ua(t,r)}static fromGpuBuffer(t,r){return da(t,r)}static fromMLTensor(t,r){return la(t,r)}static fromPinnedBuffer(t,r,n){return ca(t,r,n)}toDataURL(t){return oa(this,t)}toImageData(t){return ia(this,t)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(t){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,t&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(t){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return ba(this,t)}}});var Fe,Ln=U(()=>{"use strict";yr();Fe=ze});var _r,_a,Ne,Be,Gn=U(()=>{"use strict";Vn();_r=(e,t)=>{(typeof Ue.trace>"u"?!Ue.wasm.trace:!Ue.trace)||console.timeStamp(`${e}::ORT::${t}`)},_a=(e,t)=>{let r=new Error().stack?.split(/\r\n|\r|\n/g)||[],n=!1;for(let o=0;o<r.length;o++){if(n&&!r[o].includes("TRACE_FUNC")){let i=`FUNC_${e}::${r[o].trim().split(" ")[1]}`;t&&(i+=`::${t}`),_r("CPU",i);return}r[o].includes("TRACE_FUNC")&&(n=!0)}},Ne=e=>{(typeof Ue.trace>"u"?!Ue.wasm.trace:!Ue.trace)||_a("BEGIN",e)},Be=e=>{(typeof Ue.trace>"u"?!Ue.wasm.trace:!Ue.trace)||_a("END",e)}});var wr,wa=U(()=>{"use strict";Nn();Ln();Gn();wr=class e{constructor(t){this.handler=t}async run(t,r,n){Ne();let o={},i={};if(typeof t!="object"||t===null||t instanceof Fe||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let a=!0;if(typeof r=="object"){if(r===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(r instanceof Fe)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(r)){if(r.length===0)throw new TypeError("'fetches' cannot be an empty array.");a=!1;for(let p of r){if(typeof p!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(p)===-1)throw new RangeError(`'fetches' contains invalid output name: ${p}.`);o[p]=null}if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else{let p=!1,m=Object.getOwnPropertyNames(r);for(let u of this.outputNames)if(m.indexOf(u)!==-1){let h=r[u];(h===null||h instanceof Fe)&&(p=!0,a=!1,o[u]=h)}if(p){if(typeof n=="object"&&n!==null)i=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else i=r}}else if(typeof r<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let p of this.inputNames)if(typeof t[p]>"u")throw new Error(`input '${p}' is missing in 'feeds'.`);if(a)for(let p of this.outputNames)o[p]=null;let d=await this.handler.run(t,o,i),l={};for(let p in d)if(Object.hasOwnProperty.call(d,p)){let m=d[p];m instanceof Fe?l[p]=m:l[p]=new Fe(m.type,m.data,m.dims)}return Be(),l}async release(){return this.handler.dispose()}static async create(t,r,n,o){Ne();let i,a={};if(typeof t=="string"){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(i=t,typeof r=="object"&&r!==null)a=r;else if(typeof r<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let m=t,u=0,h=t.byteLength;if(typeof r=="object"&&r!==null)a=r;else if(typeof r=="number"){if(u=r,!Number.isSafeInteger(u))throw new RangeError("'byteOffset' must be an integer.");if(u<0||u>=m.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${m.byteLength}).`);if(h=t.byteLength-u,typeof n=="number"){if(h=n,!Number.isSafeInteger(h))throw new RangeError("'byteLength' must be an integer.");if(h<=0||u+h>m.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${m.byteLength-u}].`);if(typeof o=="object"&&o!==null)a=o;else if(typeof o<"u")throw new TypeError("'options' must be an object.")}else if(typeof n<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof r<"u")throw new TypeError("'options' must be an object.");i=new Uint8Array(m,u,h)}else throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");let[d,l]=await Xi(a),p=await d.createInferenceSessionHandler(i,l);return Be(),new e(p)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}}});var Fp,va=U(()=>{"use strict";wa();Fp=wr});var $a=U(()=>{"use strict"});var xa=U(()=>{"use strict"});var Sa=U(()=>{"use strict"});var Ta=U(()=>{"use strict"});var Hn={};Ht(Hn,{InferenceSession:()=>Fp,TRACE:()=>_r,TRACE_FUNC_BEGIN:()=>Ne,TRACE_FUNC_END:()=>Be,Tensor:()=>Fe,env:()=>_e,registerBackend:()=>Tt});var Le=U(()=>{"use strict";Ji();na();va();Ln();$a();xa();Gn();Sa();Ta()});var vr=U(()=>{"use strict"});var ka={};Ht(ka,{default:()=>qp});var Ca,Aa,qp,Ea=U(()=>{"use strict";Fn();gt();$r();Ca="ort-wasm-proxy-worker",Aa=globalThis.self?.name===Ca;Aa&&(self.onmessage=e=>{let{type:t,in:r}=e.data;try{switch(t){case"init-wasm":xr(r.wasm).then(()=>{Sr(r).then(()=>{postMessage({type:t})},n=>{postMessage({type:t,err:n})})},n=>{postMessage({type:t,err:n})});break;case"init-ep":{let{epName:n,env:o}=r;Tr(o,n).then(()=>{postMessage({type:t})},i=>{postMessage({type:t,err:i})});break}case"copy-from":{let{buffer:n}=r,o=qt(n);postMessage({type:t,out:o});break}case"create":{let{model:n,options:o}=r;Ir(n,o).then(i=>{postMessage({type:t,out:i})},i=>{postMessage({type:t,err:i})});break}case"release":Cr(r),postMessage({type:t});break;case"run":{let{sessionId:n,inputIndices:o,inputs:i,outputIndices:a,options:d}=r;Ar(n,o,i,a,new Array(a.length).fill(null),d).then(l=>{l.some(p=>p[3]!=="cpu")?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:l},Er([...i,...l]))},l=>{postMessage({type:t,err:l})});break}case"end-profiling":kr(r),postMessage({type:t});break;default:}}catch(n){postMessage({type:t,err:n})}});qp=Aa?null:e=>new Worker(e??Ve,{type:"module",name:Ca})});var za={};Ht(za,{default:()=>Kp});var qn,Pa,Kp,Oa=U(()=>{"use strict";Pa=(qn=import.meta.url,async function(e={}){function t(){return K.buffer!=Q.buffer&&pe(),Q}function r(){return K.buffer!=Q.buffer&&pe(),ie}function n(){return K.buffer!=Q.buffer&&pe(),te}function o(){return K.buffer!=Q.buffer&&pe(),be}function i(){return K.buffer!=Q.buffer&&pe(),Oe}function a(){return K.buffer!=Q.buffer&&pe(),ve}function d(){return K.buffer!=Q.buffer&&pe(),de}function l(){return K.buffer!=Q.buffer&&pe(),he}var p,m,u=Object.assign({},e),h=new Promise((s,c)=>{p=s,m=c}),_=typeof window=="object",y=typeof importScripts=="function",g=y&&self.name=="em-pthread";u.mountExternalData=(s,c)=>{s.startsWith("./")&&(s=s.substring(2)),(u.Fb||(u.Fb=new Map)).set(s,c)},u.unmountExternalData=()=>{delete u.Fb};var x=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,shared:!0}).buffer.constructor;let $=()=>{let s=(f,b,w)=>(...I)=>{let O=Xe,B=b?.();I=f(...I);let G=b?.();return B!==G&&(f=G,w(B),b=w=null),Xe!=O?new Promise((F,X)=>{kn={resolve:F,reject:X}}):I},c=f=>async(...b)=>{try{if(u.Gb)throw Error("Session already started");let w=u.Gb={hc:b[0],errors:[]},I=await f(...b);if(u.Gb!==w)throw Error("Session mismatch");u.Hb?.flush();let O=w.errors;if(0<O.length){let B=await Promise.all(O);if(B=B.filter(G=>G),0<B.length)throw Error(B.join(`
3971
+ `))}return I}finally{u.Gb=null}};u._OrtCreateSession=s(u._OrtCreateSession,()=>u._OrtCreateSession,f=>u._OrtCreateSession=f),u._OrtRun=c(s(u._OrtRun,()=>u._OrtRun,f=>u._OrtRun=f)),u._OrtRunWithBinding=c(s(u._OrtRunWithBinding,()=>u._OrtRunWithBinding,f=>u._OrtRunWithBinding=f)),u._OrtBindInput=s(u._OrtBindInput,()=>u._OrtBindInput,f=>u._OrtBindInput=f),$=void 0};u.jsepInit=(s,c)=>{if($?.(),s==="webgpu"){[u.Hb,u.Vb,u.Zb,u.Ob,u.Yb,u.kb,u.$b,u.cc,u.Wb,u.Xb,u.ac]=c;let f=u.Hb;u.jsepRegisterBuffer=(b,w,I,O)=>f.registerBuffer(b,w,I,O),u.jsepGetBuffer=b=>f.getBuffer(b),u.jsepCreateDownloader=(b,w,I)=>f.createDownloader(b,w,I),u.jsepOnCreateSession=b=>{f.onCreateSession(b)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepOnRunStart=b=>f.onRunStart(b),u.dc=(b,w)=>{f.upload(b,w)}}else if(s==="webnn"){[u.Hb,u.bc,u.Pb,u.jsepEnsureTensor,u.ec,u.jsepDownloadTensor]=c,u.jsepReleaseTensorId=u.Pb;let f=u.Hb;u.jsepOnRunStart=b=>f.onRunStart(b),u.jsepRegisterMLContext=(b,w)=>{f.registerMLContext(b,w)},u.jsepOnReleaseSession=b=>{f.onReleaseSession(b)},u.jsepCreateMLTensorDownloader=(b,w)=>f.createMLTensorDownloader(b,w),u.jsepRegisterMLTensor=(b,w,I)=>f.registerMLTensor(b,w,I),u.jsepCreateMLContext=b=>f.createMLContext(b),u.jsepRegisterMLConstant=(b,w,I,O,B)=>f.registerMLConstant(b,w,I,O,B,u.Fb)}};var v,S,T=Object.assign({},u),A=(s,c)=>{throw c},C="";(_||y)&&(y?C=self.location.href:typeof document<"u"&&document.currentScript&&(C=document.currentScript.src),qn&&(C=qn),C=C.startsWith("blob:")?"":C.substr(0,C.replace(/[?#].*/,"").lastIndexOf("/")+1),y&&(S=s=>{var c=new XMLHttpRequest;return c.open("GET",s,!1),c.responseType="arraybuffer",c.send(null),new Uint8Array(c.response)}),v=(s,c,f)=>{var b=new XMLHttpRequest;b.open("GET",s,!0),b.responseType="arraybuffer",b.onload=()=>{b.status==200||b.status==0&&b.response?c(b.response):f()},b.onerror=f,b.send(null)});var P,D=console.log.bind(console),R=console.error.bind(console),H=D,L=R;if(Object.assign(u,T),T=null,g){let s=function(c){try{var f=c.data,b=f.cmd;if(b==="load"){let w=[];self.onmessage=I=>w.push(I),self.startWorker=()=>{postMessage({cmd:"loaded"});for(let I of w)s(I);self.onmessage=s};for(let I of f.handlers)u[I]&&!u[I].proxy||(u[I]=(...O)=>{postMessage({Nb:"callHandler",pc:I,args:O})},I=="print"&&(H=u[I]),I=="printErr"&&(L=u[I]));K=f.wasmMemory,pe(),re(f.wasmModule)}else if(b==="run"){On(f.pthread_ptr,0,0,1,0,0),Cn(f.pthread_ptr),xc(),Go(),V||(Vi(),V=!0);try{Sc(f.start_routine,f.arg)}catch(w){if(w!="unwind")throw w}}else b==="cancel"?Rt()&&fr(-1):f.target!=="setimmediate"&&(b==="checkMailbox"?V&&or():b&&(L(`worker: received unknown command ${b}`),L(f)))}catch(w){throw Wi(),w}};var wg=s,re,V=!1;L=function(...c){c=c.join(" "),console.error(c)},self.alert=function(...c){postMessage({Nb:"alert",text:c.join(" "),rc:Rt()})},u.instantiateWasm=(c,f)=>new Promise(b=>{re=w=>{w=new WebAssembly.Instance(w,Uo()),f(w),b()}}),self.onunhandledrejection=c=>{throw c.reason||c},self.onmessage=s}u.wasmBinary&&(P=u.wasmBinary);var K,we,j,Q,ie,te,be,Oe,ve,de,W,Y,he,De=!1;function pe(){var s=K.buffer;u.HEAP8=Q=new Int8Array(s),u.HEAP16=te=new Int16Array(s),u.HEAPU8=ie=new Uint8Array(s),u.HEAPU16=be=new Uint16Array(s),u.HEAP32=Oe=new Int32Array(s),u.HEAPU32=ve=new Uint32Array(s),u.HEAPF32=de=new Float32Array(s),u.HEAPF64=he=new Float64Array(s),u.HEAP64=W=new BigInt64Array(s),u.HEAPU64=Y=new BigUint64Array(s)}if(!g){if(!((K=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0})).buffer instanceof x))throw L("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"),Error("bad memory");pe()}var Ie=[],He=[],mn=[],wt=0,fn=null,Gt=null;function Oo(){if(--wt==0&&(fn!==null&&(clearInterval(fn),fn=null),Gt)){var s=Gt;Gt=null,s()}}function ct(s){throw L(s="Aborted("+s+")"),De=!0,j=1,s=new WebAssembly.RuntimeError(s+". Build with -sASSERTIONS for more info."),m(s),s}var hn,Do=s=>s.startsWith("data:application/octet-stream;base64,"),Bo=s=>s.startsWith("file://");function Mo(s){if(s==hn&&P)return new Uint8Array(P);if(S)return S(s);throw"both async and sync fetching of the wasm failed"}function Ro(s,c,f){return function(b){if(!P&&(_||y)){if(typeof fetch=="function"&&!Bo(b))return fetch(b,{credentials:"same-origin"}).then(w=>{if(!w.ok)throw`failed to load wasm binary file at '${b}'`;return w.arrayBuffer()}).catch(()=>Mo(b));if(v)return new Promise((w,I)=>{v(b,O=>w(new Uint8Array(O)),I)})}return Promise.resolve().then(()=>Mo(b))}(s).then(b=>WebAssembly.instantiate(b,c)).then(f,b=>{L(`failed to asynchronously prepare wasm: ${b}`),ct(b)})}function Uo(){return{a:{O:$c,Aa:vc,b:Ic,aa:Ko,B:Zo,qa:Qo,Y:Jo,_:ei,ra:ti,oa:ri,ha:ni,na:oi,L:ii,Z:ai,W:si,pa:ui,X:di,va:Cc,F:kc,Q:Ec,P:zc,E:Dc,u:Bc,q:Mc,G:Rc,A:Hc,R:Fc,ua:qc,ka:Kc,U:jc,ba:Yc,H:Zc,ja:Cn,ta:Qc,t:Xc,Ba:Jc,x:rp,o:np,m:ip,c:Tn,n:ap,k:dp,w:lp,p:cp,f:pp,s:mp,l:fp,e:hp,j:gp,i:bp,g:yp,d:_p,ea:wp,fa:vp,ga:$p,ca:xi,da:Si,T:xp,h:Sp,D:Tp,I:Ip,M:Cp,y:Ap,sa:kp,V:Ep,v:Ii,z:Pp,N:zp,S:Op,za:Dp,ya:Bp,la:ki,ma:Ei,$:wn,C:Pi,K:zi,ia:Oi,J:Di,a:K,xa:_n,wa:Ri,r:Up}}}var gn={916868:(s,c,f,b,w)=>{if(u===void 0||!u.Fb)return 1;if((s=Ce(Number(s>>>0))).startsWith("./")&&(s=s.substring(2)),!(s=u.Fb.get(s)))return 2;if(c=Number(c>>>0),f=Number(f>>>0),b=Number(b>>>0),c+f>s.byteLength)return 3;try{let I=s.subarray(c,c+f);switch(w){case 0:r().set(I,b>>>0);break;case 1:u.dc(b,I);break;default:return 4}return 0}catch{return 4}},917583:(s,c,f)=>{u.ec(s,r().subarray(c>>>0,c+f>>>0))},917646:()=>u.bc(),917687:s=>{u.Pb(s)},917723:()=>{u.Wb()},917754:()=>{u.Xb()},917783:()=>{u.ac()},917808:s=>u.Vb(s),917841:s=>u.Zb(s),917873:(s,c,f)=>{u.Ob(Number(s),Number(c),Number(f),!0)},917936:(s,c,f)=>{u.Ob(Number(s),Number(c),Number(f))},917993:()=>typeof wasmOffsetConverter<"u",918050:s=>{u.kb("Abs",s,void 0)},918101:s=>{u.kb("Neg",s,void 0)},918152:s=>{u.kb("Floor",s,void 0)},918205:s=>{u.kb("Ceil",s,void 0)},918257:s=>{u.kb("Reciprocal",s,void 0)},918315:s=>{u.kb("Sqrt",s,void 0)},918367:s=>{u.kb("Exp",s,void 0)},918418:s=>{u.kb("Erf",s,void 0)},918469:s=>{u.kb("Sigmoid",s,void 0)},918524:(s,c,f)=>{u.kb("HardSigmoid",s,{alpha:c,beta:f})},918603:s=>{u.kb("Log",s,void 0)},918654:s=>{u.kb("Sin",s,void 0)},918705:s=>{u.kb("Cos",s,void 0)},918756:s=>{u.kb("Tan",s,void 0)},918807:s=>{u.kb("Asin",s,void 0)},918859:s=>{u.kb("Acos",s,void 0)},918911:s=>{u.kb("Atan",s,void 0)},918963:s=>{u.kb("Sinh",s,void 0)},919015:s=>{u.kb("Cosh",s,void 0)},919067:s=>{u.kb("Asinh",s,void 0)},919120:s=>{u.kb("Acosh",s,void 0)},919173:s=>{u.kb("Atanh",s,void 0)},919226:s=>{u.kb("Tanh",s,void 0)},919278:s=>{u.kb("Not",s,void 0)},919329:(s,c,f)=>{u.kb("Clip",s,{min:c,max:f})},919398:s=>{u.kb("Clip",s,void 0)},919450:(s,c)=>{u.kb("Elu",s,{alpha:c})},919508:s=>{u.kb("Gelu",s,void 0)},919560:s=>{u.kb("Relu",s,void 0)},919612:(s,c)=>{u.kb("LeakyRelu",s,{alpha:c})},919676:(s,c)=>{u.kb("ThresholdedRelu",s,{alpha:c})},919746:(s,c)=>{u.kb("Cast",s,{to:c})},919804:s=>{u.kb("Add",s,void 0)},919855:s=>{u.kb("Sub",s,void 0)},919906:s=>{u.kb("Mul",s,void 0)},919957:s=>{u.kb("Div",s,void 0)},920008:s=>{u.kb("Pow",s,void 0)},920059:s=>{u.kb("Equal",s,void 0)},920112:s=>{u.kb("Greater",s,void 0)},920167:s=>{u.kb("GreaterOrEqual",s,void 0)},920229:s=>{u.kb("Less",s,void 0)},920281:s=>{u.kb("LessOrEqual",s,void 0)},920340:(s,c,f,b,w)=>{u.kb("ReduceMean",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},920515:(s,c,f,b,w)=>{u.kb("ReduceMax",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},920689:(s,c,f,b,w)=>{u.kb("ReduceMin",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},920863:(s,c,f,b,w)=>{u.kb("ReduceProd",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921038:(s,c,f,b,w)=>{u.kb("ReduceSum",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921212:(s,c,f,b,w)=>{u.kb("ReduceL1",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921385:(s,c,f,b,w)=>{u.kb("ReduceL2",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921558:(s,c,f,b,w)=>{u.kb("ReduceLogSum",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921735:(s,c,f,b,w)=>{u.kb("ReduceSumSquare",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},921915:(s,c,f,b,w)=>{u.kb("ReduceLogSumExp",s,{keepDims:!!c,noopWithEmptyAxes:!!f,axes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},922095:s=>{u.kb("Where",s,void 0)},922148:(s,c,f)=>{u.kb("Transpose",s,{perm:c?Array.from(i().subarray(Number(c)>>>0,Number(f)>>>0)):[]})},922272:(s,c,f,b)=>{u.kb("DepthToSpace",s,{blocksize:c,mode:Ce(f),format:b?"NHWC":"NCHW"})},922405:(s,c,f,b)=>{u.kb("DepthToSpace",s,{blocksize:c,mode:Ce(f),format:b?"NHWC":"NCHW"})},922538:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z,ue)=>{u.kb("ConvTranspose",s,{format:G?"NHWC":"NCHW",autoPad:c,dilations:[f],group:b,kernelShape:[w],pads:[I,O],strides:[B],wIsConst:()=>!!t()[F>>>0],outputPadding:X?Array.from(i().subarray(Number(X)>>>0,Number(le)>>>0)):[],outputShape:ge?Array.from(i().subarray(Number(ge)>>>0,Number(z)>>>0)):[],activation:Ce(ue)})},922971:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:c,dilations:Array.from(i().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(Number(w)>>>0,2+(Number(w)>>>0)>>>0)),pads:Array.from(i().subarray(Number(I)>>>0,4+(Number(I)>>>0)>>>0)),strides:Array.from(i().subarray(Number(O)>>>0,2+(Number(O)>>>0)>>>0)),wIsConst:()=>!!t()[G>>>0],outputPadding:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],outputShape:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[],activation:Ce(z)})},923632:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z,ue)=>{u.kb("ConvTranspose",s,{format:G?"NHWC":"NCHW",autoPad:c,dilations:[f],group:b,kernelShape:[w],pads:[I,O],strides:[B],wIsConst:()=>!!t()[F>>>0],outputPadding:X?Array.from(i().subarray(Number(X)>>>0,Number(le)>>>0)):[],outputShape:ge?Array.from(i().subarray(Number(ge)>>>0,Number(z)>>>0)):[],activation:Ce(ue)})},924065:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("ConvTranspose",s,{format:B?"NHWC":"NCHW",autoPad:c,dilations:Array.from(i().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),group:b,kernelShape:Array.from(i().subarray(Number(w)>>>0,2+(Number(w)>>>0)>>>0)),pads:Array.from(i().subarray(Number(I)>>>0,4+(Number(I)>>>0)>>>0)),strides:Array.from(i().subarray(Number(O)>>>0,2+(Number(O)>>>0)>>>0)),wIsConst:()=>!!t()[G>>>0],outputPadding:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],outputShape:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[],activation:Ce(z)})},924726:(s,c)=>{u.kb("GlobalAveragePool",s,{format:c?"NHWC":"NCHW"})},924817:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(G)>>>0)):[],pads:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],strides:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[]})},925296:(s,c)=>{u.kb("GlobalAveragePool",s,{format:c?"NHWC":"NCHW"})},925387:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("AveragePool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(G)>>>0)):[],pads:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],strides:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[]})},925866:(s,c)=>{u.kb("GlobalMaxPool",s,{format:c?"NHWC":"NCHW"})},925953:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(G)>>>0)):[],pads:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],strides:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[]})},926428:(s,c)=>{u.kb("GlobalMaxPool",s,{format:c?"NHWC":"NCHW"})},926515:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z)=>{u.kb("MaxPool",s,{format:z?"NHWC":"NCHW",auto_pad:c,ceil_mode:f,count_include_pad:b,storage_order:w,dilations:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],kernel_shape:B?Array.from(i().subarray(Number(B)>>>0,Number(G)>>>0)):[],pads:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],strides:le?Array.from(i().subarray(Number(le)>>>0,Number(ge)>>>0)):[]})},926990:(s,c,f,b,w)=>{u.kb("Gemm",s,{alpha:c,beta:f,transA:b,transB:w})},927094:s=>{u.kb("MatMul",s,void 0)},927148:(s,c,f,b)=>{u.kb("ArgMax",s,{keepDims:!!c,selectLastIndex:!!f,axis:b})},927256:(s,c,f,b)=>{u.kb("ArgMin",s,{keepDims:!!c,selectLastIndex:!!f,axis:b})},927364:(s,c)=>{u.kb("Softmax",s,{axis:c})},927427:(s,c)=>{u.kb("Concat",s,{axis:c})},927487:(s,c,f,b,w)=>{u.kb("Split",s,{axis:c,numOutputs:f,splitSizes:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},927643:s=>{u.kb("Expand",s,void 0)},927697:(s,c)=>{u.kb("Gather",s,{axis:Number(c)})},927768:(s,c)=>{u.kb("GatherElements",s,{axis:Number(c)})},927847:(s,c)=>{u.kb("GatherND",s,{batch_dims:Number(c)})},927926:(s,c,f,b,w,I,O,B,G,F,X)=>{u.kb("Resize",s,{antialias:c,axes:f?Array.from(i().subarray(Number(f)>>>0,Number(b)>>>0)):[],coordinateTransformMode:Ce(w),cubicCoeffA:I,excludeOutside:O,extrapolationValue:B,keepAspectRatioPolicy:Ce(G),mode:Ce(F),nearestMode:Ce(X)})},928288:(s,c,f,b,w,I,O)=>{u.kb("Slice",s,{starts:c?Array.from(i().subarray(Number(c)>>>0,Number(f)>>>0)):[],ends:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[],axes:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[]})},928552:s=>{u.kb("Tile",s,void 0)},928604:(s,c,f)=>{u.kb("InstanceNormalization",s,{epsilon:c,format:f?"NHWC":"NCHW"})},928718:(s,c,f)=>{u.kb("InstanceNormalization",s,{epsilon:c,format:f?"NHWC":"NCHW"})},928832:s=>{u.kb("Range",s,void 0)},928885:(s,c)=>{u.kb("Einsum",s,{equation:Ce(c)})},928966:(s,c,f,b,w)=>{u.kb("Pad",s,{mode:c,value:f,pads:b?Array.from(i().subarray(Number(b)>>>0,Number(w)>>>0)):[]})},929109:(s,c,f,b,w,I)=>{u.kb("BatchNormalization",s,{epsilon:c,momentum:f,spatial:!!w,trainingMode:!!b,format:I?"NHWC":"NCHW"})},929278:(s,c,f,b,w,I)=>{u.kb("BatchNormalization",s,{epsilon:c,momentum:f,spatial:!!w,trainingMode:!!b,format:I?"NHWC":"NCHW"})},929447:(s,c,f)=>{u.kb("CumSum",s,{exclusive:Number(c),reverse:Number(f)})},929544:(s,c,f)=>{u.kb("DequantizeLinear",s,{axis:c,blockSize:f})},929634:(s,c,f,b,w)=>{u.kb("GridSample",s,{align_corners:c,mode:Ce(f),padding_mode:Ce(b),format:w?"NHWC":"NCHW"})},929804:(s,c,f,b,w)=>{u.kb("GridSample",s,{align_corners:c,mode:Ce(f),padding_mode:Ce(b),format:w?"NHWC":"NCHW"})},929974:(s,c,f,b,w,I,O,B,G)=>{u.kb("Attention",s,{numHeads:c,isUnidirectional:f,maskFilterValue:b,scale:w,doRotary:I,qkvHiddenSizes:O?Array.from(i().subarray(Number(B)>>>0,Number(B)+O>>>0)):[],pastPresentShareBuffer:!!G})},930246:s=>{u.kb("BiasAdd",s,void 0)},930301:s=>{u.kb("BiasSplitGelu",s,void 0)},930362:s=>{u.kb("FastGelu",s,void 0)},930418:(s,c,f,b,w,I,O,B,G,F,X,le,ge,z,ue,Se)=>{u.kb("Conv",s,{format:le?"NHWC":"NCHW",auto_pad:c,dilations:f?Array.from(i().subarray(Number(f)>>>0,Number(b)>>>0)):[],group:w,kernel_shape:I?Array.from(i().subarray(Number(I)>>>0,Number(O)>>>0)):[],pads:B?Array.from(i().subarray(Number(B)>>>0,Number(G)>>>0)):[],strides:F?Array.from(i().subarray(Number(F)>>>0,Number(X)>>>0)):[],w_is_const:()=>!!t()[Number(ge)>>>0],activation:Ce(z),activation_params:ue?Array.from(d().subarray(Number(ue)>>>0,Number(Se)>>>0)):[]})},931002:s=>{u.kb("Gelu",s,void 0)},931054:(s,c,f,b,w,I,O,B,G)=>{u.kb("GroupQueryAttention",s,{numHeads:c,kvNumHeads:f,scale:b,softcap:w,doRotary:I,rotaryInterleaved:O,smoothSoftmax:B,localWindowSize:G})},931271:(s,c,f,b)=>{u.kb("LayerNormalization",s,{axis:c,epsilon:f,simplified:!!b})},931382:(s,c,f,b)=>{u.kb("LayerNormalization",s,{axis:c,epsilon:f,simplified:!!b})},931493:(s,c,f,b,w,I)=>{u.kb("MatMulNBits",s,{k:c,n:f,accuracyLevel:b,bits:w,blockSize:I})},931620:(s,c,f,b,w,I)=>{u.kb("MultiHeadAttention",s,{numHeads:c,isUnidirectional:f,maskFilterValue:b,scale:w,doRotary:I})},931779:(s,c)=>{u.kb("QuickGelu",s,{alpha:c})},931843:(s,c,f,b,w)=>{u.kb("RotaryEmbedding",s,{interleaved:!!c,numHeads:f,rotaryEmbeddingDim:b,scale:w})},931982:(s,c,f)=>{u.kb("SkipLayerNormalization",s,{epsilon:c,simplified:!!f})},932084:(s,c,f)=>{u.kb("SkipLayerNormalization",s,{epsilon:c,simplified:!!f})},932186:(s,c,f,b)=>{u.kb("GatherBlockQuantized",s,{gatherAxis:c,quantizeAxis:f,blockSize:b})},932307:s=>{u.$b(s)},932341:(s,c)=>u.cc(Number(s),Number(c),u.Gb.hc,u.Gb.errors)};function vc(s,c,f){return yi(async()=>{await u.Yb(Number(s),Number(c),Number(f))})}function $c(){return typeof wasmOffsetConverter<"u"}function bn(s){this.name="ExitStatus",this.message=`Program terminated with exit(${s})`,this.status=s}var yn=s=>{s.terminate(),s.onmessage=()=>{}},No=s=>{pt.length==0&&(Fo(),Ho(pt[0]));var c=pt.pop();if(!c)return 6;$t.push(c),Ze[s.Bb]=c,c.Bb=s.Bb;var f={cmd:"run",start_routine:s.ic,arg:s.Rb,pthread_ptr:s.Bb};return c.postMessage(f,s.nc),0},vt=0,$e=(s,c,...f)=>{for(var b=2*f.length,w=Mn(),I=Bn(8*b),O=I>>>3,B=0;B<f.length;B++){var G=f[B];typeof G=="bigint"?(W[O+2*B]=1n,W[O+2*B+1]=G):(W[O+2*B]=0n,l()[O+2*B+1>>>0]=G)}return s=Li(s,0,b,I,c),hr(w),s};function _n(s){if(g)return $e(0,1,s);if(j=s,!(0<vt)){for(var c of $t)yn(c);for(c of pt)yn(c);pt=[],$t=[],Ze=[],De=!0}A(0,new bn(s))}function Vo(s){if(g)return $e(1,0,s);wn(s)}var wn=s=>{if(j=s,g)throw Vo(s),"unwind";_n(s)},pt=[],$t=[],Wo=[],Ze={},Lo=s=>{var c=s.Bb;delete Ze[c],pt.push(s),$t.splice($t.indexOf(s),1),s.Bb=0,Dn(c)};function Go(){Wo.forEach(s=>s())}var Ho=s=>new Promise(c=>{s.onmessage=w=>{var I=(w=w.data).cmd;if(w.targetThread&&w.targetThread!=Rt()){var O=Ze[w.targetThread];O?O.postMessage(w,w.transferList):L(`Internal error! Worker sent a message "${I}" to target pthread ${w.targetThread}, but that thread no longer exists!`)}else I==="checkMailbox"?or():I==="spawnThread"?No(w):I==="cleanupThread"?Lo(Ze[w.thread]):I==="killThread"?(w=w.thread,I=Ze[w],delete Ze[w],yn(I),Dn(w),$t.splice($t.indexOf(I),1),I.Bb=0):I==="cancelThread"?Ze[w.thread].postMessage({cmd:"cancel"}):I==="loaded"?(s.loaded=!0,c(s)):I==="alert"?alert(`Thread ${w.threadId}: ${w.text}`):w.target==="setimmediate"?s.postMessage(w):I==="callHandler"?u[w.handler](...w.args):I&&L(`worker sent an unknown command ${I}`)},s.onerror=w=>{throw L(`worker sent an error! ${w.filename}:${w.lineno}: ${w.message}`),w};var f,b=[];for(f of[])u.hasOwnProperty(f)&&b.push(f);s.postMessage({cmd:"load",handlers:b,wasmMemory:K,wasmModule:we})});function Fo(){var s=new Worker(import.meta.url.startsWith("file:")?new URL(/* asset import */ __webpack_require__(/*! ort.bundle.min.mjs */ "./node_modules/onnxruntime-web/dist/ort.bundle.min.mjs?46eb"), __webpack_require__.b):new URL(import.meta.url),{type:"module",workerData:"em-pthread",name:"em-pthread"});pt.push(s)}var nr=s=>{for(;0<s.length;)s.shift()(u)},xc=()=>{var s=Rt(),c=a()[s+52>>>2>>>0];s=a()[s+56>>>2>>>0],Hi(c,c-s),hr(c)},Sc=(s,c)=>{vt=0,s=Fi(s,c),0<vt?j=s:fr(s)};class Tc{constructor(c){this.Kb=c-24}}function Ic(s,c,f){var b=new Tc(s>>>=0);throw c>>>=0,f>>>=0,a()[b.Kb+16>>>2>>>0]=0,a()[b.Kb+4>>>2>>>0]=c,a()[b.Kb+8>>>2>>>0]=f,s}function qo(s,c,f,b){return g?$e(2,1,s,c,f,b):Ko(s,c,f,b)}function Ko(s,c,f,b){if(s>>>=0,c>>>=0,f>>>=0,b>>>=0,x===void 0)return L("Current environment does not support SharedArrayBuffer, pthreads are not available!"),6;var w=[];return g&&w.length===0?qo(s,c,f,b):(s={ic:f,Bb:s,Rb:b,nc:w},g?(s.Nb="spawnThread",postMessage(s,w),0):No(s))}var jo=typeof TextDecoder<"u"?new TextDecoder("utf8"):void 0,Yo=(s,c,f)=>{var b=(c>>>=0)+f;for(f=c;s[f]&&!(f>=b);)++f;if(16<f-c&&s.buffer&&jo)return jo.decode(s.buffer instanceof x?s.slice(c,f):s.subarray(c,f));for(b="";c<f;){var w=s[c++];if(128&w){var I=63&s[c++];if((224&w)==192)b+=String.fromCharCode((31&w)<<6|I);else{var O=63&s[c++];65536>(w=(240&w)==224?(15&w)<<12|I<<6|O:(7&w)<<18|I<<12|O<<6|63&s[c++])?b+=String.fromCharCode(w):(w-=65536,b+=String.fromCharCode(55296|w>>10,56320|1023&w))}}else b+=String.fromCharCode(w)}return b},Ce=(s,c)=>(s>>>=0)?Yo(r(),s,c):"";function Zo(s,c,f){return g?$e(3,1,s,c,f):0}function Qo(s,c){if(g)return $e(4,1,s,c)}var vn=s=>{for(var c=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);127>=b?c++:2047>=b?c+=2:55296<=b&&57343>=b?(c+=4,++f):c+=3}return c},Xo=(s,c,f,b)=>{if(!(0<b))return 0;var w=f>>>=0;b=f+b-1;for(var I=0;I<s.length;++I){var O=s.charCodeAt(I);if(55296<=O&&57343>=O&&(O=65536+((1023&O)<<10)|1023&s.charCodeAt(++I)),127>=O){if(f>=b)break;c[f++>>>0]=O}else{if(2047>=O){if(f+1>=b)break;c[f++>>>0]=192|O>>6}else{if(65535>=O){if(f+2>=b)break;c[f++>>>0]=224|O>>12}else{if(f+3>=b)break;c[f++>>>0]=240|O>>18,c[f++>>>0]=128|O>>12&63}c[f++>>>0]=128|O>>6&63}c[f++>>>0]=128|63&O}}return c[f>>>0]=0,f-w},Dt=(s,c,f)=>Xo(s,r(),c,f);function Jo(s,c){if(g)return $e(5,1,s,c)}function ei(s,c,f){if(g)return $e(6,1,s,c,f)}function ti(s,c,f){return g?$e(7,1,s,c,f):0}function ri(s,c){if(g)return $e(8,1,s,c)}function ni(s,c,f){if(g)return $e(9,1,s,c,f)}function oi(s,c,f,b){if(g)return $e(10,1,s,c,f,b)}function ii(s,c,f,b){if(g)return $e(11,1,s,c,f,b)}function ai(s,c,f,b){if(g)return $e(12,1,s,c,f,b)}function si(s){if(g)return $e(13,1,s)}function ui(s,c){if(g)return $e(14,1,s,c)}function di(s,c,f){if(g)return $e(15,1,s,c,f)}var li,mt,Cc=()=>{ct("")},Qe=s=>{for(var c="";r()[s>>>0];)c+=li[r()[s++>>>0]];return c},$n={},xn={},Ac={};function st(s,c,f={}){if(!("argPackAdvance"in c))throw new TypeError("registerType registeredInstance requires argPackAdvance");return function(b,w,I={}){var O=w.name;if(!b)throw new mt(`type "${O}" must have a positive integer typeid pointer`);if(xn.hasOwnProperty(b)){if(I.Tb)return;throw new mt(`Cannot register type '${O}' twice`)}xn[b]=w,delete Ac[b],$n.hasOwnProperty(b)&&(w=$n[b],delete $n[b],w.forEach(B=>B()))}(s,c,f)}var ci=(s,c,f)=>{switch(c){case 1:return f?b=>t()[b>>>0]:b=>r()[b>>>0];case 2:return f?b=>n()[b>>>1>>>0]:b=>o()[b>>>1>>>0];case 4:return f?b=>i()[b>>>2>>>0]:b=>a()[b>>>2>>>0];case 8:return f?b=>W[b>>>3]:b=>Y[b>>>3];default:throw new TypeError(`invalid integer width (${c}): ${s}`)}};function kc(s,c,f){f>>>=0,st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:b=>b,toWireType:function(b,w){if(typeof w!="bigint"&&typeof w!="number")throw w=w===null?"null":(b=typeof w)=="object"||b==="array"||b==="function"?w.toString():""+w,new TypeError(`Cannot convert "${w}" to ${this.name}`);return typeof w=="number"&&(w=BigInt(w)),w},argPackAdvance:ft,readValueFromPointer:ci(c,f,c.indexOf("u")==-1),Eb:null})}var ft=8;function Ec(s,c,f,b){st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:function(w){return!!w},toWireType:function(w,I){return I?f:b},argPackAdvance:ft,readValueFromPointer:function(w){return this.fromWireType(r()[w>>>0])},Eb:null})}var Sn=[],ut=[];function Tn(s){9<(s>>>=0)&&--ut[s+1]==0&&(ut[s]=void 0,Sn.push(s))}var Re=s=>{if(!s)throw new mt("Cannot use deleted val. handle = "+s);return ut[s]},We=s=>{switch(s){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:let c=Sn.pop()||ut.length;return ut[c]=s,ut[c+1]=1,c}};function In(s){return this.fromWireType(a()[s>>>2>>>0])}var Pc={name:"emscripten::val",fromWireType:s=>{var c=Re(s);return Tn(s),c},toWireType:(s,c)=>We(c),argPackAdvance:ft,readValueFromPointer:In,Eb:null};function zc(s){return st(s>>>0,Pc)}var Oc=(s,c)=>{switch(c){case 4:return function(f){return this.fromWireType(d()[f>>>2>>>0])};case 8:return function(f){return this.fromWireType(l()[f>>>3>>>0])};default:throw new TypeError(`invalid float width (${c}): ${s}`)}};function Dc(s,c,f){f>>>=0,st(s>>>=0,{name:c=Qe(c>>>0),fromWireType:b=>b,toWireType:(b,w)=>w,argPackAdvance:ft,readValueFromPointer:Oc(c,f),Eb:null})}function Bc(s,c,f,b,w){if(s>>>=0,f>>>=0,c=Qe(c>>>0),w===-1&&(w=4294967295),w=B=>B,b===0){var I=32-8*f;w=B=>B<<I>>>I}var O=c.includes("unsigned")?function(B,G){return G>>>0}:function(B,G){return G};st(s,{name:c,fromWireType:w,toWireType:O,argPackAdvance:ft,readValueFromPointer:ci(c,f,b!==0),Eb:null})}function Mc(s,c,f){function b(I){var O=a()[I>>>2>>>0];return I=a()[I+4>>>2>>>0],new w(t().buffer,I,O)}var w=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][c];st(s>>>=0,{name:f=Qe(f>>>0),fromWireType:b,argPackAdvance:ft,readValueFromPointer:b},{Tb:!0})}function Rc(s,c){s>>>=0;var f=(c=Qe(c>>>0))==="std::string";st(s,{name:c,fromWireType:function(b){var w=a()[b>>>2>>>0],I=b+4;if(f)for(var O=I,B=0;B<=w;++B){var G=I+B;if(B==w||r()[G>>>0]==0){if(O=Ce(O,G-O),F===void 0)var F=O;else F+=String.fromCharCode(0),F+=O;O=G+1}}else{for(F=Array(w),B=0;B<w;++B)F[B]=String.fromCharCode(r()[I+B>>>0]);F=F.join("")}return Je(b),F},toWireType:function(b,w){w instanceof ArrayBuffer&&(w=new Uint8Array(w));var I=typeof w=="string";if(!(I||w instanceof Uint8Array||w instanceof Uint8ClampedArray||w instanceof Int8Array))throw new mt("Cannot pass non-string to std::string");var O=f&&I?vn(w):w.length,B=mr(4+O+1),G=B+4;if(a()[B>>>2>>>0]=O,f&&I)Dt(w,G,O+1);else if(I)for(I=0;I<O;++I){var F=w.charCodeAt(I);if(255<F)throw Je(G),new mt("String has UTF-16 code units that do not fit in 8 bits");r()[G+I>>>0]=F}else for(I=0;I<O;++I)r()[G+I>>>0]=w[I];return b!==null&&b.push(Je,B),B},argPackAdvance:ft,readValueFromPointer:In,Eb(b){Je(b)}})}var pi=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,Uc=(s,c)=>{for(var f=s>>1,b=f+c/2;!(f>=b)&&o()[f>>>0];)++f;if(32<(f<<=1)-s&&pi)return pi.decode(r().slice(s,f));for(f="",b=0;!(b>=c/2);++b){var w=n()[s+2*b>>>1>>>0];if(w==0)break;f+=String.fromCharCode(w)}return f},Nc=(s,c,f)=>{if(f??=2147483647,2>f)return 0;var b=c;f=(f-=2)<2*s.length?f/2:s.length;for(var w=0;w<f;++w){var I=s.charCodeAt(w);n()[c>>>1>>>0]=I,c+=2}return n()[c>>>1>>>0]=0,c-b},Vc=s=>2*s.length,Wc=(s,c)=>{for(var f=0,b="";!(f>=c/4);){var w=i()[s+4*f>>>2>>>0];if(w==0)break;++f,65536<=w?(w-=65536,b+=String.fromCharCode(55296|w>>10,56320|1023&w)):b+=String.fromCharCode(w)}return b},Lc=(s,c,f)=>{if(c>>>=0,f??=2147483647,4>f)return 0;var b=c;f=b+f-4;for(var w=0;w<s.length;++w){var I=s.charCodeAt(w);if(55296<=I&&57343>=I&&(I=65536+((1023&I)<<10)|1023&s.charCodeAt(++w)),i()[c>>>2>>>0]=I,(c+=4)+4>f)break}return i()[c>>>2>>>0]=0,c-b},Gc=s=>{for(var c=0,f=0;f<s.length;++f){var b=s.charCodeAt(f);55296<=b&&57343>=b&&++f,c+=4}return c};function Hc(s,c,f){if(s>>>=0,c>>>=0,f=Qe(f>>>=0),c===2)var b=Uc,w=Nc,I=Vc,O=B=>o()[B>>>1>>>0];else c===4&&(b=Wc,w=Lc,I=Gc,O=B=>a()[B>>>2>>>0]);st(s,{name:f,fromWireType:B=>{for(var G,F=a()[B>>>2>>>0],X=B+4,le=0;le<=F;++le){var ge=B+4+le*c;le!=F&&O(ge)!=0||(X=b(X,ge-X),G===void 0?G=X:(G+=String.fromCharCode(0),G+=X),X=ge+c)}return Je(B),G},toWireType:(B,G)=>{if(typeof G!="string")throw new mt(`Cannot pass non-string to C++ string type ${f}`);var F=I(G),X=mr(4+F+c);return a()[X>>>2>>>0]=F/c,w(G,X+4,F+c),B!==null&&B.push(Je,X),X},argPackAdvance:ft,readValueFromPointer:In,Eb(B){Je(B)}})}function Fc(s,c){st(s>>>=0,{Ub:!0,name:c=Qe(c>>>0),argPackAdvance:0,fromWireType:()=>{},toWireType:()=>{}})}var qc=()=>1;function Kc(s){On(s>>>0,!y,1,!_,131072,!1),Go()}var mi=s=>{if(!De)try{if(s(),!(0<vt))try{g?fr(j):wn(j)}catch(c){c instanceof bn||c=="unwind"||A(0,c)}}catch(c){c instanceof bn||c=="unwind"||A(0,c)}};function Cn(s){s>>>=0,typeof Atomics.oc=="function"&&(Atomics.oc(i(),s>>>2,s).value.then(or),s+=128,Atomics.store(i(),s>>>2,1))}var or=()=>{var s=Rt();s&&(Cn(s),mi(Gi))};function jc(s,c){(s>>>=0)==c>>>0?setTimeout(or):g?postMessage({targetThread:s,cmd:"checkMailbox"}):(s=Ze[s])&&s.postMessage({cmd:"checkMailbox"})}var An=[];function Yc(s,c,f,b,w){for(c>>>=0,b/=2,An.length=b,f=w>>>0>>>3,w=0;w<b;w++)An[w]=W[f+2*w]?W[f+2*w+1]:l()[f+2*w+1>>>0];return(c?gn[c]:Np[s])(...An)}function Zc(s){s>>>=0,g?postMessage({cmd:"cleanupThread",thread:s}):Lo(Ze[s])}function Qc(s){}var ir=(s,c)=>{var f=xn[s];if(f===void 0)throw s=Ni(s),f=Qe(s),Je(s),new mt(`${c} has unknown type ${f}`);return f},fi=(s,c,f)=>{var b=[];return s=s.toWireType(b,f),b.length&&(a()[c>>>2>>>0]=We(b)),s};function Xc(s,c,f){return c>>>=0,f>>>=0,s=Re(s>>>0),c=ir(c,"emval::as"),fi(c,f,s)}function Jc(s,c){return c>>>=0,s=Re(s>>>0),(c=ir(c,"emval::as")).toWireType(null,s)}var ar=s=>{try{s()}catch(c){ct(c)}},ht=0,Xe=null,hi=0,sr=[],gi={},bi={},ep=0,kn=null,tp=[];function yi(s){return function(c){if(!De){if(ht===0){var f=!1,b=!1;c((w=0)=>{if(!De&&(hi=w,f=!0,b)){ht=2,ar(()=>ji(Xe)),typeof Browser<"u"&&Browser.Lb.Sb&&Browser.Lb.resume(),w=!1;try{var I=function(){var G=i()[Xe+8>>>2>>>0];return G=Z[bi[G]],--vt,G()}()}catch(G){I=G,w=!0}var O=!1;if(!Xe){var B=kn;B&&(kn=null,(w?B.reject:B.resolve)(I),O=!0)}if(w&&!O)throw I}}),b=!0,f||(ht=1,Xe=function(){var w=mr(65548),I=w+12;a()[w>>>2>>>0]=I,a()[w+4>>>2>>>0]=I+65536,I=sr[0];var O=gi[I];return O===void 0&&(O=ep++,gi[I]=O,bi[O]=I),I=O,i()[w+8>>>2>>>0]=I,w}(),typeof Browser<"u"&&Browser.Lb.Sb&&Browser.Lb.pause(),ar(()=>qi(Xe)))}else ht===2?(ht=0,ar(Yi),Je(Xe),Xe=null,tp.forEach(mi)):ct(`invalid state: ${ht}`);return hi}}(c=>{s().then(c)})}function rp(s){return s>>>=0,yi(()=>(s=Re(s)).then(We))}var ur=[];function np(s,c,f,b){return f>>>=0,b>>>=0,(s=ur[s>>>0])(null,c=Re(c>>>0),f,b)}var op={},dr=s=>{var c=op[s];return c===void 0?Qe(s):c};function ip(s,c,f,b,w){return f>>>=0,b>>>=0,w>>>=0,(s=ur[s>>>0])(c=Re(c>>>0),c[f=dr(f)],b,w)}var _i=()=>typeof globalThis=="object"?globalThis:Function("return this")();function ap(s){return(s>>>=0)==0?We(_i()):(s=dr(s),We(_i()[s]))}var sp=s=>{var c=ur.length;return ur.push(s),c},up=(s,c)=>{for(var f=Array(s),b=0;b<s;++b)f[b]=ir(a()[c+4*b>>>2>>>0],"parameter "+b);return f},wi=(s,c)=>Object.defineProperty(c,"name",{value:s});function dp(s,c,f){var b=(c=up(s,c>>>0)).shift();s--;var w=`return function (obj, func, destructorsRef, args) {
3972
+ `,I=0,O=[];f===0&&O.push("obj");for(var B=["retType"],G=[b],F=0;F<s;++F)O.push("arg"+F),B.push("argType"+F),G.push(c[F]),w+=` var arg${F} = argType${F}.readValueFromPointer(args${I?"+"+I:""});
3973
+ `,I+=c[F].argPackAdvance;return w+=` var rv = ${f===1?"new func":"func.call"}(${O.join(", ")});
3974
+ `,b.Ub||(B.push("emval_returnValue"),G.push(fi),w+=` return emval_returnValue(retType, destructorsRef, rv);
3920
3975
  `),B.push(w+`};
3921
- `),s=function(X){var ce=Function;if(!(ce instanceof Function))throw new TypeError(`new_ called with constructor type ${typeof ce} which is not a function`);var ge=vi(ce.name||"unknownFunctionName",function(){});return ge.prototype=ce.prototype,ge=new ge,(X=ce.apply(ge,X))instanceof Object?X:ge}(B)(...L),f=`methodCaller<(${c.map(X=>X.name).join(", ")}) => ${b.name}>`,sp(vi(f,s))}function lp(s){return s=lr(s>>>0),Ve(u[s])}function cp(s,c){return c>>>=0,s=Me(s>>>0),c=Me(c),Ve(s[c])}function pp(s){9<(s>>>=0)&&(ut[s+1]+=1)}function mp(){return Ve([])}function fp(s){s=Me(s>>>0);for(var c=Array(s.length),f=0;f<s.length;f++)c[f]=s[f];return Ve(c)}function hp(s){return Ve(lr(s>>>0))}function gp(){return Ve({})}function bp(s){for(var c=Me(s>>>=0);c.length;){var f=c.pop();c.pop()(f)}In(s)}function yp(s,c,f){c>>>=0,f>>>=0,s=Me(s>>>0),c=Me(c),f=Me(f),s[c]=f}function _p(s,c){return c>>>=0,s=(s=ar(s>>>0,"_emval_take_value")).readValueFromPointer(c),Ve(s)}function wp(s,c){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),c>>>=0,s=new Date(1e3*s),i()[c>>>2>>>0]=s.getUTCSeconds(),i()[c+4>>>2>>>0]=s.getUTCMinutes(),i()[c+8>>>2>>>0]=s.getUTCHours(),i()[c+12>>>2>>>0]=s.getUTCDate(),i()[c+16>>>2>>>0]=s.getUTCMonth(),i()[c+20>>>2>>>0]=s.getUTCFullYear()-1900,i()[c+24>>>2>>>0]=s.getUTCDay(),s=(s.getTime()-Date.UTC(s.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,i()[c+28>>>2>>>0]=s}var Dt=s=>s%4==0&&(s%100!=0||s%400==0),$i=[0,31,60,91,121,152,182,213,244,274,305,335],xi=[0,31,59,90,120,151,181,212,243,273,304,334];function vp(s,c){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),c>>>=0,s=new Date(1e3*s),i()[c>>>2>>>0]=s.getSeconds(),i()[c+4>>>2>>>0]=s.getMinutes(),i()[c+8>>>2>>>0]=s.getHours(),i()[c+12>>>2>>>0]=s.getDate(),i()[c+16>>>2>>>0]=s.getMonth(),i()[c+20>>>2>>>0]=s.getFullYear()-1900,i()[c+24>>>2>>>0]=s.getDay();var f=(Dt(s.getFullYear())?$i:xi)[s.getMonth()]+s.getDate()-1|0;i()[c+28>>>2>>>0]=f,i()[c+36>>>2>>>0]=-60*s.getTimezoneOffset(),f=new Date(s.getFullYear(),6,1).getTimezoneOffset();var b=new Date(s.getFullYear(),0,1).getTimezoneOffset();s=0|(f!=b&&s.getTimezoneOffset()==Math.min(b,f)),i()[c+32>>>2>>>0]=s}function $p(s){s>>>=0;var c=new Date(i()[s+20>>>2>>>0]+1900,i()[s+16>>>2>>>0],i()[s+12>>>2>>>0],i()[s+8>>>2>>>0],i()[s+4>>>2>>>0],i()[s>>>2>>>0],0),f=i()[s+32>>>2>>>0],b=c.getTimezoneOffset(),w=new Date(c.getFullYear(),6,1).getTimezoneOffset(),I=new Date(c.getFullYear(),0,1).getTimezoneOffset(),O=Math.min(I,w);return 0>f?i()[s+32>>>2>>>0]=+(w!=I&&O==b):0<f!=(O==b)&&(w=Math.max(I,w),c.setTime(c.getTime()+6e4*((0<f?O:w)-b))),i()[s+24>>>2>>>0]=c.getDay(),f=(Dt(c.getFullYear())?$i:xi)[c.getMonth()]+c.getDate()-1|0,i()[s+28>>>2>>>0]=f,i()[s>>>2>>>0]=c.getSeconds(),i()[s+4>>>2>>>0]=c.getMinutes(),i()[s+8>>>2>>>0]=c.getHours(),i()[s+12>>>2>>>0]=c.getDate(),i()[s+16>>>2>>>0]=c.getMonth(),i()[s+20>>>2>>>0]=c.getYear(),s=c.getTime(),BigInt(isNaN(s)?-1:s/1e3)}function Si(s,c,f,b,w,I,O){return g?xe(16,1,s,c,f,b,w,I,O):-52}function Ti(s,c,f,b,w,I){if(g)return xe(17,1,s,c,f,b,w,I)}function xp(s,c,f,b){s>>>=0,c>>>=0,f>>>=0,b>>>=0;var w=new Date().getFullYear(),I=new Date(w,0,1),O=new Date(w,6,1);w=I.getTimezoneOffset();var B=O.getTimezoneOffset(),L=Math.max(w,B);a()[s>>>2>>>0]=60*L,i()[c>>>2>>>0]=+(w!=B),I=(s=H=>H.toLocaleTimeString(void 0,{hour12:!1,timeZoneName:"short"}).split(" ")[1])(I),O=s(O),B<w?(Ot(I,f,17),Ot(O,b,17)):(Ot(I,b,17),Ot(O,f,17))}var Pn=[],Ii=(s,c)=>{Pn.length=0;for(var f;f=r()[s++>>>0];){var b=f!=105;c+=(b&=f!=112)&&c%8?4:0,Pn.push(f==112?a()[c>>>2>>>0]:f==106?q[c>>>3]:f==105?i()[c>>>2>>>0]:l()[c>>>3>>>0]),c+=b?8:4}return Pn};function Sp(s,c,f){return s>>>=0,c=Ii(c>>>0,f>>>0),bn[s](...c)}function Tp(s,c,f){return s>>>=0,c=Ii(c>>>0,f>>>0),bn[s](...c)}var Ip=()=>{},Cp=()=>Date.now();function Ap(s,c){return j(Ce(s>>>0,c>>>0))}var Ci,kp=()=>{throw wt+=1,"unwind"};function Ep(){return 4294901760}Ci=()=>performance.timeOrigin+performance.now();var Pp=()=>navigator.hardwareConcurrency;function zp(){return ct("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}function Op(s){s>>>=0;var c=r().length;if(s<=c||4294901760<s)return!1;for(var f=1;4>=f;f*=2){var b=c*(1+.2/f);b=Math.min(b,s+100663296);var w=Math;b=Math.max(s,b);e:{w=(w.min.call(w,4294901760,b+(65536-b%65536)%65536)-se.buffer.byteLength+65535)/65536;try{se.grow(w),ye();var I=1;break e}catch{}I=void 0}if(I)return!0}return!1}var cr=()=>(ct("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),Bt={},Ai=s=>{s.forEach(c=>{var f=cr();f&&(Bt[f]=c)})};function Dp(){var s=Error().stack.toString().split(`
3922
- `);return s[0]=="Error"&&s.shift(),Ai(s),Bt.Qb=cr(),Bt.fc=s,Bt.Qb}function Bp(s,c,f){if(s>>>=0,c>>>=0,Bt.Qb==s)var b=Bt.fc;else(b=Error().stack.toString().split(`
3923
- `))[0]=="Error"&&b.shift(),Ai(b);for(var w=3;b[w]&&cr()!=s;)++w;for(s=0;s<f&&b[s+w];++s)i()[c+4*s>>>2>>>0]=cr();return s}var zn,On={},ki=()=>{if(!zn){var s,c={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:A||"./this.program"};for(s in On)On[s]===void 0?delete c[s]:c[s]=On[s];var f=[];for(s in c)f.push(`${s}=${c[s]}`);zn=f}return zn};function Ei(s,c){if(g)return xe(18,1,s,c);s>>>=0,c>>>=0;var f=0;return ki().forEach((b,w)=>{var I=c+f;for(w=a()[s+4*w>>>2>>>0]=I,I=0;I<b.length;++I)t()[w++>>>0]=b.charCodeAt(I);t()[w>>>0]=0,f+=b.length+1}),0}function Pi(s,c){if(g)return xe(19,1,s,c);s>>>=0,c>>>=0;var f=ki();a()[s>>>2>>>0]=f.length;var b=0;return f.forEach(w=>b+=w.length+1),a()[c>>>2>>>0]=b,0}function zi(s){return g?xe(20,1,s):52}function Oi(s,c,f,b){return g?xe(21,1,s,c,f,b):52}function Di(s,c,f,b){return g?xe(22,1,s,c,f,b):70}var Mp=[null,[],[]];function Bi(s,c,f,b){if(g)return xe(23,1,s,c,f,b);c>>>=0,f>>>=0,b>>>=0;for(var w=0,I=0;I<f;I++){var O=a()[c>>>2>>>0],B=a()[c+4>>>2>>>0];c+=8;for(var L=0;L<B;L++){var H=r()[O+L>>>0],X=Mp[s];H===0||H===10?((s===1?K:j)(Zo(X,0)),X.length=0):X.push(H)}w+=B}return a()[b>>>2>>>0]=w,0}var Mi=[31,29,31,30,31,30,31,31,30,31,30,31],Ri=[31,28,31,30,31,30,31,31,30,31,30,31],Rp=(s,c)=>{t().set(s,c>>>0)};function Ui(s,c,f,b){function w(z,de,Te){for(z=typeof z=="number"?z.toString():z||"";z.length<de;)z=Te[0]+z;return z}function I(z,de){return w(z,de,"0")}function O(z,de){function Te(Xi){return 0>Xi?-1:0<Xi?1:0}var $t;return($t=Te(z.getFullYear()-de.getFullYear()))===0&&($t=Te(z.getMonth()-de.getMonth()))===0&&($t=Te(z.getDate()-de.getDate())),$t}function B(z){switch(z.getDay()){case 0:return new Date(z.getFullYear()-1,11,29);case 1:return z;case 2:return new Date(z.getFullYear(),0,3);case 3:return new Date(z.getFullYear(),0,2);case 4:return new Date(z.getFullYear(),0,1);case 5:return new Date(z.getFullYear()-1,11,31);case 6:return new Date(z.getFullYear()-1,11,30)}}function L(z){var de=z.Cb;for(z=new Date(new Date(z.Db+1900,0,1).getTime());0<de;){var Te=z.getMonth(),$t=(Dt(z.getFullYear())?Mi:Ri)[Te];if(!(de>$t-z.getDate())){z.setDate(z.getDate()+de);break}de-=$t-z.getDate()+1,z.setDate(1),11>Te?z.setMonth(Te+1):(z.setMonth(0),z.setFullYear(z.getFullYear()+1))}return Te=new Date(z.getFullYear()+1,0,4),de=B(new Date(z.getFullYear(),0,4)),Te=B(Te),0>=O(de,z)?0>=O(Te,z)?z.getFullYear()+1:z.getFullYear():z.getFullYear()-1}s>>>=0,c>>>=0,f>>>=0,b>>>=0;var H=a()[b+40>>>2>>>0];for(var X in b={lc:i()[b>>>2>>>0],kc:i()[b+4>>>2>>>0],Ib:i()[b+8>>>2>>>0],Mb:i()[b+12>>>2>>>0],Jb:i()[b+16>>>2>>>0],Db:i()[b+20>>>2>>>0],vb:i()[b+24>>>2>>>0],Cb:i()[b+28>>>2>>>0],sc:i()[b+32>>>2>>>0],jc:i()[b+36>>>2>>>0],mc:H?Ce(H):""},f=Ce(f),H={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})f=f.replace(new RegExp(X,"g"),H[X]);var ce="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),ge="January February March April May June July August September October November December".split(" ");for(X in H={"%a":z=>ce[z.vb].substring(0,3),"%A":z=>ce[z.vb],"%b":z=>ge[z.Jb].substring(0,3),"%B":z=>ge[z.Jb],"%C":z=>I((z.Db+1900)/100|0,2),"%d":z=>I(z.Mb,2),"%e":z=>w(z.Mb,2," "),"%g":z=>L(z).toString().substring(2),"%G":L,"%H":z=>I(z.Ib,2),"%I":z=>((z=z.Ib)==0?z=12:12<z&&(z-=12),I(z,2)),"%j":z=>{for(var de=0,Te=0;Te<=z.Jb-1;de+=(Dt(z.Db+1900)?Mi:Ri)[Te++]);return I(z.Mb+de,3)},"%m":z=>I(z.Jb+1,2),"%M":z=>I(z.kc,2),"%n":()=>`
3924
- `,"%p":z=>0<=z.Ib&&12>z.Ib?"AM":"PM","%S":z=>I(z.lc,2),"%t":()=>" ","%u":z=>z.vb||7,"%U":z=>I(Math.floor((z.Cb+7-z.vb)/7),2),"%V":z=>{var de=Math.floor((z.Cb+7-(z.vb+6)%7)/7);if(2>=(z.vb+371-z.Cb-2)%7&&de++,de)de==53&&((Te=(z.vb+371-z.Cb)%7)==4||Te==3&&Dt(z.Db)||(de=1));else{de=52;var Te=(z.vb+7-z.Cb-1)%7;(Te==4||Te==5&&Dt(z.Db%400-1))&&de++}return I(de,2)},"%w":z=>z.vb,"%W":z=>I(Math.floor((z.Cb+7-(z.vb+6)%7)/7),2),"%y":z=>(z.Db+1900).toString().substring(2),"%Y":z=>z.Db+1900,"%z":z=>{var de=0<=(z=z.jc);return z=Math.abs(z)/60,(de?"+":"-")+("0000"+(z/60*100+z%60)).slice(-4)},"%Z":z=>z.mc,"%%":()=>"%"},f=f.replace(/%%/g,"\0\0"),H)f.includes(X)&&(f=f.replace(new RegExp(X,"g"),H[X](b)));return X=function(z){var de=Array($n(z)+1);return Jo(z,de,0,de.length),de}(f=f.replace(/\0\0/g,"%")),X.length>c?0:(Rp(X,s),X.length-1)}function Up(s,c,f,b){return Ui(s>>>0,c>>>0,f>>>0,b>>>0)}g||function(){for(var s=u.numThreads-1;s--;)qo();Ye.unshift(()=>{Gt++,function(c){g?c():Promise.all(pt.map(Fo)).then(c)}(()=>Do())})}();for(var Ni=Array(256),pr=0;256>pr;++pr)Ni[pr]=String.fromCharCode(pr);ci=Ni,mt=u.BindingError=class extends Error{constructor(s){super(s),this.name="BindingError"}},u.InternalError=class extends Error{constructor(s){super(s),this.name="InternalError"}},ut.push(0,1,void 0,1,null,1,!0,1,!1,1),u.count_emval_handles=()=>ut.length/2-5-Tn.length;var Np=[wn,Wo,Ko,Qo,Xo,ei,ti,ri,ni,oi,ii,ai,si,ui,di,li,Si,Ti,Ei,Pi,zi,Oi,Di,Bi],Z=function(){function s(f,b){return Z=f.exports,Z=function(){var w=Z,I={};for(let[O,B]of Object.entries(w))I[O]=typeof B=="function"?(...L)=>{ur.push(O);try{return B(...L)}finally{we||(ur.pop(),Xe&&ht===1&&ur.length===0&&(ht=0,wt+=1,sr(ji),typeof Fibers<"u"&&Fibers.tc()))}}:B;return I}(),Z=function(){var w=Z,I=B=>L=>B(L)>>>0,O=B=>()=>B()>>>0;return(w=Object.assign({},w)).Da=I(w.Da),w.gb=O(w.gb),w.ib=I(w.ib),w.emscripten_main_runtime_thread_id=O(w.emscripten_main_runtime_thread_id),w.tb=I(w.tb),w.ub=O(w.ub),w}(),Lo.push(Z.jb),Lt.unshift(Z.Ca),Y=b,Do(),Z}var c=No();if(Gt++,u.instantiateWasm)try{return u.instantiateWasm(c,s)}catch(f){j(`Module.instantiateWasm callback failed with error: ${f}`),m(f)}return gn||=u.locateFile?Bo("ort-wasm-simd-threaded.jsep.wasm")?"ort-wasm-simd-threaded.jsep.wasm":u.locateFile?u.locateFile("ort-wasm-simd-threaded.jsep.wasm",P):P+"ort-wasm-simd-threaded.jsep.wasm":new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href,function(f,b){var w=gn;return D||typeof WebAssembly.instantiateStreaming!="function"||Bo(w)||Mo(w)||typeof fetch!="function"?Uo(w,f,b):fetch(w,{credentials:"same-origin"}).then(I=>WebAssembly.instantiateStreaming(I,f).then(b,function(O){return j(`wasm streaming compile failed: ${O}`),j("falling back to ArrayBuffer instantiation"),Uo(w,f,b)}))}(c,function(f){s(f.instance,f.module)}).catch(m),{}}(),Vi=s=>(Vi=Z.Da)(s),Wi=()=>(Wi=Z.Ea)();u._OrtInit=(s,c)=>(u._OrtInit=Z.Fa)(s,c),u._OrtGetLastError=(s,c)=>(u._OrtGetLastError=Z.Ga)(s,c),u._OrtCreateSessionOptions=(s,c,f,b,w,I,O,B,L,H)=>(u._OrtCreateSessionOptions=Z.Ha)(s,c,f,b,w,I,O,B,L,H),u._OrtAppendExecutionProvider=(s,c)=>(u._OrtAppendExecutionProvider=Z.Ia)(s,c),u._OrtAddFreeDimensionOverride=(s,c,f)=>(u._OrtAddFreeDimensionOverride=Z.Ja)(s,c,f),u._OrtAddSessionConfigEntry=(s,c,f)=>(u._OrtAddSessionConfigEntry=Z.Ka)(s,c,f),u._OrtReleaseSessionOptions=s=>(u._OrtReleaseSessionOptions=Z.La)(s),u._OrtCreateSession=(s,c,f)=>(u._OrtCreateSession=Z.Ma)(s,c,f),u._OrtReleaseSession=s=>(u._OrtReleaseSession=Z.Na)(s),u._OrtGetInputOutputCount=(s,c,f)=>(u._OrtGetInputOutputCount=Z.Oa)(s,c,f),u._OrtGetInputName=(s,c)=>(u._OrtGetInputName=Z.Pa)(s,c),u._OrtGetOutputName=(s,c)=>(u._OrtGetOutputName=Z.Qa)(s,c),u._OrtFree=s=>(u._OrtFree=Z.Ra)(s),u._OrtCreateTensor=(s,c,f,b,w,I)=>(u._OrtCreateTensor=Z.Sa)(s,c,f,b,w,I),u._OrtGetTensorData=(s,c,f,b,w)=>(u._OrtGetTensorData=Z.Ta)(s,c,f,b,w),u._OrtReleaseTensor=s=>(u._OrtReleaseTensor=Z.Ua)(s),u._OrtCreateRunOptions=(s,c,f,b)=>(u._OrtCreateRunOptions=Z.Va)(s,c,f,b),u._OrtAddRunConfigEntry=(s,c,f)=>(u._OrtAddRunConfigEntry=Z.Wa)(s,c,f),u._OrtReleaseRunOptions=s=>(u._OrtReleaseRunOptions=Z.Xa)(s),u._OrtCreateBinding=s=>(u._OrtCreateBinding=Z.Ya)(s),u._OrtBindInput=(s,c,f)=>(u._OrtBindInput=Z.Za)(s,c,f),u._OrtBindOutput=(s,c,f,b)=>(u._OrtBindOutput=Z._a)(s,c,f,b),u._OrtClearBoundOutputs=s=>(u._OrtClearBoundOutputs=Z.$a)(s),u._OrtReleaseBinding=s=>(u._OrtReleaseBinding=Z.ab)(s),u._OrtRunWithBinding=(s,c,f,b,w)=>(u._OrtRunWithBinding=Z.bb)(s,c,f,b,w),u._OrtRun=(s,c,f,b,w,I,O,B)=>(u._OrtRun=Z.cb)(s,c,f,b,w,I,O,B),u._OrtEndProfiling=s=>(u._OrtEndProfiling=Z.db)(s),u._JsepOutput=(s,c,f)=>(u._JsepOutput=Z.eb)(s,c,f),u._JsepGetNodeName=s=>(u._JsepGetNodeName=Z.fb)(s);var mr,Mt=()=>(Mt=Z.gb)(),Je=u._free=s=>(Je=u._free=Z.hb)(s),fr=u._malloc=s=>(fr=u._malloc=Z.ib)(s),Dn=(s,c,f,b,w,I)=>(Dn=Z.lb)(s,c,f,b,w,I),Li=()=>(Li=Z.mb)(),Gi=(s,c,f,b,w)=>(Gi=Z.nb)(s,c,f,b,w),Bn=s=>(Bn=Z.ob)(s),hr=s=>(hr=Z.pb)(s),Hi=()=>(Hi=Z.qb)(),Fi=(s,c)=>(Fi=Z.rb)(s,c),gr=s=>(gr=Z.sb)(s),Mn=s=>(Mn=Z.tb)(s),Rn=()=>(Rn=Z.ub)(),qi=u.dynCall_ii=(s,c)=>(qi=u.dynCall_ii=Z.wb)(s,c),Ki=s=>(Ki=Z.xb)(s),ji=()=>(ji=Z.yb)(),Yi=s=>(Yi=Z.zb)(s),Zi=()=>(Zi=Z.Ab)();function Qi(){0<Gt||(g?(p(u),g||or(Lt),startWorker(u)):(or(Ye),0<Gt||mr||(mr=!0,u.calledRun=!0,we||(g||or(Lt),p(u),g||or(fn)))))}return u.___start_em_js=929301,u.___stop_em_js=929547,u.stackSave=()=>Rn(),u.stackRestore=s=>gr(s),u.stackAlloc=s=>Mn(s),u.setValue=function(s,c,f="i8"){switch(f.endsWith("*")&&(f="*"),f){case"i1":case"i8":t()[s>>>0]=c;break;case"i16":n()[s>>>1>>>0]=c;break;case"i32":i()[s>>>2>>>0]=c;break;case"i64":q[s>>>3]=BigInt(c);break;case"float":d()[s>>>2>>>0]=c;break;case"double":l()[s>>>3>>>0]=c;break;case"*":a()[s>>>2>>>0]=c;break;default:ct(`invalid type for setValue: ${f}`)}},u.getValue=function(s,c="i8"){switch(c.endsWith("*")&&(c="*"),c){case"i1":case"i8":return t()[s>>>0];case"i16":return n()[s>>>1>>>0];case"i32":return i()[s>>>2>>>0];case"i64":return q[s>>>3];case"float":return d()[s>>>2>>>0];case"double":return l()[s>>>3>>>0];case"*":return a()[s>>>2>>>0];default:ct(`invalid type for getValue: ${c}`)}},u.UTF8ToString=Ce,u.stringToUTF8=Ot,u.lengthBytesUTF8=$n,Ht=function s(){mr||Qi(),mr||(Ht=s)},Qi(),u.PTR_SIZE=4,h}),Kp=za;globalThis.self?.name==="em-pthread"&&za()});var Ra,jp,Ne,Ua,jn,Yp,Zp,Na,Qp,Ba,Va,Ma,Wa,xr=U(()=>{"use strict";$r();Ra= false||typeof location>"u"?void 0:location.origin,jp=()=>{if(true)return import.meta.url?.startsWith("file:")?new URL(new URL(/* asset import */ __webpack_require__(/*! ort.bundle.min.mjs */ "./node_modules/onnxruntime-web/dist/ort.bundle.min.mjs?46eb"), __webpack_require__.b).href,Ra).href:import.meta.url},Ne=jp(),Ua=()=>{if(Ne&&!Ne.startsWith("blob:"))return Ne.substring(0,Ne.lastIndexOf("/")+1)},jn=(e,t)=>{try{let r=t??Ne;return(r?new URL(e,r):new URL(e)).origin===Ra}catch{return!1}},Yp=(e,t)=>{let r=t??Ne;try{return(r?new URL(e,r):new URL(e)).href}catch{return}},Zp=(e,t)=>`${t??"./"}${e}`,Na=async e=>{let r=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(r)},Qp=async e=>(await import(/*webpackIgnore:true*/e)).default,Ba=(Pa(),br(Ea)).default,Va=async()=>{if(!Ne)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if(jn(Ne))return[void 0,Ba()];let e=await Na(Ne);return[e,Ba(e)]},Ma=(Da(),br(Oa)).default,Wa=async(e,t,r)=>{if(!e&&!t&&Ma&&Ne&&jn(Ne))return[void 0,Ma];{let n="ort-wasm-simd-threaded.jsep.mjs",o=e??Yp(n,t),i= true&&r&&o&&!jn(o,t),a=i?await Na(o):o??Zp(n,t);return[i?a:void 0,await Qp(a)]}}});var Yn,Zn,zr,La,Xp,Jp,Sr,Ie,gt=U(()=>{"use strict";xr();Zn=!1,zr=!1,La=!1,Xp=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},Jp=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},Sr=async e=>{if(Zn)return Promise.resolve();if(zr)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(La)throw new Error("previous call to 'initializeWebAssembly()' failed.");zr=!0;let t=e.initTimeout,r=e.numThreads;if(!Jp())throw new Error("WebAssembly SIMD is not supported in the current environment.");let n=Xp();r>1&&!n&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=r=1);let o=e.wasmPaths,i=typeof o=="string"?o:void 0,a=o?.mjs,d=a?.href??a,l=o?.wasm,p=l?.href??l,m=e.wasmBinary,[u,h]=await Wa(d,i,r>1),_=!1,y=[];if(t>0&&y.push(new Promise(g=>{setTimeout(()=>{_=!0,g()},t)})),y.push(new Promise((g,x)=>{let $={numThreads:r};if(m)$.wasmBinary=m;else if(p||i)$.locateFile=v=>p??i+v;else if(d&&d.indexOf("blob:")!==0)$.locateFile=v=>new URL(v,d).href;else if(u){let v=Ua();v&&($.locateFile=S=>v+S)}h($).then(v=>{zr=!1,Zn=!0,Yn=v,g(),u&&URL.revokeObjectURL(u)},v=>{zr=!1,La=!0,x(v)})})),await Promise.race(y),_)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},Ie=()=>{if(Zn&&Yn)return Yn;throw new Error("WebAssembly is not initialized yet.")}});var ke,jt,pe,Or=U(()=>{"use strict";gt();ke=(e,t)=>{let r=Ie(),n=r.lengthBytesUTF8(e)+1,o=r._malloc(n);return r.stringToUTF8(e,o,n),t.push(o),o},jt=(e,t,r,n)=>{if(typeof e=="object"&&e!==null){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach(([o,i])=>{let a=t?t+o:o;if(typeof i=="object")jt(i,a+".",r,n);else if(typeof i=="string"||typeof i=="number")n(a,i.toString());else if(typeof i=="boolean")n(a,i?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof i}`)})},pe=e=>{let t=Ie(),r=t.stackSave();try{let n=t.PTR_SIZE,o=t.stackAlloc(2*n);t._OrtGetLastError(o,o+n);let i=Number(t.getValue(o,n===4?"i32":"i64")),a=t.getValue(o+n,"*"),d=a?t.UTF8ToString(a):"";throw new Error(`${e} ERROR_CODE: ${i}, ERROR_MESSAGE: ${d}`)}finally{t.stackRestore(r)}}});var Ga,Ha=U(()=>{"use strict";gt();Or();Ga=e=>{let t=Ie(),r=0,n=[],o=e||{};try{if(e?.logSeverityLevel===void 0)o.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(e?.logVerbosityLevel===void 0)o.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);e?.terminate===void 0&&(o.terminate=!1);let i=0;return e?.tag!==void 0&&(i=ke(e.tag,n)),r=t._OrtCreateRunOptions(o.logSeverityLevel,o.logVerbosityLevel,!!o.terminate,i),r===0&&pe("Can't create run options."),e?.extra!==void 0&&jt(e.extra,"",new WeakSet,(a,d)=>{let l=ke(a,n),p=ke(d,n);t._OrtAddRunConfigEntry(r,l,p)!==0&&pe(`Can't set a run config entry: ${a} - ${d}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseRunOptions(r),n.forEach(a=>t._free(a)),i}}});var em,tm,rm,nm,Fa,qa=U(()=>{"use strict";gt();Or();em=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},tm=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},rm=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(r=>(typeof r=="string"?r:r.name)==="webgpu")&&(e.enableMemPattern=!1)},nm=(e,t,r)=>{for(let n of t){let o=typeof n=="string"?n:n.name;switch(o){case"webnn":if(o="WEBNN",typeof n!="string"){let d=n?.deviceType;if(d){let l=ke("deviceType",r),p=ke(d,r);Ie()._OrtAddSessionConfigEntry(e,l,p)!==0&&pe(`Can't set a session config entry: 'deviceType' - ${d}.`)}}break;case"webgpu":if(o="JS",typeof n!="string"){let a=n;if(a?.preferredLayout){if(a.preferredLayout!=="NCHW"&&a.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${a.preferredLayout}`);let d=ke("preferredLayout",r),l=ke(a.preferredLayout,r);Ie()._OrtAddSessionConfigEntry(e,d,l)!==0&&pe(`Can't set a session config entry: 'preferredLayout' - ${a.preferredLayout}.`)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${o}`)}let i=ke(o,r);Ie()._OrtAppendExecutionProvider(e,i)!==0&&pe(`Can't append execution provider: ${o}.`)}},Fa=e=>{let t=Ie(),r=0,n=[],o=e||{};rm(o);try{let i=em(o.graphOptimizationLevel??"all"),a=tm(o.executionMode??"sequential"),d=typeof o.logId=="string"?ke(o.logId,n):0,l=o.logSeverityLevel??2;if(!Number.isInteger(l)||l<0||l>4)throw new Error(`log serverity level is not valid: ${l}`);let p=o.logVerbosityLevel??0;if(!Number.isInteger(p)||p<0||p>4)throw new Error(`log verbosity level is not valid: ${p}`);let m=typeof o.optimizedModelFilePath=="string"?ke(o.optimizedModelFilePath,n):0;if(r=t._OrtCreateSessionOptions(i,!!o.enableCpuMemArena,!!o.enableMemPattern,a,!!o.enableProfiling,0,d,l,p,m),r===0&&pe("Can't create session options."),o.executionProviders&&nm(r,o.executionProviders,n),o.enableGraphCapture!==void 0){if(typeof o.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: ${o.enableGraphCapture}`);let u=ke("enableGraphCapture",n),h=ke(o.enableGraphCapture.toString(),n);t._OrtAddSessionConfigEntry(r,u,h)!==0&&pe(`Can't set a session config entry: 'enableGraphCapture' - ${o.enableGraphCapture}.`)}if(o.freeDimensionOverrides)for(let[u,h]of Object.entries(o.freeDimensionOverrides)){if(typeof u!="string")throw new Error(`free dimension override name must be a string: ${u}`);if(typeof h!="number"||!Number.isInteger(h)||h<0)throw new Error(`free dimension override value must be a non-negative integer: ${h}`);let _=ke(u,n);t._OrtAddFreeDimensionOverride(r,_,h)!==0&&pe(`Can't set a free dimension override: ${u} - ${h}.`)}return o.extra!==void 0&&jt(o.extra,"",new WeakSet,(u,h)=>{let _=ke(u,n),y=ke(h,n);t._OrtAddSessionConfigEntry(r,_,y)!==0&&pe(`Can't set a session config entry: ${u} - ${h}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseSessionOptions(r)!==0&&pe("Can't release session options."),n.forEach(a=>t._free(a)),i}}});var Yt,bt,Ct,Dr,Zt,Br,Mr,Qn,te=U(()=>{"use strict";Yt=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},bt=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},Ct=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],n=typeof t=="number"?t:t.reduce((o,i)=>o*i,1);return r>0?Math.ceil(n*r):void 0},Dr=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},Zt=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},Br=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Mr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Qn=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}});var Qt,Xn=U(()=>{"use strict";$r();Qt=async e=>{if(typeof e=="string")if(false){}else{let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let r=t.headers.get("Content-Length"),n=r?parseInt(r,10):0;if(n<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let o=t.body.getReader(),i;try{i=new ArrayBuffer(n)}catch(d){if(d instanceof RangeError){let l=Math.ceil(n/65536);i=new WebAssembly.Memory({initial:l,maximum:l}).buffer}else throw d}let a=0;for(;;){let{done:d,value:l}=await o.read();if(d)break;let p=l.byteLength;new Uint8Array(i,a,p).set(l),a+=p}return new Uint8Array(i,0,n)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}});var om,im,Ka,ja,Rr,am,ue,et=U(()=>{"use strict";te();om=["V","I","W","E","F"],im=(e,t)=>{console.log(`[${om[e]},${new Date().toISOString()}]${t}`)},Rr=(e,t)=>{Ka=e,ja=t},am=(e,t)=>{let r=Zt(e),n=Zt(Ka);r>=n&&im(r,typeof t=="function"?t():t)},ue=(...e)=>{ja&&am(...e)}});var Ur,Jn=U(()=>{"use strict";te();Ur=(e,t)=>new(Dr(t))(e)});var Nr=U(()=>{"use strict"});var Ya,eo,to,sm,um,Za,no,ro,Xa,Ja=U(()=>{"use strict";et();Nr();Ya=new Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),eo=[],to=e=>Math.ceil(Number(e)/16)*16,sm=e=>{for(let t=0;t<eo.length;t++){let r=eo[t];if(e<=r)return r}return Math.ceil(e/16)*16},um=1,Za=()=>um++,no=async(e,t,r,n)=>{let o=to(r),i=e.device.createBuffer({size:o,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let a=e.getCommandEncoder();e.endComputePass(),a.copyBufferToBuffer(t,0,i,0,o),e.flush(),await i.mapAsync(GPUMapMode.READ);let d=i.getMappedRange();if(n){let l=n();return l.set(new Uint8Array(d,0,r)),l}else return new Uint8Array(d.slice(0,r))}finally{i.destroy()}},ro=class{constructor(t){this.backend=t;this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[r]of Ya)eo.push(r),this.freeBuffers.set(r,[]),this.freeUniformBuffers.set(r,[]);this.sessionCount=0}upload(t,r){let n=r.buffer,o=r.byteOffset,i=r.byteLength,a=to(i),d=this.storageCache.get(t);if(!d)throw new Error("gpu data for uploading does not exist");if(Number(d.originalSize)!==i)throw new Error(`inconsistent data size. gpu data size=${d.originalSize}, data size=${i}`);let l=this.backend.device.createBuffer({mappedAtCreation:!0,size:a,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),p=l.getMappedRange();new Uint8Array(p).set(new Uint8Array(n,o,i)),l.unmap();let m=this.backend.device.createCommandEncoder();m.copyBufferToBuffer(l,0,d.gpuData.buffer,0,a),this.backend.device.queue.submit([m.finish()]),l.destroy(),ue("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${t})`)}memcpy(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("source gpu data for memcpy does not exist");let o=this.storageCache.get(r);if(!o)throw new Error("destination gpu data for memcpy does not exist");if(n.originalSize!==o.originalSize)throw new Error("inconsistent source and destination gpu data size");let i=to(n.originalSize),a=this.backend.getCommandEncoder();this.backend.endComputePass(),a.copyBufferToBuffer(n.gpuData.buffer,0,o.gpuData.buffer,0,i)}registerExternalBuffer(t,r,n){let o;if(n){if(o=n[0],t===n[1])return ue("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, buffer is the same, skip.`),o;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
3925
- Please use the previous external buffer!`)}else o=Za();return this.storageCache.set(o,{gpuData:{id:o,type:0,buffer:t},originalSize:r}),ue("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, registered.`),o}unregisterExternalBuffer(t){t!==void 0&&(this.storageCache.delete(t),ue("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${t}`))}create(t,r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let n=sm(t),o,i=(r&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,a=(r&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(i||a){let p=(i?this.freeBuffers:this.freeUniformBuffers).get(n);p?p.length>0?o=p.pop():o=this.backend.device.createBuffer({size:n,usage:r}):o=this.backend.device.createBuffer({size:n,usage:r})}else o=this.backend.device.createBuffer({size:n,usage:r});let d={id:Za(),type:0,buffer:o};return this.storageCache.set(d.id,{gpuData:d,originalSize:Number(t)}),ue("verbose",()=>`[WebGPU] GpuDataManager.create(size=${t}) => id=${d.id}`),d}get(t){return this.storageCache.get(t)?.gpuData}release(t){let r=typeof t=="bigint"?Number(t):t,n=this.storageCache.get(r);if(!n){if(this.storageCache.size===0)return 0;throw new Error("releasing data does not exist")}return ue("verbose",()=>`[WebGPU] GpuDataManager.release(id=${r}), gpuDataId=${n.gpuData.id}`),this.storageCache.delete(r),this.buffersPending.push(n.gpuData.buffer),n.originalSize}async download(t,r){let n=this.storageCache.get(Number(t));if(!n)throw new Error("data does not exist");await no(this.backend,n.gpuData.buffer,n.originalSize,r)}refreshPendingBuffers(){if(this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let t of this.buffersPending){let r=Ya.get(t.size);if((t.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let n=this.freeBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else if((t.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let n=this.freeUniformBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else t.destroy()}this.buffersPending=[]}else{let t=this.capturedPendingBuffers.get(this.backend.currentSessionId);t||(t=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,t));for(let r of this.buffersPending)t.push(r);this.buffersPending=[]}}dispose(){this.freeBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.freeUniformBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache.forEach(t=>{t.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}onCreateSession(){this.sessionCount+=1}onReleaseSession(t){let r=this.capturedPendingBuffers.get(t);r&&(r.forEach(n=>{n.destroy()}),this.capturedPendingBuffers.delete(t)),this.sessionCount-=1,this.sessionCount===0&&(ue("warning",()=>"[WebGPU] Clearing webgpu buffer cache"),this.storageCache.forEach(n=>{n.gpuData.buffer.destroy()}),this.storageCache=new Map)}},Xa=(...e)=>new ro(...e)});var oo,re,Se=U(()=>{"use strict";oo=class{constructor(t){Object.assign(this,t)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(t=>`${this[t]}`).join(";")),this.key}},re=e=>new oo(e)});var io,tt,C,At,Vr,es,ts,oe=U(()=>{"use strict";io=class{static calcMatMulShape(t,r){return t[1]!==r[0]?void 0:[t[0],r[1]]}},tt=class{static calcShape(t,r,n=!1){let o=t.length,i=r.length;if(o===0)return r;if(i===0)return t;let a=Math.max(t.length,r.length),d=new Array(a);if(n){if(o<2||i<2)return;let l=io.calcMatMulShape([t[o-2],t[o-1]],[r[i-2],r[i-1]]);if(l===void 0)return;[d[a-2],d[a-1]]=l}for(let l=n?3:1;l<=a;l++){let p=o-l<0?1:t[o-l],m=i-l<0?1:r[i-l];if(p!==m&&p>1&&m>1)return;let u=Math.max(p,m);if(p&&m)d[a-l]=Math.max(p,m);else{if(u>1)return;d[a-l]=0}}return d}static isValidBroadcast(t,r){let n=t.length,o=r.length;if(n>o)return!1;for(let i=1;i<=n;i++)if(t[n-i]!==1&&t[n-i]!==r[o-i])return!1;return!0}},C=class e{static size(t){return e.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,r=4){let n=t.length;if(n===0)return[];let o=new Array(n),i=n-1;for(;i>=0;){if(t[i]%r===0){o[i]=t[i]/r;break}if(r%t[i]!==0)throw new Error("cannot convert shape");o[i]=1,r/=t[i],i--}for(i--;i>=0;i--)o[i]=t[i];return o}static sizeFromDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,r,t.length)}static sizeToDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeToDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,0,r)}static getSizeFromDimensionRange(t,r,n){let o=1;for(let i=r;i<n;i++){if(t[i]<0)throw new Error("cannot get valid size from specified dimension range. Most likely the range contains negative values in them.");o*=Number(t[i])}return o}static computeStrides(t){let r=t.length;if(r===0)return[];if(r===1)return[1];let n=new Array(r);n[r-1]=1,n[r-2]=t[r-1];for(let o=r-3;o>=0;--o)n[o]=n[o+1]*t[o+1];return n}static normalizeAxis(t,r){if(t<-r&&t>=r)throw new Error("unsupported axis for this operation.");return t<0?t+r:t}static normalizeAxes(t,r){return t.map(n=>this.normalizeAxis(n,r??t.length))}static sortBasedOnPerm(t,r){return r?r.map(n=>t[n]):t.slice().reverse()}static padShape(t,r){let n=t.length;return t.map((o,i)=>o+r[i]+r[i+n])}static areEqual(t,r){return t.length!==r.length?!1:t.every((n,o)=>n===r[o])}},At=class e{static adjustPoolAttributes(t,r,n,o,i,a){if(!t&&n.length!==r.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let d=0;d<r.length-2;d++)d>=n.length?n.push(r[d+2]):n[d]=r[d+2];for(let d=0;d<n.length;d++)if(d<o.length){if(o[d]<0)throw new Error("strides should be greater than or equal to 1")}else o.push(1);for(let d=0;d<n.length;d++)if(d<i.length){if(i[d]<0)throw new Error("dilations should be greater than or equal to 1")}else i.push(1);for(let d=0;d<n.length*2;d++)if(d<a.length){if(a[d]<0)throw new Error("pad should be greater than or equal to 1")}else a.push(0);for(let d=0;d<n.length;d++){if(n[d]<=0)throw new Error("kernel shapes need to be greater than 0");if(a[d]>=n[d]||a[d+n.length]>=n[d])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,r,n,o,i,a,d){if(d){if(i.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(r.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(o.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let l=0;l<t.length-2;l++)e.adjustPadAndReturnShape(t[l+(a?1:2)],r[l],n[l],o[l],i,l,l+t.length-2,d)}}static computePoolOutputShape(t,r,n,o,i,a,d){if(r.length<=0)throw new Error("input shape must be of size greater than 0");let l=[r[0],r[1]];return e.computeShapeHelper(t,r,l,n,o,i,a,d),l}static computeConvOutputShape(t,r,n,o,i,a,d){if(t.length<=0||r.length<=0)throw new Error("invalid input tensor dims or invalid filter tensor dims");let l=[t[0],r[0]];return e.computeShapeHelper(!1,t,l,n,o,i,a,d),l}static computeShapeHelper(t,r,n,o,i,a,d,l){if(t)for(let p=0;p<r.length-2;p++)n.push(1);else for(let p=0;p<r.length-2;p++)n.push(e.adjustPadAndReturnShape(r[p+2],o[p],i[p],a[p],d,p,p+r.length-2,l))}static adjustPadAndReturnShape(t,r,n,o,i,a,d,l){let p=n*(o-1)+1;if(l&&l!=="NOTSET")switch(l){case"VALID":return i[a]=0,i[d]=0,Math.floor((t-p)/r+1);case"SAME_LOWER":case"SAME_UPPER":if(n!==1)throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");{let u=((t+r-1)/r-1)*r+o-t;return i[a]=Math.floor(l==="SAME_LOWER"?(u+1)/2:u/2),i[d]=u-i[a],Math.floor((t+u-o)/r+1)}default:throw new Error("Unsupported AutoPad type")}else return Math.floor((t+i[a]+i[d]-p)/r+1)}},Vr=class{static getShapeOfGemmResult(t,r,n,o,i){if(t.length!==2||n.length!==2)throw new Error("shape need to be of size 2");let a,d,l;r?(a=t[1],d=t[0]):(a=t[0],d=t[1]);let p=-1;if(o?(l=n[0],p=1):(l=n[1],p=0),n[p]!==d)throw new Error("dimension mismatch");if(a<=0||l<=0||d<=0)throw new Error("invalid shape specified");if(i&&!tt.isValidBroadcast(i,[a,l]))throw new Error("gemm: invalid bias shape for broadcast");return[a,l,d]}},es=-34028234663852886e22,ts=34028234663852886e22});var kt,so,_e,Ee,N,me,uo,Et,Fe,F,Wr,E,M,rs,Lr,ao,ns,ae=U(()=>{"use strict";te();oe();kt=64,so=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(Number(e)){case 10:return t>1?`vec${t}<f16>`:"f16";case 1:return t>1?`vec${t}<f32>`:"f32";case 6:return t>1?`vec${t}<i32>`:"i32";case 12:return t>1?`vec${t}<u32>`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4<bool>"];case 22:return"i32";case 21:return"u32";default:throw new Error(`Unknown data type: ${e}`)}},_e=(e,t=1)=>{let r=so(e,t);return typeof r=="string"?r:r[0]},Ee=(e,t=1)=>{let r=so(e,t);return typeof r=="string"?r:r[1]},N=(...e)=>{let t=[];return e.forEach(r=>{r.length!==0&&t.push({type:12,data:r},{type:12,data:C.computeStrides(r)})}),t},me=e=>e%4===0?4:e%2===0?2:1,uo=(e="f32",t,r="0")=>!t||t===1?`${e}(${r})`:`vec${t}<${e}>(${r})`,Et=(e,t,r)=>e==="f32"?r:t===1?`f32(${r})`:`vec${t}<f32>(${r})`,Fe=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,F=(e,t,r,n)=>e.startsWith("uniforms.")&&r>4?typeof t=="string"?n==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 4]`:n==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:r>1?`${e}[${t}]`:e,Wr=(e,t,r,n,o)=>{let i=typeof r=="number",a=i?r:r.length,d=[...new Array(a).keys()],l=a<2?"u32":a<=4?`vec${a}<u32>`:`array<u32, ${a}>`,p=so(t,o),m=typeof p=="string"?p:p[1],u=typeof p=="string"?p:p[0],h={indices:l,value:m,storage:u,tensor:t},_=W=>typeof W=="string"?W:`${W}u`,y={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},g=i?"uniforms.":"",x=`${g}${e}_shape`,$=`${g}${e}_strides`,v="";for(let W=0;W<a-1;W++)v+=`
3926
- let dim${W} = current / ${F($,W,a)};
3927
- let rest${W} = current % ${F($,W,a)};
3976
+ `),s=function(X){var le=Function;if(!(le instanceof Function))throw new TypeError(`new_ called with constructor type ${typeof le} which is not a function`);var ge=wi(le.name||"unknownFunctionName",function(){});return ge.prototype=le.prototype,ge=new ge,(X=le.apply(ge,X))instanceof Object?X:ge}(B)(...G),f=`methodCaller<(${c.map(X=>X.name).join(", ")}) => ${b.name}>`,sp(wi(f,s))}function lp(s){return s=dr(s>>>0),We(u[s])}function cp(s,c){return c>>>=0,s=Re(s>>>0),c=Re(c),We(s[c])}function pp(s){9<(s>>>=0)&&(ut[s+1]+=1)}function mp(){return We([])}function fp(s){s=Re(s>>>0);for(var c=Array(s.length),f=0;f<s.length;f++)c[f]=s[f];return We(c)}function hp(s){return We(dr(s>>>0))}function gp(){return We({})}function bp(s){for(var c=Re(s>>>=0);c.length;){var f=c.pop();c.pop()(f)}Tn(s)}function yp(s,c,f){c>>>=0,f>>>=0,s=Re(s>>>0),c=Re(c),f=Re(f),s[c]=f}function _p(s,c){return c>>>=0,s=(s=ir(s>>>0,"_emval_take_value")).readValueFromPointer(c),We(s)}function wp(s,c){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),c>>>=0,s=new Date(1e3*s),i()[c>>>2>>>0]=s.getUTCSeconds(),i()[c+4>>>2>>>0]=s.getUTCMinutes(),i()[c+8>>>2>>>0]=s.getUTCHours(),i()[c+12>>>2>>>0]=s.getUTCDate(),i()[c+16>>>2>>>0]=s.getUTCMonth(),i()[c+20>>>2>>>0]=s.getUTCFullYear()-1900,i()[c+24>>>2>>>0]=s.getUTCDay(),s=(s.getTime()-Date.UTC(s.getUTCFullYear(),0,1,0,0,0,0))/864e5|0,i()[c+28>>>2>>>0]=s}var Bt=s=>s%4==0&&(s%100!=0||s%400==0),vi=[0,31,60,91,121,152,182,213,244,274,305,335],$i=[0,31,59,90,120,151,181,212,243,273,304,334];function vp(s,c){s=-9007199254740992>s||9007199254740992<s?NaN:Number(s),c>>>=0,s=new Date(1e3*s),i()[c>>>2>>>0]=s.getSeconds(),i()[c+4>>>2>>>0]=s.getMinutes(),i()[c+8>>>2>>>0]=s.getHours(),i()[c+12>>>2>>>0]=s.getDate(),i()[c+16>>>2>>>0]=s.getMonth(),i()[c+20>>>2>>>0]=s.getFullYear()-1900,i()[c+24>>>2>>>0]=s.getDay();var f=(Bt(s.getFullYear())?vi:$i)[s.getMonth()]+s.getDate()-1|0;i()[c+28>>>2>>>0]=f,i()[c+36>>>2>>>0]=-60*s.getTimezoneOffset(),f=new Date(s.getFullYear(),6,1).getTimezoneOffset();var b=new Date(s.getFullYear(),0,1).getTimezoneOffset();s=0|(f!=b&&s.getTimezoneOffset()==Math.min(b,f)),i()[c+32>>>2>>>0]=s}function $p(s){s>>>=0;var c=new Date(i()[s+20>>>2>>>0]+1900,i()[s+16>>>2>>>0],i()[s+12>>>2>>>0],i()[s+8>>>2>>>0],i()[s+4>>>2>>>0],i()[s>>>2>>>0],0),f=i()[s+32>>>2>>>0],b=c.getTimezoneOffset(),w=new Date(c.getFullYear(),6,1).getTimezoneOffset(),I=new Date(c.getFullYear(),0,1).getTimezoneOffset(),O=Math.min(I,w);return 0>f?i()[s+32>>>2>>>0]=+(w!=I&&O==b):0<f!=(O==b)&&(w=Math.max(I,w),c.setTime(c.getTime()+6e4*((0<f?O:w)-b))),i()[s+24>>>2>>>0]=c.getDay(),f=(Bt(c.getFullYear())?vi:$i)[c.getMonth()]+c.getDate()-1|0,i()[s+28>>>2>>>0]=f,i()[s>>>2>>>0]=c.getSeconds(),i()[s+4>>>2>>>0]=c.getMinutes(),i()[s+8>>>2>>>0]=c.getHours(),i()[s+12>>>2>>>0]=c.getDate(),i()[s+16>>>2>>>0]=c.getMonth(),i()[s+20>>>2>>>0]=c.getYear(),s=c.getTime(),BigInt(isNaN(s)?-1:s/1e3)}function xi(s,c,f,b,w,I,O){return g?$e(16,1,s,c,f,b,w,I,O):-52}function Si(s,c,f,b,w,I){if(g)return $e(17,1,s,c,f,b,w,I)}function xp(s,c,f,b){s>>>=0,c>>>=0,f>>>=0,b>>>=0;var w=new Date().getFullYear(),I=new Date(w,0,1),O=new Date(w,6,1);w=I.getTimezoneOffset();var B=O.getTimezoneOffset(),G=Math.max(w,B);a()[s>>>2>>>0]=60*G,i()[c>>>2>>>0]=+(w!=B),I=(s=F=>F.toLocaleTimeString(void 0,{hour12:!1,timeZoneName:"short"}).split(" ")[1])(I),O=s(O),B<w?(Dt(I,f,17),Dt(O,b,17)):(Dt(I,b,17),Dt(O,f,17))}var En=[],Ti=(s,c)=>{En.length=0;for(var f;f=r()[s++>>>0];){var b=f!=105;c+=(b&=f!=112)&&c%8?4:0,En.push(f==112?a()[c>>>2>>>0]:f==106?W[c>>>3]:f==105?i()[c>>>2>>>0]:l()[c>>>3>>>0]),c+=b?8:4}return En};function Sp(s,c,f){return s>>>=0,c=Ti(c>>>0,f>>>0),gn[s](...c)}function Tp(s,c,f){return s>>>=0,c=Ti(c>>>0,f>>>0),gn[s](...c)}var Ip=()=>{},Cp=()=>Date.now();function Ap(s,c){return L(Ce(s>>>0,c>>>0))}var Ii,kp=()=>{throw vt+=1,"unwind"};function Ep(){return 4294901760}Ii=()=>performance.timeOrigin+performance.now();var Pp=()=>navigator.hardwareConcurrency;function zp(){return ct("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}function Op(s){s>>>=0;var c=r().length;if(s<=c||4294901760<s)return!1;for(var f=1;4>=f;f*=2){var b=c*(1+.2/f);b=Math.min(b,s+100663296);var w=Math;b=Math.max(s,b);e:{w=(w.min.call(w,4294901760,b+(65536-b%65536)%65536)-K.buffer.byteLength+65535)/65536;try{K.grow(w),pe();var I=1;break e}catch{}I=void 0}if(I)return!0}return!1}var lr=()=>(ct("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),Mt={},Ci=s=>{s.forEach(c=>{var f=lr();f&&(Mt[f]=c)})};function Dp(){var s=Error().stack.toString().split(`
3977
+ `);return s[0]=="Error"&&s.shift(),Ci(s),Mt.Qb=lr(),Mt.fc=s,Mt.Qb}function Bp(s,c,f){if(s>>>=0,c>>>=0,Mt.Qb==s)var b=Mt.fc;else(b=Error().stack.toString().split(`
3978
+ `))[0]=="Error"&&b.shift(),Ci(b);for(var w=3;b[w]&&lr()!=s;)++w;for(s=0;s<f&&b[s+w];++s)i()[c+4*s>>>2>>>0]=lr();return s}var Pn,zn={},Ai=()=>{if(!Pn){var s,c={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:"./this.program"};for(s in zn)zn[s]===void 0?delete c[s]:c[s]=zn[s];var f=[];for(s in c)f.push(`${s}=${c[s]}`);Pn=f}return Pn};function ki(s,c){if(g)return $e(18,1,s,c);s>>>=0,c>>>=0;var f=0;return Ai().forEach((b,w)=>{var I=c+f;for(w=a()[s+4*w>>>2>>>0]=I,I=0;I<b.length;++I)t()[w++>>>0]=b.charCodeAt(I);t()[w>>>0]=0,f+=b.length+1}),0}function Ei(s,c){if(g)return $e(19,1,s,c);s>>>=0,c>>>=0;var f=Ai();a()[s>>>2>>>0]=f.length;var b=0;return f.forEach(w=>b+=w.length+1),a()[c>>>2>>>0]=b,0}function Pi(s){return g?$e(20,1,s):52}function zi(s,c,f,b){return g?$e(21,1,s,c,f,b):52}function Oi(s,c,f,b){return g?$e(22,1,s,c,f,b):70}var Mp=[null,[],[]];function Di(s,c,f,b){if(g)return $e(23,1,s,c,f,b);c>>>=0,f>>>=0,b>>>=0;for(var w=0,I=0;I<f;I++){var O=a()[c>>>2>>>0],B=a()[c+4>>>2>>>0];c+=8;for(var G=0;G<B;G++){var F=r()[O+G>>>0],X=Mp[s];F===0||F===10?((s===1?H:L)(Yo(X,0)),X.length=0):X.push(F)}w+=B}return a()[b>>>2>>>0]=w,0}var Bi=[31,29,31,30,31,30,31,31,30,31,30,31],Mi=[31,28,31,30,31,30,31,31,30,31,30,31],Rp=(s,c)=>{t().set(s,c>>>0)};function Ri(s,c,f,b){function w(z,ue,Se){for(z=typeof z=="number"?z.toString():z||"";z.length<ue;)z=Se[0]+z;return z}function I(z,ue){return w(z,ue,"0")}function O(z,ue){function Se(Qi){return 0>Qi?-1:0<Qi?1:0}var xt;return(xt=Se(z.getFullYear()-ue.getFullYear()))===0&&(xt=Se(z.getMonth()-ue.getMonth()))===0&&(xt=Se(z.getDate()-ue.getDate())),xt}function B(z){switch(z.getDay()){case 0:return new Date(z.getFullYear()-1,11,29);case 1:return z;case 2:return new Date(z.getFullYear(),0,3);case 3:return new Date(z.getFullYear(),0,2);case 4:return new Date(z.getFullYear(),0,1);case 5:return new Date(z.getFullYear()-1,11,31);case 6:return new Date(z.getFullYear()-1,11,30)}}function G(z){var ue=z.Cb;for(z=new Date(new Date(z.Db+1900,0,1).getTime());0<ue;){var Se=z.getMonth(),xt=(Bt(z.getFullYear())?Bi:Mi)[Se];if(!(ue>xt-z.getDate())){z.setDate(z.getDate()+ue);break}ue-=xt-z.getDate()+1,z.setDate(1),11>Se?z.setMonth(Se+1):(z.setMonth(0),z.setFullYear(z.getFullYear()+1))}return Se=new Date(z.getFullYear()+1,0,4),ue=B(new Date(z.getFullYear(),0,4)),Se=B(Se),0>=O(ue,z)?0>=O(Se,z)?z.getFullYear()+1:z.getFullYear():z.getFullYear()-1}s>>>=0,c>>>=0,f>>>=0,b>>>=0;var F=a()[b+40>>>2>>>0];for(var X in b={lc:i()[b>>>2>>>0],kc:i()[b+4>>>2>>>0],Ib:i()[b+8>>>2>>>0],Mb:i()[b+12>>>2>>>0],Jb:i()[b+16>>>2>>>0],Db:i()[b+20>>>2>>>0],vb:i()[b+24>>>2>>>0],Cb:i()[b+28>>>2>>>0],sc:i()[b+32>>>2>>>0],jc:i()[b+36>>>2>>>0],mc:F?Ce(F):""},f=Ce(f),F={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"})f=f.replace(new RegExp(X,"g"),F[X]);var le="Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "),ge="January February March April May June July August September October November December".split(" ");for(X in F={"%a":z=>le[z.vb].substring(0,3),"%A":z=>le[z.vb],"%b":z=>ge[z.Jb].substring(0,3),"%B":z=>ge[z.Jb],"%C":z=>I((z.Db+1900)/100|0,2),"%d":z=>I(z.Mb,2),"%e":z=>w(z.Mb,2," "),"%g":z=>G(z).toString().substring(2),"%G":G,"%H":z=>I(z.Ib,2),"%I":z=>((z=z.Ib)==0?z=12:12<z&&(z-=12),I(z,2)),"%j":z=>{for(var ue=0,Se=0;Se<=z.Jb-1;ue+=(Bt(z.Db+1900)?Bi:Mi)[Se++]);return I(z.Mb+ue,3)},"%m":z=>I(z.Jb+1,2),"%M":z=>I(z.kc,2),"%n":()=>`
3979
+ `,"%p":z=>0<=z.Ib&&12>z.Ib?"AM":"PM","%S":z=>I(z.lc,2),"%t":()=>" ","%u":z=>z.vb||7,"%U":z=>I(Math.floor((z.Cb+7-z.vb)/7),2),"%V":z=>{var ue=Math.floor((z.Cb+7-(z.vb+6)%7)/7);if(2>=(z.vb+371-z.Cb-2)%7&&ue++,ue)ue==53&&((Se=(z.vb+371-z.Cb)%7)==4||Se==3&&Bt(z.Db)||(ue=1));else{ue=52;var Se=(z.vb+7-z.Cb-1)%7;(Se==4||Se==5&&Bt(z.Db%400-1))&&ue++}return I(ue,2)},"%w":z=>z.vb,"%W":z=>I(Math.floor((z.Cb+7-(z.vb+6)%7)/7),2),"%y":z=>(z.Db+1900).toString().substring(2),"%Y":z=>z.Db+1900,"%z":z=>{var ue=0<=(z=z.jc);return z=Math.abs(z)/60,(ue?"+":"-")+("0000"+(z/60*100+z%60)).slice(-4)},"%Z":z=>z.mc,"%%":()=>"%"},f=f.replace(/%%/g,"\0\0"),F)f.includes(X)&&(f=f.replace(new RegExp(X,"g"),F[X](b)));return X=function(z){var ue=Array(vn(z)+1);return Xo(z,ue,0,ue.length),ue}(f=f.replace(/\0\0/g,"%")),X.length>c?0:(Rp(X,s),X.length-1)}function Up(s,c,f,b){return Ri(s>>>0,c>>>0,f>>>0,b>>>0)}g||function(){for(var s=u.numThreads-1;s--;)Fo();Ie.unshift(()=>{wt++,function(c){g?c():Promise.all(pt.map(Ho)).then(c)}(()=>Oo())})}();for(var Ui=Array(256),cr=0;256>cr;++cr)Ui[cr]=String.fromCharCode(cr);li=Ui,mt=u.BindingError=class extends Error{constructor(s){super(s),this.name="BindingError"}},u.InternalError=class extends Error{constructor(s){super(s),this.name="InternalError"}},ut.push(0,1,void 0,1,null,1,!0,1,!1,1),u.count_emval_handles=()=>ut.length/2-5-Sn.length;var Np=[_n,Vo,qo,Zo,Qo,Jo,ei,ti,ri,ni,oi,ii,ai,si,ui,di,xi,Si,ki,Ei,Pi,zi,Oi,Di],Z=function(){function s(f,b){return Z=f.exports,Z=function(){var w=Z,I={};for(let[O,B]of Object.entries(w))I[O]=typeof B=="function"?(...G)=>{sr.push(O);try{return B(...G)}finally{De||(sr.pop(),Xe&&ht===1&&sr.length===0&&(ht=0,vt+=1,ar(Ki),typeof Fibers<"u"&&Fibers.tc()))}}:B;return I}(),Z=function(){var w=Z,I=B=>G=>B(G)>>>0,O=B=>()=>B()>>>0;return(w=Object.assign({},w)).Da=I(w.Da),w.gb=O(w.gb),w.ib=I(w.ib),w.emscripten_main_runtime_thread_id=O(w.emscripten_main_runtime_thread_id),w.tb=I(w.tb),w.ub=O(w.ub),w}(),Wo.push(Z.jb),He.unshift(Z.Ca),we=b,Oo(),Z}var c=Uo();if(wt++,u.instantiateWasm)try{return u.instantiateWasm(c,s)}catch(f){L(`Module.instantiateWasm callback failed with error: ${f}`),m(f)}return hn||=u.locateFile?Do("ort-wasm-simd-threaded.jsep.wasm")?"ort-wasm-simd-threaded.jsep.wasm":u.locateFile?u.locateFile("ort-wasm-simd-threaded.jsep.wasm",C):C+"ort-wasm-simd-threaded.jsep.wasm":new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href,function(f,b){var w=hn;return P||typeof WebAssembly.instantiateStreaming!="function"||Do(w)||Bo(w)||typeof fetch!="function"?Ro(w,f,b):fetch(w,{credentials:"same-origin"}).then(I=>WebAssembly.instantiateStreaming(I,f).then(b,function(O){return L(`wasm streaming compile failed: ${O}`),L("falling back to ArrayBuffer instantiation"),Ro(w,f,b)}))}(c,function(f){s(f.instance,f.module)}).catch(m),{}}(),Ni=s=>(Ni=Z.Da)(s),Vi=()=>(Vi=Z.Ea)();u._OrtInit=(s,c)=>(u._OrtInit=Z.Fa)(s,c),u._OrtGetLastError=(s,c)=>(u._OrtGetLastError=Z.Ga)(s,c),u._OrtCreateSessionOptions=(s,c,f,b,w,I,O,B,G,F)=>(u._OrtCreateSessionOptions=Z.Ha)(s,c,f,b,w,I,O,B,G,F),u._OrtAppendExecutionProvider=(s,c)=>(u._OrtAppendExecutionProvider=Z.Ia)(s,c),u._OrtAddFreeDimensionOverride=(s,c,f)=>(u._OrtAddFreeDimensionOverride=Z.Ja)(s,c,f),u._OrtAddSessionConfigEntry=(s,c,f)=>(u._OrtAddSessionConfigEntry=Z.Ka)(s,c,f),u._OrtReleaseSessionOptions=s=>(u._OrtReleaseSessionOptions=Z.La)(s),u._OrtCreateSession=(s,c,f)=>(u._OrtCreateSession=Z.Ma)(s,c,f),u._OrtReleaseSession=s=>(u._OrtReleaseSession=Z.Na)(s),u._OrtGetInputOutputCount=(s,c,f)=>(u._OrtGetInputOutputCount=Z.Oa)(s,c,f),u._OrtGetInputName=(s,c)=>(u._OrtGetInputName=Z.Pa)(s,c),u._OrtGetOutputName=(s,c)=>(u._OrtGetOutputName=Z.Qa)(s,c),u._OrtFree=s=>(u._OrtFree=Z.Ra)(s),u._OrtCreateTensor=(s,c,f,b,w,I)=>(u._OrtCreateTensor=Z.Sa)(s,c,f,b,w,I),u._OrtGetTensorData=(s,c,f,b,w)=>(u._OrtGetTensorData=Z.Ta)(s,c,f,b,w),u._OrtReleaseTensor=s=>(u._OrtReleaseTensor=Z.Ua)(s),u._OrtCreateRunOptions=(s,c,f,b)=>(u._OrtCreateRunOptions=Z.Va)(s,c,f,b),u._OrtAddRunConfigEntry=(s,c,f)=>(u._OrtAddRunConfigEntry=Z.Wa)(s,c,f),u._OrtReleaseRunOptions=s=>(u._OrtReleaseRunOptions=Z.Xa)(s),u._OrtCreateBinding=s=>(u._OrtCreateBinding=Z.Ya)(s),u._OrtBindInput=(s,c,f)=>(u._OrtBindInput=Z.Za)(s,c,f),u._OrtBindOutput=(s,c,f,b)=>(u._OrtBindOutput=Z._a)(s,c,f,b),u._OrtClearBoundOutputs=s=>(u._OrtClearBoundOutputs=Z.$a)(s),u._OrtReleaseBinding=s=>(u._OrtReleaseBinding=Z.ab)(s),u._OrtRunWithBinding=(s,c,f,b,w)=>(u._OrtRunWithBinding=Z.bb)(s,c,f,b,w),u._OrtRun=(s,c,f,b,w,I,O,B)=>(u._OrtRun=Z.cb)(s,c,f,b,w,I,O,B),u._OrtEndProfiling=s=>(u._OrtEndProfiling=Z.db)(s),u._JsepOutput=(s,c,f)=>(u._JsepOutput=Z.eb)(s,c,f),u._JsepGetNodeName=s=>(u._JsepGetNodeName=Z.fb)(s);var pr,Rt=()=>(Rt=Z.gb)(),Je=u._free=s=>(Je=u._free=Z.hb)(s),mr=u._malloc=s=>(mr=u._malloc=Z.ib)(s),On=(s,c,f,b,w,I)=>(On=Z.lb)(s,c,f,b,w,I),Wi=()=>(Wi=Z.mb)(),Li=(s,c,f,b,w)=>(Li=Z.nb)(s,c,f,b,w),Dn=s=>(Dn=Z.ob)(s),fr=s=>(fr=Z.pb)(s),Gi=()=>(Gi=Z.qb)(),Hi=(s,c)=>(Hi=Z.rb)(s,c),hr=s=>(hr=Z.sb)(s),Bn=s=>(Bn=Z.tb)(s),Mn=()=>(Mn=Z.ub)(),Fi=u.dynCall_ii=(s,c)=>(Fi=u.dynCall_ii=Z.wb)(s,c),qi=s=>(qi=Z.xb)(s),Ki=()=>(Ki=Z.yb)(),ji=s=>(ji=Z.zb)(s),Yi=()=>(Yi=Z.Ab)();function Zi(){0<wt||(g?(p(u),g||nr(He),startWorker(u)):(nr(Ie),0<wt||pr||(pr=!0,u.calledRun=!0,De||(g||nr(He),p(u),g||nr(mn)))))}return u.___start_em_js=932469,u.___stop_em_js=932715,u.stackSave=()=>Mn(),u.stackRestore=s=>hr(s),u.stackAlloc=s=>Bn(s),u.setValue=function(s,c,f="i8"){switch(f.endsWith("*")&&(f="*"),f){case"i1":case"i8":t()[s>>>0]=c;break;case"i16":n()[s>>>1>>>0]=c;break;case"i32":i()[s>>>2>>>0]=c;break;case"i64":W[s>>>3]=BigInt(c);break;case"float":d()[s>>>2>>>0]=c;break;case"double":l()[s>>>3>>>0]=c;break;case"*":a()[s>>>2>>>0]=c;break;default:ct(`invalid type for setValue: ${f}`)}},u.getValue=function(s,c="i8"){switch(c.endsWith("*")&&(c="*"),c){case"i1":case"i8":return t()[s>>>0];case"i16":return n()[s>>>1>>>0];case"i32":return i()[s>>>2>>>0];case"i64":return W[s>>>3];case"float":return d()[s>>>2>>>0];case"double":return l()[s>>>3>>>0];case"*":return a()[s>>>2>>>0];default:ct(`invalid type for getValue: ${c}`)}},u.UTF8ToString=Ce,u.stringToUTF8=Dt,u.lengthBytesUTF8=vn,Gt=function s(){pr||Zi(),pr||(Gt=s)},Zi(),u.PTR_SIZE=4,h}),Kp=Pa;globalThis.self?.name==="em-pthread"&&Pa()});var Ma,jp,Ve,Ra,Kn,Yp,Zp,Ua,Qp,Da,Na,Ba,Va,$r=U(()=>{"use strict";vr();Ma= false||typeof location>"u"?void 0:location.origin,jp=()=>{if(true)return import.meta.url?.startsWith("file:")?new URL(new URL(/* asset import */ __webpack_require__(/*! ort.bundle.min.mjs */ "./node_modules/onnxruntime-web/dist/ort.bundle.min.mjs?46eb"), __webpack_require__.b).href,Ma).href:import.meta.url},Ve=jp(),Ra=()=>{if(Ve&&!Ve.startsWith("blob:"))return Ve.substring(0,Ve.lastIndexOf("/")+1)},Kn=(e,t)=>{try{let r=t??Ve;return(r?new URL(e,r):new URL(e)).origin===Ma}catch{return!1}},Yp=(e,t)=>{let r=t??Ve;try{return(r?new URL(e,r):new URL(e)).href}catch{return}},Zp=(e,t)=>`${t??"./"}${e}`,Ua=async e=>{let r=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(r)},Qp=async e=>(await import(/*webpackIgnore:true*/e)).default,Da=(Ea(),gr(ka)).default,Na=async()=>{if(!Ve)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if(Kn(Ve))return[void 0,Da()];let e=await Ua(Ve);return[e,Da(e)]},Ba=(Oa(),gr(za)).default,Va=async(e,t,r)=>{if(!e&&!t&&Ba&&Ve&&Kn(Ve))return[void 0,Ba];{let n="ort-wasm-simd-threaded.jsep.mjs",o=e??Yp(n,t),i= true&&r&&o&&!Kn(o,t),a=i?await Ua(o):o??Zp(n,t);return[i?a:void 0,await Qp(a)]}}});var jn,Yn,Pr,Wa,Xp,Jp,xr,Te,gt=U(()=>{"use strict";$r();Yn=!1,Pr=!1,Wa=!1,Xp=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},Jp=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},xr=async e=>{if(Yn)return Promise.resolve();if(Pr)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if(Wa)throw new Error("previous call to 'initializeWebAssembly()' failed.");Pr=!0;let t=e.initTimeout,r=e.numThreads;if(!Jp())throw new Error("WebAssembly SIMD is not supported in the current environment.");let n=Xp();r>1&&!n&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+r+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=r=1);let o=e.wasmPaths,i=typeof o=="string"?o:void 0,a=o?.mjs,d=a?.href??a,l=o?.wasm,p=l?.href??l,m=e.wasmBinary,[u,h]=await Va(d,i,r>1),_=!1,y=[];if(t>0&&y.push(new Promise(g=>{setTimeout(()=>{_=!0,g()},t)})),y.push(new Promise((g,x)=>{let $={numThreads:r};if(m)$.wasmBinary=m;else if(p||i)$.locateFile=v=>p??i+v;else if(d&&d.indexOf("blob:")!==0)$.locateFile=v=>new URL(v,d).href;else if(u){let v=Ra();v&&($.locateFile=S=>v+S)}h($).then(v=>{Pr=!1,Yn=!0,jn=v,g(),u&&URL.revokeObjectURL(u)},v=>{Pr=!1,Wa=!0,x(v)})})),await Promise.race(y),_)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},Te=()=>{if(Yn&&jn)return jn;throw new Error("WebAssembly is not initialized yet.")}});var ke,Kt,ce,zr=U(()=>{"use strict";gt();ke=(e,t)=>{let r=Te(),n=r.lengthBytesUTF8(e)+1,o=r._malloc(n);return r.stringToUTF8(e,o,n),t.push(o),o},Kt=(e,t,r,n)=>{if(typeof e=="object"&&e!==null){if(r.has(e))throw new Error("Circular reference in options");r.add(e)}Object.entries(e).forEach(([o,i])=>{let a=t?t+o:o;if(typeof i=="object")Kt(i,a+".",r,n);else if(typeof i=="string"||typeof i=="number")n(a,i.toString());else if(typeof i=="boolean")n(a,i?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof i}`)})},ce=e=>{let t=Te(),r=t.stackSave();try{let n=t.PTR_SIZE,o=t.stackAlloc(2*n);t._OrtGetLastError(o,o+n);let i=Number(t.getValue(o,n===4?"i32":"i64")),a=t.getValue(o+n,"*"),d=a?t.UTF8ToString(a):"";throw new Error(`${e} ERROR_CODE: ${i}, ERROR_MESSAGE: ${d}`)}finally{t.stackRestore(r)}}});var La,Ga=U(()=>{"use strict";gt();zr();La=e=>{let t=Te(),r=0,n=[],o=e||{};try{if(e?.logSeverityLevel===void 0)o.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(e?.logVerbosityLevel===void 0)o.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);e?.terminate===void 0&&(o.terminate=!1);let i=0;return e?.tag!==void 0&&(i=ke(e.tag,n)),r=t._OrtCreateRunOptions(o.logSeverityLevel,o.logVerbosityLevel,!!o.terminate,i),r===0&&ce("Can't create run options."),e?.extra!==void 0&&Kt(e.extra,"",new WeakSet,(a,d)=>{let l=ke(a,n),p=ke(d,n);t._OrtAddRunConfigEntry(r,l,p)!==0&&ce(`Can't set a run config entry: ${a} - ${d}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseRunOptions(r),n.forEach(a=>t._free(a)),i}}});var em,tm,rm,nm,Ha,Fa=U(()=>{"use strict";gt();zr();em=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},tm=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: ${e}`)}},rm=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(r=>(typeof r=="string"?r:r.name)==="webgpu")&&(e.enableMemPattern=!1)},nm=(e,t,r)=>{for(let n of t){let o=typeof n=="string"?n:n.name;switch(o){case"webnn":if(o="WEBNN",typeof n!="string"){let d=n?.deviceType;if(d){let l=ke("deviceType",r),p=ke(d,r);Te()._OrtAddSessionConfigEntry(e,l,p)!==0&&ce(`Can't set a session config entry: 'deviceType' - ${d}.`)}}break;case"webgpu":if(o="JS",typeof n!="string"){let a=n;if(a?.preferredLayout){if(a.preferredLayout!=="NCHW"&&a.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${a.preferredLayout}`);let d=ke("preferredLayout",r),l=ke(a.preferredLayout,r);Te()._OrtAddSessionConfigEntry(e,d,l)!==0&&ce(`Can't set a session config entry: 'preferredLayout' - ${a.preferredLayout}.`)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${o}`)}let i=ke(o,r);Te()._OrtAppendExecutionProvider(e,i)!==0&&ce(`Can't append execution provider: ${o}.`)}},Ha=e=>{let t=Te(),r=0,n=[],o=e||{};rm(o);try{let i=em(o.graphOptimizationLevel??"all"),a=tm(o.executionMode??"sequential"),d=typeof o.logId=="string"?ke(o.logId,n):0,l=o.logSeverityLevel??2;if(!Number.isInteger(l)||l<0||l>4)throw new Error(`log serverity level is not valid: ${l}`);let p=o.logVerbosityLevel??0;if(!Number.isInteger(p)||p<0||p>4)throw new Error(`log verbosity level is not valid: ${p}`);let m=typeof o.optimizedModelFilePath=="string"?ke(o.optimizedModelFilePath,n):0;if(r=t._OrtCreateSessionOptions(i,!!o.enableCpuMemArena,!!o.enableMemPattern,a,!!o.enableProfiling,0,d,l,p,m),r===0&&ce("Can't create session options."),o.executionProviders&&nm(r,o.executionProviders,n),o.enableGraphCapture!==void 0){if(typeof o.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: ${o.enableGraphCapture}`);let u=ke("enableGraphCapture",n),h=ke(o.enableGraphCapture.toString(),n);t._OrtAddSessionConfigEntry(r,u,h)!==0&&ce(`Can't set a session config entry: 'enableGraphCapture' - ${o.enableGraphCapture}.`)}if(o.freeDimensionOverrides)for(let[u,h]of Object.entries(o.freeDimensionOverrides)){if(typeof u!="string")throw new Error(`free dimension override name must be a string: ${u}`);if(typeof h!="number"||!Number.isInteger(h)||h<0)throw new Error(`free dimension override value must be a non-negative integer: ${h}`);let _=ke(u,n);t._OrtAddFreeDimensionOverride(r,_,h)!==0&&ce(`Can't set a free dimension override: ${u} - ${h}.`)}return o.extra!==void 0&&Kt(o.extra,"",new WeakSet,(u,h)=>{let _=ke(u,n),y=ke(h,n);t._OrtAddSessionConfigEntry(r,_,y)!==0&&ce(`Can't set a session config entry: ${u} - ${h}.`)}),[r,n]}catch(i){throw r!==0&&t._OrtReleaseSessionOptions(r)!==0&&ce("Can't release session options."),n.forEach(a=>t._free(a)),i}}});var jt,bt,At,Or,Yt,Dr,Br,Zn,J=U(()=>{"use strict";jt=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},bt=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},At=(e,t)=>{let r=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],n=typeof t=="number"?t:t.reduce((o,i)=>o*i,1);return r>0?Math.ceil(n*r):void 0},Or=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},Yt=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},Dr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Br=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",Zn=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}});var Zt,Qn=U(()=>{"use strict";vr();Zt=async e=>{if(typeof e=="string")if(false){}else{let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let r=t.headers.get("Content-Length"),n=r?parseInt(r,10):0;if(n<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let o=t.body.getReader(),i;try{i=new ArrayBuffer(n)}catch(d){if(d instanceof RangeError){let l=Math.ceil(n/65536);i=new WebAssembly.Memory({initial:l,maximum:l}).buffer}else throw d}let a=0;for(;;){let{done:d,value:l}=await o.read();if(d)break;let p=l.byteLength;new Uint8Array(i,a,p).set(l),a+=p}return new Uint8Array(i,0,n)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new Uint8Array(e)}});var om,im,qa,Ka,Mr,am,se,et=U(()=>{"use strict";J();om=["V","I","W","E","F"],im=(e,t)=>{console.log(`[${om[e]},${new Date().toISOString()}]${t}`)},Mr=(e,t)=>{qa=e,Ka=t},am=(e,t)=>{let r=Yt(e),n=Yt(qa);r>=n&&im(r,typeof t=="function"?t():t)},se=(...e)=>{Ka&&am(...e)}});var Rr,Xn=U(()=>{"use strict";J();Rr=(e,t)=>new(Or(t))(e)});var Ur=U(()=>{"use strict"});var ja,Jn,eo,sm,um,Ya,ro,to,Qa,Xa=U(()=>{"use strict";et();Ur();ja=new Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),Jn=[],eo=e=>Math.ceil(Number(e)/16)*16,sm=e=>{for(let t=0;t<Jn.length;t++){let r=Jn[t];if(e<=r)return r}return Math.ceil(e/16)*16},um=1,Ya=()=>um++,ro=async(e,t,r,n)=>{let o=eo(r),i=e.device.createBuffer({size:o,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let a=e.getCommandEncoder();e.endComputePass(),a.copyBufferToBuffer(t,0,i,0,o),e.flush(),await i.mapAsync(GPUMapMode.READ);let d=i.getMappedRange();if(n){let l=n();return l.set(new Uint8Array(d,0,r)),l}else return new Uint8Array(d.slice(0,r))}finally{i.destroy()}},to=class{constructor(t){this.backend=t;this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[r]of ja)Jn.push(r),this.freeBuffers.set(r,[]),this.freeUniformBuffers.set(r,[]);this.sessionCount=0}upload(t,r){let n=r.buffer,o=r.byteOffset,i=r.byteLength,a=eo(i),d=this.storageCache.get(t);if(!d)throw new Error("gpu data for uploading does not exist");if(Number(d.originalSize)!==i)throw new Error(`inconsistent data size. gpu data size=${d.originalSize}, data size=${i}`);let l=this.backend.device.createBuffer({mappedAtCreation:!0,size:a,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),p=l.getMappedRange();new Uint8Array(p).set(new Uint8Array(n,o,i)),l.unmap();let m=this.backend.device.createCommandEncoder();m.copyBufferToBuffer(l,0,d.gpuData.buffer,0,a),this.backend.device.queue.submit([m.finish()]),l.destroy(),se("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${t})`)}memcpy(t,r){let n=this.storageCache.get(t);if(!n)throw new Error("source gpu data for memcpy does not exist");let o=this.storageCache.get(r);if(!o)throw new Error("destination gpu data for memcpy does not exist");if(n.originalSize!==o.originalSize)throw new Error("inconsistent source and destination gpu data size");let i=eo(n.originalSize),a=this.backend.getCommandEncoder();this.backend.endComputePass(),a.copyBufferToBuffer(n.gpuData.buffer,0,o.gpuData.buffer,0,i)}registerExternalBuffer(t,r,n){let o;if(n){if(o=n[0],t===n[1])return se("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, buffer is the same, skip.`),o;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
3980
+ Please use the previous external buffer!`)}else o=Ya();return this.storageCache.set(o,{gpuData:{id:o,type:0,buffer:t},originalSize:r}),se("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${r}) => id=${o}, registered.`),o}unregisterExternalBuffer(t){t!==void 0&&(this.storageCache.delete(t),se("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${t}`))}create(t,r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let n=sm(t),o,i=(r&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,a=(r&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(i||a){let p=(i?this.freeBuffers:this.freeUniformBuffers).get(n);p?p.length>0?o=p.pop():o=this.backend.device.createBuffer({size:n,usage:r}):o=this.backend.device.createBuffer({size:n,usage:r})}else o=this.backend.device.createBuffer({size:n,usage:r});let d={id:Ya(),type:0,buffer:o};return this.storageCache.set(d.id,{gpuData:d,originalSize:Number(t)}),se("verbose",()=>`[WebGPU] GpuDataManager.create(size=${t}) => id=${d.id}`),d}get(t){return this.storageCache.get(t)?.gpuData}release(t){let r=typeof t=="bigint"?Number(t):t,n=this.storageCache.get(r);if(!n){if(this.storageCache.size===0)return 0;throw new Error("releasing data does not exist")}return se("verbose",()=>`[WebGPU] GpuDataManager.release(id=${r}), gpuDataId=${n.gpuData.id}`),this.storageCache.delete(r),this.buffersPending.push(n.gpuData.buffer),n.originalSize}async download(t,r){let n=this.storageCache.get(Number(t));if(!n)throw new Error("data does not exist");await ro(this.backend,n.gpuData.buffer,n.originalSize,r)}refreshPendingBuffers(){if(this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let t of this.buffersPending){let r=ja.get(t.size);if((t.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let n=this.freeBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else if((t.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let n=this.freeUniformBuffers.get(t.size)||[];r===void 0||n.length>=r?t.destroy():n.push(t)}else t.destroy()}this.buffersPending=[]}else{let t=this.capturedPendingBuffers.get(this.backend.currentSessionId);t||(t=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,t));for(let r of this.buffersPending)t.push(r);this.buffersPending=[]}}dispose(){this.freeBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.freeUniformBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache.forEach(t=>{t.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(t=>{t.forEach(r=>{r.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}onCreateSession(){this.sessionCount+=1}onReleaseSession(t){let r=this.capturedPendingBuffers.get(t);r&&(r.forEach(n=>{n.destroy()}),this.capturedPendingBuffers.delete(t)),this.sessionCount-=1,this.sessionCount===0&&(se("warning",()=>"[WebGPU] Clearing webgpu buffer cache"),this.storageCache.forEach(n=>{n.gpuData.buffer.destroy()}),this.storageCache=new Map)}},Qa=(...e)=>new to(...e)});var no,ee,xe=U(()=>{"use strict";no=class{constructor(t){Object.assign(this,t)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(t=>`${this[t]}`).join(";")),this.key}},ee=e=>new no(e)});var oo,tt,k,kt,Nr,Ja,es,ne=U(()=>{"use strict";oo=class{static calcMatMulShape(t,r){return t[1]!==r[0]?void 0:[t[0],r[1]]}},tt=class{static calcShape(t,r,n=!1){let o=t.length,i=r.length;if(o===0)return r;if(i===0)return t;let a=Math.max(t.length,r.length),d=new Array(a);if(n){if(o<2||i<2)return;let l=oo.calcMatMulShape([t[o-2],t[o-1]],[r[i-2],r[i-1]]);if(l===void 0)return;[d[a-2],d[a-1]]=l}for(let l=n?3:1;l<=a;l++){let p=o-l<0?1:t[o-l],m=i-l<0?1:r[i-l];if(p!==m&&p>1&&m>1)return;let u=Math.max(p,m);if(p&&m)d[a-l]=Math.max(p,m);else{if(u>1)return;d[a-l]=0}}return d}static isValidBroadcast(t,r){let n=t.length,o=r.length;if(n>o)return!1;for(let i=1;i<=n;i++)if(t[n-i]!==1&&t[n-i]!==r[o-i])return!1;return!0}},k=class e{static size(t){return e.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,r=4){let n=t.length;if(n===0)return[];let o=new Array(n),i=n-1;for(;i>=0;){if(t[i]%r===0){o[i]=t[i]/r;break}if(r%t[i]!==0)throw new Error("cannot convert shape");o[i]=1,r/=t[i],i--}for(i--;i>=0;i--)o[i]=t[i];return o}static sizeFromDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,r,t.length)}static sizeToDimension(t,r){if(r<0||r>t.length)throw new Error(`invalid dimension of ${r} for sizeToDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,0,r)}static getSizeFromDimensionRange(t,r,n){let o=1;for(let i=r;i<n;i++){if(t[i]<0)throw new Error("cannot get valid size from specified dimension range. Most likely the range contains negative values in them.");o*=Number(t[i])}return o}static computeStrides(t){let r=t.length;if(r===0)return[];if(r===1)return[1];let n=new Array(r);n[r-1]=1,n[r-2]=t[r-1];for(let o=r-3;o>=0;--o)n[o]=n[o+1]*t[o+1];return n}static normalizeAxis(t,r){if(t<-r&&t>=r)throw new Error("unsupported axis for this operation.");return t<0?t+r:t}static normalizeAxes(t,r){return t.map(n=>this.normalizeAxis(n,r??t.length))}static sortBasedOnPerm(t,r){return r?r.map(n=>t[n]):t.slice().reverse()}static padShape(t,r){let n=t.length;return t.map((o,i)=>o+r[i]+r[i+n])}static areEqual(t,r){return t.length!==r.length?!1:t.every((n,o)=>n===r[o])}},kt=class e{static adjustPoolAttributes(t,r,n,o,i,a){if(!t&&n.length!==r.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let d=0;d<r.length-2;d++)d>=n.length?n.push(r[d+2]):n[d]=r[d+2];for(let d=0;d<n.length;d++)if(d<o.length){if(o[d]<0)throw new Error("strides should be greater than or equal to 1")}else o.push(1);for(let d=0;d<n.length;d++)if(d<i.length){if(i[d]<0)throw new Error("dilations should be greater than or equal to 1")}else i.push(1);for(let d=0;d<n.length*2;d++)if(d<a.length){if(a[d]<0)throw new Error("pad should be greater than or equal to 1")}else a.push(0);for(let d=0;d<n.length;d++){if(n[d]<=0)throw new Error("kernel shapes need to be greater than 0");if(a[d]>=n[d]||a[d+n.length]>=n[d])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,r,n,o,i,a,d){if(d){if(i.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(r.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(o.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let l=0;l<t.length-2;l++)e.adjustPadAndReturnShape(t[l+(a?1:2)],r[l],n[l],o[l],i,l,l+t.length-2,d)}}static computePoolOutputShape(t,r,n,o,i,a,d){if(r.length<=0)throw new Error("input shape must be of size greater than 0");let l=[r[0],r[1]];return e.computeShapeHelper(t,r,l,n,o,i,a,d),l}static computeConvOutputShape(t,r,n,o,i,a,d){if(t.length<=0||r.length<=0)throw new Error("invalid input tensor dims or invalid filter tensor dims");let l=[t[0],r[0]];return e.computeShapeHelper(!1,t,l,n,o,i,a,d),l}static computeShapeHelper(t,r,n,o,i,a,d,l){if(t)for(let p=0;p<r.length-2;p++)n.push(1);else for(let p=0;p<r.length-2;p++)n.push(e.adjustPadAndReturnShape(r[p+2],o[p],i[p],a[p],d,p,p+r.length-2,l))}static adjustPadAndReturnShape(t,r,n,o,i,a,d,l){let p=n*(o-1)+1;if(l&&l!=="NOTSET")switch(l){case"VALID":return i[a]=0,i[d]=0,Math.floor((t-p)/r+1);case"SAME_LOWER":case"SAME_UPPER":if(n!==1)throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");{let u=((t+r-1)/r-1)*r+o-t;return i[a]=Math.floor(l==="SAME_LOWER"?(u+1)/2:u/2),i[d]=u-i[a],Math.floor((t+u-o)/r+1)}default:throw new Error("Unsupported AutoPad type")}else return Math.floor((t+i[a]+i[d]-p)/r+1)}},Nr=class{static getShapeOfGemmResult(t,r,n,o,i){if(t.length!==2||n.length!==2)throw new Error("shape need to be of size 2");let a,d,l;r?(a=t[1],d=t[0]):(a=t[0],d=t[1]);let p=-1;if(o?(l=n[0],p=1):(l=n[1],p=0),n[p]!==d)throw new Error("dimension mismatch");if(a<=0||l<=0||d<=0)throw new Error("invalid shape specified");if(i&&!tt.isValidBroadcast(i,[a,l]))throw new Error("gemm: invalid bias shape for broadcast");return[a,l,d]}},Ja=-34028234663852886e22,es=34028234663852886e22});var Et,ao,ye,Ee,N,me,so,Pt,qe,q,Vr,E,M,ts,Wr,io,rs,ae=U(()=>{"use strict";J();ne();Et=64,ao=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(Number(e)){case 10:return t>1?`vec${t}<f16>`:"f16";case 1:return t>1?`vec${t}<f32>`:"f32";case 6:return t>1?`vec${t}<i32>`:"i32";case 12:return t>1?`vec${t}<u32>`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2<u32>","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4<bool>"];case 22:return"i32";case 21:return"u32";default:throw new Error(`Unknown data type: ${e}`)}},ye=(e,t=1)=>{let r=ao(e,t);return typeof r=="string"?r:r[0]},Ee=(e,t=1)=>{let r=ao(e,t);return typeof r=="string"?r:r[1]},N=(...e)=>{let t=[];return e.forEach(r=>{r.length!==0&&t.push({type:12,data:r},{type:12,data:k.computeStrides(r)})}),t},me=e=>e%4===0?4:e%2===0?2:1,so=(e="f32",t,r="0")=>!t||t===1?`${e}(${r})`:`vec${t}<${e}>(${r})`,Pt=(e,t,r)=>e==="f32"?r:t===1?`f32(${r})`:`vec${t}<f32>(${r})`,qe=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,q=(e,t,r,n)=>e.startsWith("uniforms.")&&r>4?typeof t=="string"?n==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 4]`:n==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:r>1?`${e}[${t}]`:e,Vr=(e,t,r,n,o)=>{let i=typeof r=="number",a=i?r:r.length,d=[...new Array(a).keys()],l=a<2?"u32":a<=4?`vec${a}<u32>`:`array<u32, ${a}>`,p=ao(t,o),m=typeof p=="string"?p:p[1],u=typeof p=="string"?p:p[0],h={indices:l,value:m,storage:u,tensor:t},_=W=>typeof W=="string"?W:`${W}u`,y={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},g=i?"uniforms.":"",x=`${g}${e}_shape`,$=`${g}${e}_strides`,v="";for(let W=0;W<a-1;W++)v+=`
3981
+ let dim${W} = current / ${q($,W,a)};
3982
+ let rest${W} = current % ${q($,W,a)};
3928
3983
  indices[${W}] = dim${W};
3929
3984
  current = rest${W};
3930
3985
  `;v+=`indices[${a-1}] = current;`;let S=a<2?"":`
@@ -3933,25 +3988,25 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
3933
3988
  var current = offset;
3934
3989
  ${v}
3935
3990
  return indices;
3936
- }`,T=W=>(y.offsetToIndices=!0,a<2?W:`o2i_${e}(${W})`),A=[];if(a>=2)for(let W=a-1;W>=0;W--)A.push(`${F($,W,a)} * (indices[${W}])`);let k=a<2?"":`
3991
+ }`,T=W=>(y.offsetToIndices=!0,a<2?W:`o2i_${e}(${W})`),A=[];if(a>=2)for(let W=a-1;W>=0;W--)A.push(`${q($,W,a)} * (indices[${W}])`);let C=a<2?"":`
3937
3992
  fn i2o_${e}(indices: ${h.indices}) -> u32 {
3938
3993
  return ${A.join("+")};
3939
- }`,P=W=>(y.indicesToOffset=!0,a<2?W:`i2o_${e}(${W})`),D=(...W)=>a===0?"0u":`${h.indices}(${W.map(_).join(",")})`,R=(W,q)=>a<2?`${W}`:`${F(W,q,a)}`,G=(W,q,he)=>a<2?`${W}=${he};`:`${F(W,q,a)}=${he};`,K={},j=(W,q)=>{y.broadcastedIndicesToOffset=!0;let he=`${q.name}broadcastedIndicesTo${e}Offset`;if(he in K)return`${he}(${W})`;let Ge=[];for(let we=a-1;we>=0;we--){let ye=q.indicesGet("outputIndices",we+q.rank-a);Ge.push(`${R($,we)} * (${ye} % ${R(x,we)})`)}return K[he]=`fn ${he}(outputIndices: ${q.type.indices}) -> u32 {
3940
- return ${Ge.length>0?Ge.join("+"):"0u"};
3941
- }`,`${he}(${W})`},V=(W,q)=>(()=>{if(h.storage===h.value)return`${e}[${W}]=${q};`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`${e}[${W}]=vec2<u32>(u32(${q}), select(0u, 0xFFFFFFFFu, ${q} < 0));`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`${e}[${W}]=vec2<u32>(u32(${q}), 0u);`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`${e}[${W}]=dot(vec4<u32>(0x1, 0x100, 0x10000, 0x1000000), vec4<u32>(${q}));`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),Q=W=>(()=>{if(h.storage===h.value)return`${e}[${W}]`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`i32(${e}[${W}].x)`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`u32(${e}[${W}].x)`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`vec4<bool>(bool(${e}[${W}] & 0xFFu), bool(${e}[${W}] & 0xFF00u), bool(${e}[${W}] & 0xFF0000u), bool(${e}[${W}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),se=a<2?"":`
3994
+ }`,P=W=>(y.indicesToOffset=!0,a<2?W:`i2o_${e}(${W})`),D=(...W)=>a===0?"0u":`${h.indices}(${W.map(_).join(",")})`,R=(W,Y)=>a<2?`${W}`:`${q(W,Y,a)}`,H=(W,Y,he)=>a<2?`${W}=${he};`:`${q(W,Y,a)}=${he};`,L={},re=(W,Y)=>{y.broadcastedIndicesToOffset=!0;let he=`${Y.name}broadcastedIndicesTo${e}Offset`;if(he in L)return`${he}(${W})`;let De=[];for(let pe=a-1;pe>=0;pe--){let Ie=Y.indicesGet("outputIndices",pe+Y.rank-a);De.push(`${R($,pe)} * (${Ie} % ${R(x,pe)})`)}return L[he]=`fn ${he}(outputIndices: ${Y.type.indices}) -> u32 {
3995
+ return ${De.length>0?De.join("+"):"0u"};
3996
+ }`,`${he}(${W})`},V=(W,Y)=>(()=>{if(h.storage===h.value)return`${e}[${W}]=${Y};`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`${e}[${W}]=vec2<u32>(u32(${Y}), select(0u, 0xFFFFFFFFu, ${Y} < 0));`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`${e}[${W}]=vec2<u32>(u32(${Y}), 0u);`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`${e}[${W}]=dot(vec4<u32>(0x1, 0x100, 0x10000, 0x1000000), vec4<u32>(${Y}));`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),K=W=>(()=>{if(h.storage===h.value)return`${e}[${W}]`;if(h.storage==="vec2<u32>"&&h.value==="i32")return`i32(${e}[${W}].x)`;if(h.storage==="vec2<u32>"&&h.value==="u32")return`u32(${e}[${W}].x)`;if(h.storage==="u32"&&h.value==="vec4<bool>")return`vec4<bool>(bool(${e}[${W}] & 0xFFu), bool(${e}[${W}] & 0xFF00u), bool(${e}[${W}] & 0xFF0000u), bool(${e}[${W}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${h.storage} and value type ${h.value} yet`)})(),we=a<2?"":`
3942
3997
  fn get_${e}ByIndices(indices: ${h.indices}) -> ${m} {
3943
- return ${Q(`i2o_${e}(indices)`)};
3944
- }`,Y=a<2?"":(()=>{let W=d.map(he=>`d${he}: u32`).join(", "),q=d.map(he=>`d${he}`).join(", ");return`
3998
+ return ${K(`i2o_${e}(indices)`)};
3999
+ }`,j=a<2?"":(()=>{let W=d.map(he=>`d${he}: u32`).join(", "),Y=d.map(he=>`d${he}`).join(", ");return`
3945
4000
  fn get_${e}(${W}) -> ${m} {
3946
- return get_${e}ByIndices(${D(q)});
3947
- }`})(),ee=(...W)=>{if(W.length!==a)throw new Error(`indices length must be ${a}`);let q=W.map(_).join(",");return a===0?Q("0u"):a===1?Q(q[0]):(y.get=!0,y.getByIndices=!0,y.indicesToOffset=!0,`get_${e}(${q})`)},J=W=>a<2?Q(W):(y.getByIndices=!0,y.indicesToOffset=!0,`get_${e}ByIndices(${W})`),ne=a<2?"":`
4001
+ return get_${e}ByIndices(${D(Y)});
4002
+ }`})(),Q=(...W)=>{if(W.length!==a)throw new Error(`indices length must be ${a}`);let Y=W.map(_).join(",");return a===0?K("0u"):a===1?K(Y[0]):(y.get=!0,y.getByIndices=!0,y.indicesToOffset=!0,`get_${e}(${Y})`)},ie=W=>a<2?K(W):(y.getByIndices=!0,y.indicesToOffset=!0,`get_${e}ByIndices(${W})`),te=a<2?"":`
3948
4003
  fn set_${e}ByIndices(indices: ${h.indices}, value: ${m}) {
3949
4004
  ${V(`i2o_${e}(indices)`,"value")}
3950
- }`,be=a<2?"":(()=>{let W=d.map(he=>`d${he}: u32`).join(", "),q=d.map(he=>`d${he}`).join(", ");return`
4005
+ }`,be=a<2?"":(()=>{let W=d.map(he=>`d${he}: u32`).join(", "),Y=d.map(he=>`d${he}`).join(", ");return`
3951
4006
  fn set_${e}(${W}, value: ${m}) {
3952
- set_${e}ByIndices(${D(q)}, value);
3953
- }`})();return{impl:()=>{let W=[],q=!1;return y.offsetToIndices&&(W.push(S),q=!0),y.indicesToOffset&&(W.push(k),q=!0),y.broadcastedIndicesToOffset&&(Object.values(K).forEach(he=>W.push(he)),q=!0),y.set&&(W.push(be),q=!0),y.setByIndices&&(W.push(ne),q=!0),y.get&&(W.push(Y),q=!0),y.getByIndices&&(W.push(se),q=!0),!i&&q&&W.unshift(`const ${x} = ${h.indices}(${r.join(",")});`,`const ${$} = ${h.indices}(${C.computeStrides(r).join(",")});`),W.join(`
3954
- `)},type:h,offsetToIndices:T,indicesToOffset:P,broadcastedIndicesToOffset:j,indices:D,indicesGet:R,indicesSet:G,set:(...W)=>{if(W.length!==a+1)throw new Error(`indices length must be ${a}`);let q=W[a];if(typeof q!="string")throw new Error("value must be string");let he=W.slice(0,a).map(_).join(",");return a===0?V("0u",q):a===1?V(he[0],q):(y.set=!0,y.setByIndices=!0,y.indicesToOffset=!0,`set_${e}(${he}, ${q})`)},setByOffset:V,setByIndices:(W,q)=>a<2?V(W,q):(y.setByIndices=!0,y.indicesToOffset=!0,`set_${e}ByIndices(${W}, ${q});`),get:ee,getByOffset:Q,getByIndices:J,usage:n,name:e,strides:$,shape:x,rank:a}},E=(e,t,r,n=1)=>Wr(e,t,r,"input",n),M=(e,t,r,n=1)=>Wr(e,t,r,"output",n),rs=(e,t,r)=>Wr(e,t,r,"atomicOutput",1),Lr=(e,t,r,n=1)=>Wr(e,t,r,"internal",n),ao=class{constructor(t,r){this.normalizedDispatchGroup=t;this.limits=r;this.internalVariables=[];this.variables=[];this.uniforms=[];this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(t){return`if (global_idx >= ${typeof t=="number"?`${t}u`:t}) { return; }`}mainStart(t=kt){let r=typeof t=="number"?t:t[0],n=typeof t=="number"?1:t[1],o=typeof t=="number"?1:t[2];if(r>this.limits.maxComputeWorkgroupSizeX||n>this.limits.maxComputeWorkgroupSizeY||o>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);if(r*n*o>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let i=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,a=i?`@builtin(global_invocation_id) global_id : vec3<u32>,
4007
+ set_${e}ByIndices(${D(Y)}, value);
4008
+ }`})();return{impl:()=>{let W=[],Y=!1;return y.offsetToIndices&&(W.push(S),Y=!0),y.indicesToOffset&&(W.push(C),Y=!0),y.broadcastedIndicesToOffset&&(Object.values(L).forEach(he=>W.push(he)),Y=!0),y.set&&(W.push(be),Y=!0),y.setByIndices&&(W.push(te),Y=!0),y.get&&(W.push(j),Y=!0),y.getByIndices&&(W.push(we),Y=!0),!i&&Y&&W.unshift(`const ${x} = ${h.indices}(${r.join(",")});`,`const ${$} = ${h.indices}(${k.computeStrides(r).join(",")});`),W.join(`
4009
+ `)},type:h,offsetToIndices:T,indicesToOffset:P,broadcastedIndicesToOffset:re,indices:D,indicesGet:R,indicesSet:H,set:(...W)=>{if(W.length!==a+1)throw new Error(`indices length must be ${a}`);let Y=W[a];if(typeof Y!="string")throw new Error("value must be string");let he=W.slice(0,a).map(_).join(",");return a===0?V("0u",Y):a===1?V(he[0],Y):(y.set=!0,y.setByIndices=!0,y.indicesToOffset=!0,`set_${e}(${he}, ${Y})`)},setByOffset:V,setByIndices:(W,Y)=>a<2?V(W,Y):(y.setByIndices=!0,y.indicesToOffset=!0,`set_${e}ByIndices(${W}, ${Y});`),get:Q,getByOffset:K,getByIndices:ie,usage:n,name:e,strides:$,shape:x,rank:a}},E=(e,t,r,n=1)=>Vr(e,t,r,"input",n),M=(e,t,r,n=1)=>Vr(e,t,r,"output",n),ts=(e,t,r)=>Vr(e,t,r,"atomicOutput",1),Wr=(e,t,r,n=1)=>Vr(e,t,r,"internal",n),io=class{constructor(t,r){this.normalizedDispatchGroup=t;this.limits=r;this.internalVariables=[];this.variables=[];this.uniforms=[];this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(t){return`if (global_idx >= ${typeof t=="number"?`${t}u`:t}) { return; }`}mainStart(t=Et){let r=typeof t=="number"?t:t[0],n=typeof t=="number"?1:t[1],o=typeof t=="number"?1:t[2];if(r>this.limits.maxComputeWorkgroupSizeX||n>this.limits.maxComputeWorkgroupSizeY||o>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);if(r*n*o>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${r}, ${n}, ${o}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let i=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,a=i?`@builtin(global_invocation_id) global_id : vec3<u32>,
3955
4010
  @builtin(workgroup_id) workgroup_id : vec3<u32>,
3956
4011
  @builtin(local_invocation_index) local_idx : u32,
3957
4012
  @builtin(local_invocation_id) local_id : vec3<u32>`:`@builtin(global_invocation_id) global_id : vec3<u32>,
@@ -3969,13 +4024,13 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
3969
4024
  struct Uniforms { ${t.join(", ")} };
3970
4025
  @group(0) @binding(${this.variableIndex}) var<uniform> uniforms: Uniforms;`}get additionalImplementations(){return this.uniformDeclaration()+this.variables.map(t=>t.impl()).join(`
3971
4026
  `)+this.internalVariables.map(t=>t.impl()).join(`
3972
- `)}get variablesInfo(){if(this.uniforms.length===0)return;let t=r=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(r)];return this.uniforms.map(r=>[t(r.type),r.length??1])}},ns=(e,t)=>new ao(e,t)});var dm,os,lm,cm,pm,mm,Pe,is,as,dt=U(()=>{"use strict";te();oe();Se();ae();dm=(e,t)=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.");if(t.length!==0&&t.length!==e[0].dims.length)throw new Error(`perm size ${t.length} does not match input rank ${e[0].dims.length}`)},os=(e,t)=>t.length!==0?t:[...new Array(e).keys()].reverse(),lm=(e,t)=>C.sortBasedOnPerm(e,os(e.length,t)),cm=(e,t,r,n)=>{let o=`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
3973
- var a: ${r.type.indices};`;for(let i=0;i<t;++i)o+=`a[${e[i]}]=i[${i}];`;return o+="return a;}"},pm=(e,t)=>{let r=[],n=[];for(let o=0;o<e.length;++o)e[o]!==1&&r.push(e[o]),e[t[o]]!==1&&n.push(t[o]);return{newShape:r,newPerm:n}},mm=(e,t)=>{let r=0;for(let n=0;n<e.length;++n)if(t[e[n]]!==1){if(e[n]<r)return!1;r=e[n]}return!0},Pe=(e,t)=>{let r=e.dataType,n=e.dims.length,o=os(n,t),i=lm(e.dims,o),a=e.dims,d=i,l=n<2||mm(o,e.dims),p;if(l)return p=g=>{let x=E("input",r,a,4),$=M("output",r,d,4);return`
4027
+ `)}get variablesInfo(){if(this.uniforms.length===0)return;let t=r=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(r)];return this.uniforms.map(r=>[t(r.type),r.length??1])}},rs=(e,t)=>new io(e,t)});var dm,ns,lm,cm,pm,mm,Pe,os,is,dt=U(()=>{"use strict";J();ne();xe();ae();dm=(e,t)=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.");if(t.length!==0&&t.length!==e[0].dims.length)throw new Error(`perm size ${t.length} does not match input rank ${e[0].dims.length}`)},ns=(e,t)=>t.length!==0?t:[...new Array(e).keys()].reverse(),lm=(e,t)=>k.sortBasedOnPerm(e,ns(e.length,t)),cm=(e,t,r,n)=>{let o=`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
4028
+ var a: ${r.type.indices};`;for(let i=0;i<t;++i)o+=`a[${e[i]}]=i[${i}];`;return o+="return a;}"},pm=(e,t)=>{let r=[],n=[];for(let o=0;o<e.length;++o)e[o]!==1&&r.push(e[o]),e[t[o]]!==1&&n.push(t[o]);return{newShape:r,newPerm:n}},mm=(e,t)=>{let r=0;for(let n=0;n<e.length;++n)if(t[e[n]]!==1){if(e[n]<r)return!1;r=e[n]}return!0},Pe=(e,t)=>{let r=e.dataType,n=e.dims.length,o=ns(n,t),i=lm(e.dims,o),a=e.dims,d=i,l=n<2||mm(o,e.dims),p;if(l)return p=g=>{let x=E("input",r,a,4),$=M("output",r,d,4);return`
3974
4029
  ${g.registerUniform("output_size","u32").declareVariables(x,$)}
3975
4030
  ${g.mainStart()}
3976
4031
  ${g.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
3977
4032
  output[global_idx] = input[global_idx];
3978
- }`},{name:"TransposeCopy",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let g=C.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64/4)},programUniforms:[{type:12,data:Math.ceil(g/4)}]}},getShaderSource:p};let{newShape:m,newPerm:u}=pm(e.dims,o),h=C.areEqual(u,[2,3,1]),_=C.areEqual(u,[3,1,2]);if(m.length===2||h||_){a=h?[m[0],m[1]*m[2]]:_?[m[0]*m[1],m[2]]:m,d=[a[1],a[0]];let g=16;return p=x=>{let $=E("a",r,a.length),v=M("output",r,d.length);return`
4033
+ }`},{name:"TransposeCopy",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let g=k.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64/4)},programUniforms:[{type:12,data:Math.ceil(g/4)}]}},getShaderSource:p};let{newShape:m,newPerm:u}=pm(e.dims,o),h=k.areEqual(u,[2,3,1]),_=k.areEqual(u,[3,1,2]);if(m.length===2||h||_){a=h?[m[0],m[1]*m[2]]:_?[m[0]*m[1],m[2]]:m,d=[a[1],a[0]];let g=16;return p=x=>{let $=E("a",r,a.length),v=M("output",r,d.length);return`
3979
4034
  ${x.registerUniform("output_size","u32").declareVariables($,v)}
3980
4035
  var<workgroup> tile : array<array<${v.type.value}, ${g+1}>, ${g}>;
3981
4036
  ${x.mainStart([g,g,1])}
@@ -3994,7 +4049,7 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
3994
4049
  if (output_row < uniforms.output_shape[0] && output_col < uniforms.output_shape[1]) {
3995
4050
  ${v.setByIndices(`${v.type.indices}(output_row, output_col)`,"tile[local_id.x][local_id.y]")}
3996
4051
  }
3997
- }`},{name:"TransposeShared",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let x=C.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(d[1]/g),y:Math.ceil(d[0]/g)},programUniforms:[{type:12,data:x},...N(a,d)]}},getShaderSource:p}}return p=g=>{let x=E("a",r,a.length),$=M("output",r,d.length);return`
4052
+ }`},{name:"TransposeShared",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let x=k.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(d[1]/g),y:Math.ceil(d[0]/g)},programUniforms:[{type:12,data:x},...N(a,d)]}},getShaderSource:p}}return p=g=>{let x=E("a",r,a.length),$=M("output",r,d.length);return`
3998
4053
  ${g.registerUniform("output_size","u32").declareVariables(x,$)}
3999
4054
 
4000
4055
  ${cm(o,n,x,$)}
@@ -4006,7 +4061,7 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
4006
4061
  let aIndices = perm(indices);
4007
4062
 
4008
4063
  ${$.setByOffset("global_idx",x.getByIndices("aIndices"))}
4009
- }`},{name:"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let g=C.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...N(a,d)]}},getShaderSource:p}},is=(e,t)=>{dm(e.inputs,t.perm),e.compute(Pe(e.inputs[0],t.perm))},as=e=>re({perm:e.perm})});var fm,hm,gm,bm,ym,_m,wm,vm,$m,xm,rt,ss,us,ds,ls,cs,ps,ms,fs,hs,gs,bs=U(()=>{"use strict";te();oe();ae();Gr();dt();fm={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},hm={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + candidate"},gm={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},bm={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},ym=(e,t)=>{let r=[];for(let n=t-e;n<t;++n)r.push(n);return r},_m=(e,t)=>{let r=[],n=e.length;for(let i=0;i<n;i++)t.indexOf(i)===-1&&r.push(e[i]);let o=t.map(i=>e[i]);return[r,o]},wm=(e,t)=>{let r=e.length+t.length,n=[],o=0;for(let i=0;i<r;i++)t.indexOf(i)===-1?n.push(e[o++]):n.push(1);return n},vm=(e,t)=>{for(let r=0;r<e.length;++r)if(e[e.length-r-1]!==t-1-r)return!1;return!0},$m=(e,t)=>{let r=[];if(!vm(e,t)){for(let n=0;n<t;++n)e.indexOf(n)===-1&&r.push(n);e.forEach(n=>r.push(n))}return r},xm=(e,t,r,n,o,i,a)=>{let d=r[0].dims,l=C.size(i),p=C.size(a),m=E("_A",r[0].dataType,d),u=M("output",o,i),h=64;l===1&&(h=256);let _=`
4064
+ }`},{name:"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let g=k.size(i);return{outputs:[{dims:i,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:[{type:12,data:g},...N(a,d)]}},getShaderSource:p}},os=(e,t)=>{dm(e.inputs,t.perm),e.compute(Pe(e.inputs[0],t.perm))},is=e=>ee({perm:e.perm})});var fm,hm,gm,bm,ym,_m,wm,vm,$m,xm,rt,as,ss,us,ds,ls,cs,ps,ms,fs,hs,gs=U(()=>{"use strict";J();ne();ae();Lr();dt();fm={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},hm={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + candidate"},gm={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},bm={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},ym=(e,t)=>{let r=[];for(let n=t-e;n<t;++n)r.push(n);return r},_m=(e,t)=>{let r=[],n=e.length;for(let i=0;i<n;i++)t.indexOf(i)===-1&&r.push(e[i]);let o=t.map(i=>e[i]);return[r,o]},wm=(e,t)=>{let r=e.length+t.length,n=[],o=0;for(let i=0;i<r;i++)t.indexOf(i)===-1?n.push(e[o++]):n.push(1);return n},vm=(e,t)=>{for(let r=0;r<e.length;++r)if(e[e.length-r-1]!==t-1-r)return!1;return!0},$m=(e,t)=>{let r=[];if(!vm(e,t)){for(let n=0;n<t;++n)e.indexOf(n)===-1&&r.push(n);e.forEach(n=>r.push(n))}return r},xm=(e,t,r,n,o,i,a)=>{let d=r[0].dims,l=k.size(i),p=k.size(a),m=E("_A",r[0].dataType,d),u=M("output",o,i),h=64;l===1&&(h=256);let _=`
4010
4065
  var<workgroup> aBestValues : array<f32, ${h}>;
4011
4066
  `,y=g=>`
4012
4067
  ${g.registerUniform("reduceSize","u32").declareVariables(m,u)}
@@ -4044,11 +4099,11 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
4044
4099
  if (local_idx == 0u) {
4045
4100
  ${u.setByOffset("outputIndex",`${n==="mean"?`${u.type.storage}(bestValue / f32(uniforms.reduceSize))`:`${u.type.storage}(${bm[n]})`}`)};
4046
4101
  }
4047
- }`;return{name:e,shaderCache:{hint:`${t};${h}`,inputDependencies:["type"]},getShaderSource:y,getRunData:()=>({outputs:[{dims:i,dataType:o}],dispatchGroup:{x:l},programUniforms:[{type:12,data:p}]})}},rt=(e,t,r,n)=>{let o=e.inputs.length===1?r:lo(e.inputs,r),i=o.axes;i.length===0&&!o.noopWithEmptyAxes&&(i=e.inputs[0].dims.map((_,y)=>y));let a=C.normalizeAxes(i,e.inputs[0].dims.length),d=a,l=e.inputs[0],p=$m(d,e.inputs[0].dims.length);p.length>0&&(l=e.compute(Pe(e.inputs[0],p),{inputs:[0],outputs:[-1]})[0],d=ym(d.length,l.dims.length));let[m,u]=_m(l.dims,d),h=m;o.keepDims&&(h=wm(m,a)),e.compute(xm(t,o.cacheKey,[l],n,e.inputs[0].dataType,h,u),{inputs:[l]})},ss=(e,t)=>{rt(e,"ReduceMeanShared",t,"mean")},us=(e,t)=>{rt(e,"ReduceL1Shared",t,"l1")},ds=(e,t)=>{rt(e,"ReduceL2Shared",t,"l2")},ls=(e,t)=>{rt(e,"ReduceLogSumExpShared",t,"logSumExp")},cs=(e,t)=>{rt(e,"ReduceMaxShared",t,"max")},ps=(e,t)=>{rt(e,"ReduceMinShared",t,"min")},ms=(e,t)=>{rt(e,"ReduceProdShared",t,"prod")},fs=(e,t)=>{rt(e,"ReduceSumShared",t,"sum")},hs=(e,t)=>{rt(e,"ReduceSumSquareShared",t,"sumSquare")},gs=(e,t)=>{rt(e,"ReduceLogSumShared",t,"logSum")}});var nt,Sm,Hr,lo,ot,Tm,Im,Cm,Am,km,Em,Pm,zm,Om,Dm,it,ys,_s,ws,vs,$s,xs,Ss,Ts,Is,Cs,Gr=U(()=>{"use strict";te();oe();Se();ae();bs();nt=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},Sm=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],Hr=(e,t,r,n,o,i,a=!1,d=!1)=>{let l=[],p=r[0].dims,m=p.length,u=C.normalizeAxes(o,m),h=!d&&u.length===0;p.forEach((x,$)=>{h||u.indexOf($)>=0?a&&l.push(1):l.push(x)});let _=l.length,y=C.size(l);return{name:e,shaderCache:t,getShaderSource:x=>{let $=[],v=E("_A",r[0].dataType,m),S=M("output",i,_),T=n(v,S,u),A=T[2];for(let k=0,P=0;k<m;k++)h||u.indexOf(k)>=0?(a&&P++,A=`for(var j${k}: u32 = 0; j${k} < ${p[k]}; j${k}++) {
4048
- ${T[2].includes("last_index")?`let last_index = j${k};`:""}
4049
- ${v.indicesSet("input_indices",k,`j${k}`)}
4102
+ }`;return{name:e,shaderCache:{hint:`${t};${h}`,inputDependencies:["type"]},getShaderSource:y,getRunData:()=>({outputs:[{dims:i,dataType:o}],dispatchGroup:{x:l},programUniforms:[{type:12,data:p}]})}},rt=(e,t,r,n)=>{let o=e.inputs.length===1?r:uo(e.inputs,r),i=o.axes;i.length===0&&!o.noopWithEmptyAxes&&(i=e.inputs[0].dims.map((_,y)=>y));let a=k.normalizeAxes(i,e.inputs[0].dims.length),d=a,l=e.inputs[0],p=$m(d,e.inputs[0].dims.length);p.length>0&&(l=e.compute(Pe(e.inputs[0],p),{inputs:[0],outputs:[-1]})[0],d=ym(d.length,l.dims.length));let[m,u]=_m(l.dims,d),h=m;o.keepDims&&(h=wm(m,a)),e.compute(xm(t,o.cacheKey,[l],n,e.inputs[0].dataType,h,u),{inputs:[l]})},as=(e,t)=>{rt(e,"ReduceMeanShared",t,"mean")},ss=(e,t)=>{rt(e,"ReduceL1Shared",t,"l1")},us=(e,t)=>{rt(e,"ReduceL2Shared",t,"l2")},ds=(e,t)=>{rt(e,"ReduceLogSumExpShared",t,"logSumExp")},ls=(e,t)=>{rt(e,"ReduceMaxShared",t,"max")},cs=(e,t)=>{rt(e,"ReduceMinShared",t,"min")},ps=(e,t)=>{rt(e,"ReduceProdShared",t,"prod")},ms=(e,t)=>{rt(e,"ReduceSumShared",t,"sum")},fs=(e,t)=>{rt(e,"ReduceSumSquareShared",t,"sumSquare")},hs=(e,t)=>{rt(e,"ReduceLogSumShared",t,"logSum")}});var nt,Sm,Gr,uo,ot,Tm,Im,Cm,Am,km,Em,Pm,zm,Om,Dm,it,bs,ys,_s,ws,vs,$s,xs,Ss,Ts,Is,Lr=U(()=>{"use strict";J();ne();xe();ae();gs();nt=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},Sm=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],Gr=(e,t,r,n,o,i,a=!1,d=!1)=>{let l=[],p=r[0].dims,m=p.length,u=k.normalizeAxes(o,m),h=!d&&u.length===0;p.forEach((x,$)=>{h||u.indexOf($)>=0?a&&l.push(1):l.push(x)});let _=l.length,y=k.size(l);return{name:e,shaderCache:t,getShaderSource:x=>{let $=[],v=E("_A",r[0].dataType,m),S=M("output",i,_),T=n(v,S,u),A=T[2];for(let C=0,P=0;C<m;C++)h||u.indexOf(C)>=0?(a&&P++,A=`for(var j${C}: u32 = 0; j${C} < ${p[C]}; j${C}++) {
4103
+ ${T[2].includes("last_index")?`let last_index = j${C};`:""}
4104
+ ${v.indicesSet("input_indices",C,`j${C}`)}
4050
4105
  ${A}
4051
- }`):($.push(`${v.indicesSet("input_indices",k,S.indicesGet("output_indices",P))};`),P++);return`
4106
+ }`):($.push(`${v.indicesSet("input_indices",C,S.indicesGet("output_indices",P))};`),P++);return`
4052
4107
 
4053
4108
  ${x.registerUniform("output_size","u32").declareVariables(v,S)}
4054
4109
 
@@ -4065,19 +4120,19 @@ var Un=Object.defineProperty;var Vp=Object.getOwnPropertyDescriptor;var Wp=Objec
4065
4120
  ${T[3]}
4066
4121
  ${T.length===4?S.setByOffset("global_idx","value"):T.slice(4).join(`
4067
4122
  `)}
4068
- }`},getRunData:()=>({outputs:[{dims:l,dataType:i}],dispatchGroup:{x:Math.ceil(y/64)},programUniforms:[{type:12,data:y},...N(p,l)]})}},lo=(e,t)=>{let r=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(n=>r.push(Number(n))),re({axes:r,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},ot=(e,t,r,n)=>{let o=e.inputs,i=o.length===1?r:lo(o,r);e.compute(Hr(t,{hint:i.cacheKey,inputDependencies:["rank"]},[o[0]],i.noopWithEmptyAxes&&i.axes.length===0?Sm:n,i.axes,o[0].dataType,i.keepDims,i.noopWithEmptyAxes),{inputs:[0]})},Tm=(e,t)=>{nt(e.inputs),ot(e,"ReduceLogSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,"value = log(value);"])},Im=(e,t)=>{nt(e.inputs),ot(e,"ReduceL1",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += abs(${n.getByIndices("input_indices")});`,""])},Cm=(e,t)=>{nt(e.inputs),ot(e,"ReduceL2",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},Am=(e,t)=>{nt(e.inputs),ot(e,"ReduceLogSumExp",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += exp(${n.getByIndices("input_indices")});`,"value = log(value);"])},km=(e,t)=>{nt(e.inputs),ot(e,"ReduceMax",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(n.indicesSet("input_indices",d,0));return[`${a.join(`
4123
+ }`},getRunData:()=>({outputs:[{dims:l,dataType:i}],dispatchGroup:{x:Math.ceil(y/64)},programUniforms:[{type:12,data:y},...N(p,l)]})}},uo=(e,t)=>{let r=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(n=>r.push(Number(n))),ee({axes:r,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},ot=(e,t,r,n)=>{let o=e.inputs,i=o.length===1?r:uo(o,r);e.compute(Gr(t,{hint:i.cacheKey,inputDependencies:["rank"]},[o[0]],i.noopWithEmptyAxes&&i.axes.length===0?Sm:n,i.axes,o[0].dataType,i.keepDims,i.noopWithEmptyAxes),{inputs:[0]})},Tm=(e,t)=>{nt(e.inputs),ot(e,"ReduceLogSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,"value = log(value);"])},Im=(e,t)=>{nt(e.inputs),ot(e,"ReduceL1",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += abs(${n.getByIndices("input_indices")});`,""])},Cm=(e,t)=>{nt(e.inputs),ot(e,"ReduceL2",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},Am=(e,t)=>{nt(e.inputs),ot(e,"ReduceLogSumExp",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += exp(${n.getByIndices("input_indices")});`,"value = log(value);"])},km=(e,t)=>{nt(e.inputs),ot(e,"ReduceMax",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(n.indicesSet("input_indices",d,0));return[`${a.join(`
4069
4124
  `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = max(value, ${n.getByIndices("input_indices")});`,""]})},Em=(e,t)=>{nt(e.inputs),ot(e,"ReduceMean",t,(n,o,i)=>{let a=1;for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&(a*=e.inputs[0].dims[d]);return["var sum = f32(0);","",`sum += f32(${n.getByIndices("input_indices")});`,`let value = ${o.type.value}(sum / ${a});`]})},Pm=(e,t)=>{nt(e.inputs),ot(e,"ReduceMin",t,(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
4070
- `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = min(value, ${n.getByIndices("input_indices")});`,""]})},zm=(e,t)=>{nt(e.inputs),ot(e,"ReduceProd",t,(n,o)=>[`var value = ${o.type.storage}(1);`,"",`value *= ${n.getByIndices("input_indices")};`,""])},Om=(e,t)=>{nt(e.inputs),ot(e,"ReduceSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,""])},Dm=(e,t)=>{nt(e.inputs),ot(e,"ReduceSumSquare",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += t * t;`,""])},it=(e,t,r)=>{if(t.length===0)return r;let n=1,o=1;for(let i=0;i<t.length;i++)t.indexOf(i)===-1?n*=e[i]:o*=e[i];return o<32&&n>1024},ys=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Em(e,t):ss(e,t)},_s=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Im(e,t):us(e,t)},ws=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Cm(e,t):ds(e,t)},vs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Am(e,t):ls(e,t)},$s=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?km(e,t):cs(e,t)},xs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Pm(e,t):ps(e,t)},Ss=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?zm(e,t):ms(e,t)},Ts=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Om(e,t):fs(e,t)},Is=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Dm(e,t):hs(e,t)},Cs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Tm(e,t):gs(e,t)}});var As,ks,Es,co,Ps=U(()=>{"use strict";te();Se();Gr();As=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},ks=(e,t)=>{As(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
4125
+ `)}`,`var value = ${n.getByIndices("input_indices")};`,`value = min(value, ${n.getByIndices("input_indices")});`,""]})},zm=(e,t)=>{nt(e.inputs),ot(e,"ReduceProd",t,(n,o)=>[`var value = ${o.type.storage}(1);`,"",`value *= ${n.getByIndices("input_indices")};`,""])},Om=(e,t)=>{nt(e.inputs),ot(e,"ReduceSum",t,(n,o)=>[`var value = ${o.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,""])},Dm=(e,t)=>{nt(e.inputs),ot(e,"ReduceSumSquare",t,(n,o)=>[`var t = ${o.type.value}(0); var value = ${o.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += t * t;`,""])},it=(e,t,r)=>{if(t.length===0)return r;let n=1,o=1;for(let i=0;i<t.length;i++)t.indexOf(i)===-1?n*=e[i]:o*=e[i];return o<32&&n>1024},bs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Em(e,t):as(e,t)},ys=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Im(e,t):ss(e,t)},_s=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Cm(e,t):us(e,t)},ws=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Am(e,t):ds(e,t)},vs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?km(e,t):ls(e,t)},$s=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Pm(e,t):cs(e,t)},xs=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?zm(e,t):ps(e,t)},Ss=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Om(e,t):ms(e,t)},Ts=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Dm(e,t):fs(e,t)},Is=(e,t)=>{it(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?Tm(e,t):hs(e,t)}});var Cs,As,ks,lo,Es=U(()=>{"use strict";J();xe();Lr();Cs=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},As=(e,t)=>{Cs(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
4071
4126
  `)}`,`var value = ${n.getByIndices("input_indices")};
4072
4127
  var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?"<=":"<"} value) {
4073
4128
  value = ${n.getByIndices("input_indices")};
4074
4129
  best_index = i32(last_index);
4075
- }`,"",o.setByOffset("global_idx","best_index")]};e.compute(Hr("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},Es=(e,t)=>{As(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
4130
+ }`,"",o.setByOffset("global_idx","best_index")]};e.compute(Gr("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},ks=(e,t)=>{Cs(e.inputs);let r=(n,o,i)=>{let a=[];for(let d=0;d<n.rank;d++)(i.indexOf(d)>=0||i.length===0)&&a.push(`input_indices[${d}] = 0;`);return[`${a.join(`
4076
4131
  `)}`,`var value = ${n.getByIndices("input_indices")};
4077
4132
  var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?">=":">"} value) {
4078
4133
  value = ${n.getByIndices("input_indices")};
4079
4134
  best_index = i32(last_index);
4080
- }`,"",o.setByOffset("global_idx","best_index")]};e.compute(Hr("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},co=e=>re(e)});var Bm,po,Mm,Rm,Um,Rt,Nm,zs,Fr=U(()=>{"use strict";te();oe();Nr();ae();Bm=(e,t)=>{let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4],d=e[5];if(a&&d)throw new Error("Attention cannot have both past and attention_bias");if(r.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let l=r.dims[0],p=r.dims[1],m=r.dims[2];if(o.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(n.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(n.dims[0]!==m)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(o.dims[0]!==n.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let u=o.dims[0]/3,h=u,_=h;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let S of t.qkvHiddenSizes)if(S%t.numHeads!==0)throw new Error("qkv_hidden_sizes should be divisible by num_heads");u=t.qkvHiddenSizes[0],h=t.qkvHiddenSizes[1],_=t.qkvHiddenSizes[2]}let y=p;if(u!==h)throw new Error("qkv_hidden_sizes first element should be same as the second");if(o.dims[0]!==u+h+_)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let g=0;if(a){if(h!==_)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(a.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(a.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(a.dims[1]!==l)throw new Error('Input "past" second dimension must be batch_size');if(a.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(a.dims[4]!==h/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads');t.pastPresentShareBuffer||(g=a.dims[3])}let x=y+g,$=-1,v=0;if(i)throw new Error("Mask not supported");if(a)throw new Error("past is not supported");if(d){if(d.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(d.dims[0]!==l||d.dims[1]!==t.numHeads||d.dims[2]!==p||d.dims[3]!==x)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:l,sequenceLength:p,pastSequenceLength:g,kvSequenceLength:y,totalSequenceLength:x,maxSequenceLength:$,inputHiddenSize:m,hiddenSize:u,vHiddenSize:_,headSize:Math.floor(u/t.numHeads),vHeadSize:Math.floor(_/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:v,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},po=(e,t,r)=>t&&e?`
4135
+ }`,"",o.setByOffset("global_idx","best_index")]};e.compute(Gr("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],r,[t.axis],7,t.keepDims),{inputs:[0]})},lo=e=>ee(e)});var Bm,co,Mm,Rm,Um,Ut,Nm,Ps,Hr=U(()=>{"use strict";J();ne();Ur();ae();Bm=(e,t)=>{let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4],d=e[5];if(a&&d)throw new Error("Attention cannot have both past and attention_bias");if(r.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let l=r.dims[0],p=r.dims[1],m=r.dims[2];if(o.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(n.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(n.dims[0]!==m)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(o.dims[0]!==n.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let u=o.dims[0]/3,h=u,_=h;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let S of t.qkvHiddenSizes)if(S%t.numHeads!==0)throw new Error("qkv_hidden_sizes should be divisible by num_heads");u=t.qkvHiddenSizes[0],h=t.qkvHiddenSizes[1],_=t.qkvHiddenSizes[2]}let y=p;if(u!==h)throw new Error("qkv_hidden_sizes first element should be same as the second");if(o.dims[0]!==u+h+_)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let g=0;if(a){if(h!==_)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(a.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(a.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(a.dims[1]!==l)throw new Error('Input "past" second dimension must be batch_size');if(a.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(a.dims[4]!==h/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads');t.pastPresentShareBuffer||(g=a.dims[3])}let x=y+g,$=-1,v=0;if(i)throw new Error("Mask not supported");if(a)throw new Error("past is not supported");if(d){if(d.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(d.dims[0]!==l||d.dims[1]!==t.numHeads||d.dims[2]!==p||d.dims[3]!==x)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:l,sequenceLength:p,pastSequenceLength:g,kvSequenceLength:y,totalSequenceLength:x,maxSequenceLength:$,inputHiddenSize:m,hiddenSize:u,vHiddenSize:_,headSize:Math.floor(u/t.numHeads),vHeadSize:Math.floor(_/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:v,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},co=(e,t,r)=>t&&e?`
4081
4136
  let total_sequence_length_input = u32(${t.getByOffset("0")});
4082
4137
  let present_sequence_length = max(total_sequence_length_input, uniforms.past_sequence_length);
4083
4138
  let is_subsequent_prompt: bool = sequence_length > 1 && sequence_length != total_sequence_length_input;
@@ -4090,7 +4145,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4090
4145
  `:`
4091
4146
  ${r?"let past_sequence_length = uniforms.past_sequence_length":""};
4092
4147
  let present_sequence_length = total_sequence_length;
4093
- `,Mm=(e,t,r,n,o,i,a,d)=>{let l=me(a?1:i),p=64,m=i/l;m<p&&(p=32);let u=Math.ceil(i/l/p),h=[{type:12,data:t},{type:12,data:r},{type:12,data:n},{type:12,data:o},{type:12,data:m},{type:12,data:u}],_=_e(e.dataType,l),y=Ee(1,l),g=["type"];a&&g.push("type"),d&&g.push("type");let x=$=>{let v=M("x",e.dataType,e.dims,l),S=[v],T=a?E("seq_lens",a.dataType,a.dims):void 0;T&&S.push(T);let A=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;A&&S.push(A);let k=Ee(e.dataType),P=[{name:"batch_size",type:"u32"},{name:"num_heads",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"sequence_length",type:"u32"},{name:"total_sequence_length",type:"u32"},{name:"elements_per_thread",type:"u32"}];return`
4148
+ `,Mm=(e,t,r,n,o,i,a,d)=>{let l=me(a?1:i),p=64,m=i/l;m<p&&(p=32);let u=Math.ceil(i/l/p),h=[{type:12,data:t},{type:12,data:r},{type:12,data:n},{type:12,data:o},{type:12,data:m},{type:12,data:u}],_=ye(e.dataType,l),y=Ee(1,l),g=["type"];a&&g.push("type"),d&&g.push("type");let x=$=>{let v=M("x",e.dataType,e.dims,l),S=[v],T=a?E("seq_lens",a.dataType,a.dims):void 0;T&&S.push(T);let A=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;A&&S.push(A);let C=Ee(e.dataType),P=[{name:"batch_size",type:"u32"},{name:"num_heads",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"sequence_length",type:"u32"},{name:"total_sequence_length",type:"u32"},{name:"elements_per_thread",type:"u32"}];return`
4094
4149
  var<workgroup> thread_max: array<f32, ${p}>;
4095
4150
  var<workgroup> thread_sum: array<f32, ${p}>;
4096
4151
  ${$.registerUniforms(P).declareVariables(...S)}
@@ -4099,7 +4154,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4099
4154
  let headIdx = workgroup_id.z % uniforms.num_heads;
4100
4155
  let sequence_length = uniforms.sequence_length;
4101
4156
  var total_sequence_length = uniforms.total_sequence_length;
4102
- ${po(T,A,!1)}
4157
+ ${co(T,A,!1)}
4103
4158
  let local_offset = local_idx * uniforms.elements_per_thread;
4104
4159
  let offset = (global_idx / ${p}) * uniforms.total_sequence_length + local_offset;
4105
4160
  let seq_causal_length = ${a?"u32(past_sequence_length + workgroup_id.y + 1)":"total_sequence_length"};
@@ -4129,7 +4184,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4129
4184
 
4130
4185
  if (sum == 0) {
4131
4186
  for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
4132
- x[offset + i] = ${v.type.value}(${k}(1.0) / ${k}(seq_causal_length));
4187
+ x[offset + i] = ${v.type.value}(${C}(1.0) / ${C}(seq_causal_length));
4133
4188
  }
4134
4189
  } else {
4135
4190
  for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
@@ -4139,14 +4194,14 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4139
4194
  }
4140
4195
  ${a?`
4141
4196
  for (var total_seq_id: u32 = seq_causal_length; total_seq_id + local_offset < uniforms.total_sequence_length; total_seq_id++) {
4142
- x[offset + total_seq_id] = ${v.type.value}(${k}(0));
4197
+ x[offset + total_seq_id] = ${v.type.value}(${C}(0));
4143
4198
  }`:""};
4144
- }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${p};${_};${l}`,inputDependencies:g},getShaderSource:x,getRunData:()=>({outputs:[],dispatchGroup:{x:Math.ceil(i/p),y:o,z:t*r},programUniforms:h})}},Rm=(e,t,r,n,o,i,a,d,l)=>{let p=a+i.kvSequenceLength,m=[i.batchSize,i.numHeads,i.sequenceLength,p],u=e>1&&n,h=i.kvNumHeads?i.kvNumHeads:i.numHeads,_=u?[i.batchSize,h,p,i.headSize]:void 0,y=i.nReps?i.nReps:1,g=i.scale===0?1/Math.sqrt(i.headSize):i.scale,x=me(i.headSize),$=i.headSize/x,v=12,S={x:Math.ceil(p/v),y:Math.ceil(i.sequenceLength/v),z:i.batchSize*i.numHeads},T=[{type:12,data:i.sequenceLength},{type:12,data:$},{type:12,data:p},{type:12,data:i.numHeads},{type:12,data:i.headSize},{type:1,data:g},{type:12,data:a},{type:12,data:i.kvSequenceLength},{type:12,data:y}],A=u&&n&&C.size(n.dims)>0,k=["type","type"];A&&k.push("type"),o&&k.push("type"),d&&k.push("type"),l&&k.push("type");let P=[{dims:m,dataType:t.dataType,gpuDataType:0}];u&&P.push({dims:_,dataType:t.dataType,gpuDataType:0});let D=R=>{let G=E("q",t.dataType,t.dims,x),K=E("key",r.dataType,r.dims,x),j=[G,K];if(A){let ne=E("past_key",n.dataType,n.dims,x);j.push(ne)}o&&j.push(E("attention_bias",o.dataType,o.dims));let V=d?E("seq_lens",d.dataType,d.dims):void 0;V&&j.push(V);let Q=l?E("total_sequence_length_input",l.dataType,l.dims):void 0;Q&&j.push(Q);let se=M("output",t.dataType,m),Y=[se];u&&Y.push(M("present_key",t.dataType,_,x));let ee=Ee(1,x),J=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4199
+ }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${p};${_};${l}`,inputDependencies:g},getShaderSource:x,getRunData:()=>({outputs:[],dispatchGroup:{x:Math.ceil(i/p),y:o,z:t*r},programUniforms:h})}},Rm=(e,t,r,n,o,i,a,d,l)=>{let p=a+i.kvSequenceLength,m=[i.batchSize,i.numHeads,i.sequenceLength,p],u=e>1&&n,h=i.kvNumHeads?i.kvNumHeads:i.numHeads,_=u?[i.batchSize,h,p,i.headSize]:void 0,y=i.nReps?i.nReps:1,g=i.scale===0?1/Math.sqrt(i.headSize):i.scale,x=me(i.headSize),$=i.headSize/x,v=12,S={x:Math.ceil(p/v),y:Math.ceil(i.sequenceLength/v),z:i.batchSize*i.numHeads},T=[{type:12,data:i.sequenceLength},{type:12,data:$},{type:12,data:p},{type:12,data:i.numHeads},{type:12,data:i.headSize},{type:1,data:g},{type:12,data:a},{type:12,data:i.kvSequenceLength},{type:12,data:y}],A=u&&n&&k.size(n.dims)>0,C=["type","type"];A&&C.push("type"),o&&C.push("type"),d&&C.push("type"),l&&C.push("type");let P=[{dims:m,dataType:t.dataType,gpuDataType:0}];u&&P.push({dims:_,dataType:t.dataType,gpuDataType:0});let D=R=>{let H=E("q",t.dataType,t.dims,x),L=E("key",r.dataType,r.dims,x),re=[H,L];if(A){let te=E("past_key",n.dataType,n.dims,x);re.push(te)}o&&re.push(E("attention_bias",o.dataType,o.dims));let V=d?E("seq_lens",d.dataType,d.dims):void 0;V&&re.push(V);let K=l?E("total_sequence_length_input",l.dataType,l.dims):void 0;K&&re.push(K);let we=M("output",t.dataType,m),j=[we];u&&j.push(M("present_key",t.dataType,_,x));let Q=Ee(1,x),ie=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4145
4200
  const TILE_SIZE = ${v}u;
4146
4201
 
4147
- var<workgroup> tileQ: array<${G.type.storage}, ${v*v}>;
4148
- var<workgroup> tileK: array<${G.type.storage}, ${v*v}>;
4149
- ${R.registerUniforms(J).declareVariables(...j,...Y)}
4202
+ var<workgroup> tileQ: array<${H.type.storage}, ${v*v}>;
4203
+ var<workgroup> tileK: array<${H.type.storage}, ${v*v}>;
4204
+ ${R.registerUniforms(ie).declareVariables(...re,...j)}
4150
4205
  ${R.mainStart([v,v,1])}
4151
4206
  // x holds the N and y holds the M
4152
4207
  let headIdx = workgroup_id.z % uniforms.num_heads;
@@ -4157,13 +4212,13 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4157
4212
  let n = workgroup_id.x * TILE_SIZE;
4158
4213
  let sequence_length = uniforms.M;
4159
4214
  var total_sequence_length = uniforms.N;
4160
- ${po(V,Q,!0)}
4215
+ ${co(V,K,!0)}
4161
4216
  let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx;
4162
4217
  let qOffset = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K;
4163
4218
  ${A&&u?"let pastKeyOffset = absKvHeadIdx * uniforms.past_sequence_length * uniforms.K;":""};
4164
4219
  let kOffset = absKvHeadIdx * uniforms.kv_sequence_length * uniforms.K;
4165
4220
  ${u?"let presentKeyOffset = absKvHeadIdx * uniforms.N * uniforms.K;":""}
4166
- var value = ${ee}(0);
4221
+ var value = ${Q}(0);
4167
4222
  for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {
4168
4223
  if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) {
4169
4224
  tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x];
@@ -4186,7 +4241,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4186
4241
  workgroupBarrier();
4187
4242
 
4188
4243
  for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) {
4189
- value += ${ee}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]);
4244
+ value += ${Q}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]);
4190
4245
  }
4191
4246
 
4192
4247
  workgroupBarrier();
@@ -4196,14 +4251,14 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4196
4251
  let headOffset = workgroup_id.z * uniforms.M * uniforms.N;
4197
4252
  let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x;
4198
4253
  var sum: f32 = ${(()=>{switch(x){case 1:return"value";case 2:return"value.x + value.y";case 4:return"value.x + value.y + value.z + value.w";default:throw new Error(`Unsupported components: ${x}`)}})()};
4199
- output[outputIdx] = ${se.type.value} (sum * uniforms.alpha) + ${o?"attention_bias[outputIdx]":"0.0"};
4254
+ output[outputIdx] = ${we.type.value} (sum * uniforms.alpha) + ${o?"attention_bias[outputIdx]":"0.0"};
4200
4255
  }
4201
- }`};return{name:"AttentionProbs",shaderCache:{hint:`${x};${o!==void 0};${n!==void 0};${e}`,inputDependencies:k},getRunData:()=>({outputs:P,dispatchGroup:S,programUniforms:T}),getShaderSource:D}},Um=(e,t,r,n,o,i,a=void 0,d=void 0)=>{let l=i+o.kvSequenceLength,p=o.nReps?o.nReps:1,m=o.vHiddenSize*p,u=e>1&&n,h=o.kvNumHeads?o.kvNumHeads:o.numHeads,_=u?[o.batchSize,h,l,o.headSize]:void 0,y=[o.batchSize,o.sequenceLength,m],g=12,x={x:Math.ceil(o.vHeadSize/g),y:Math.ceil(o.sequenceLength/g),z:o.batchSize*o.numHeads},$=[{type:12,data:o.sequenceLength},{type:12,data:l},{type:12,data:o.vHeadSize},{type:12,data:o.numHeads},{type:12,data:o.headSize},{type:12,data:m},{type:12,data:i},{type:12,data:o.kvSequenceLength},{type:12,data:p}],v=u&&n&&C.size(n.dims)>0,S=["type","type"];v&&S.push("type"),a&&S.push("type"),d&&S.push("type");let T=[{dims:y,dataType:t.dataType,gpuDataType:0}];u&&T.push({dims:_,dataType:t.dataType,gpuDataType:0});let A=k=>{let P=E("probs",t.dataType,t.dims),D=E("v",r.dataType,r.dims),R=[P,D];v&&R.push(E("past_value",n.dataType,n.dims));let G=a?E("seq_lens",a.dataType,a.dims):void 0;a&&R.push(G);let K=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;d&&R.push(K);let V=[M("output",t.dataType,y)];u&&V.push(M("present_value",t.dataType,_));let Q=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4256
+ }`};return{name:"AttentionProbs",shaderCache:{hint:`${x};${o!==void 0};${n!==void 0};${e}`,inputDependencies:C},getRunData:()=>({outputs:P,dispatchGroup:S,programUniforms:T}),getShaderSource:D}},Um=(e,t,r,n,o,i,a=void 0,d=void 0)=>{let l=i+o.kvSequenceLength,p=o.nReps?o.nReps:1,m=o.vHiddenSize*p,u=e>1&&n,h=o.kvNumHeads?o.kvNumHeads:o.numHeads,_=u?[o.batchSize,h,l,o.headSize]:void 0,y=[o.batchSize,o.sequenceLength,m],g=12,x={x:Math.ceil(o.vHeadSize/g),y:Math.ceil(o.sequenceLength/g),z:o.batchSize*o.numHeads},$=[{type:12,data:o.sequenceLength},{type:12,data:l},{type:12,data:o.vHeadSize},{type:12,data:o.numHeads},{type:12,data:o.headSize},{type:12,data:m},{type:12,data:i},{type:12,data:o.kvSequenceLength},{type:12,data:p}],v=u&&n&&k.size(n.dims)>0,S=["type","type"];v&&S.push("type"),a&&S.push("type"),d&&S.push("type");let T=[{dims:y,dataType:t.dataType,gpuDataType:0}];u&&T.push({dims:_,dataType:t.dataType,gpuDataType:0});let A=C=>{let P=E("probs",t.dataType,t.dims),D=E("v",r.dataType,r.dims),R=[P,D];v&&R.push(E("past_value",n.dataType,n.dims));let H=a?E("seq_lens",a.dataType,a.dims):void 0;a&&R.push(H);let L=d?E("total_sequence_length_input",d.dataType,d.dims):void 0;d&&R.push(L);let V=[M("output",t.dataType,y)];u&&V.push(M("present_value",t.dataType,_));let K=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return`
4202
4257
  const TILE_SIZE = ${g}u;
4203
4258
  var<workgroup> tileQ: array<${P.type.value}, ${g*g}>;
4204
4259
  var<workgroup> tileV: array<${P.type.value}, ${g*g}>;
4205
- ${k.registerUniforms(Q).declareVariables(...R,...V)}
4206
- ${k.mainStart([g,g,1])}
4260
+ ${C.registerUniforms(K).declareVariables(...R,...V)}
4261
+ ${C.mainStart([g,g,1])}
4207
4262
  let headIdx = workgroup_id.z % uniforms.num_heads;
4208
4263
  let batchIdx = workgroup_id.z / uniforms.num_heads;
4209
4264
  let kvHeadIdx = ${p===1?"headIdx":"headIdx / uniforms.n_reps"};
@@ -4212,7 +4267,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4212
4267
  let n = global_id.x;
4213
4268
  let sequence_length = uniforms.M;
4214
4269
  var total_sequence_length = uniforms.K;
4215
- ${po(G,K,!0)}
4270
+ ${co(H,L,!0)}
4216
4271
  let offsetA = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K;
4217
4272
  let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; // kvHeadIdx is relative to the batch
4218
4273
  ${v&&u?"let pastValueOffset = absKvHeadIdx * uniforms.N * uniforms.past_sequence_length + n;":""};
@@ -4253,7 +4308,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4253
4308
  + headIdx * uniforms.N + n;
4254
4309
  output[outputIdx] = value;
4255
4310
  }
4256
- }`};return{name:"AttentionScore",shaderCache:{hint:`${n!==void 0};${e}`,inputDependencies:S},getRunData:()=>({outputs:T,dispatchGroup:x,programUniforms:$}),getShaderSource:A}},Rt=(e,t,r,n,o,i,a,d,l,p,m=void 0,u=void 0)=>{let h=Math.min(e.outputCount,1+(a?1:0)+(d?1:0)),_=h>1?p.pastSequenceLength:0,y=_+p.kvSequenceLength,g=l&&C.size(l.dims)>0?l:void 0,x=[t,r];h>1&&a&&C.size(a.dims)>0&&x.push(a),g&&x.push(g),m&&x.push(m),u&&x.push(u);let $=e.compute(Rm(h,t,r,a,g,p,_,m,u),{inputs:x,outputs:h>1?[-1,1]:[-1]})[0];e.compute(Mm($,p.batchSize,p.numHeads,_,p.sequenceLength,y,m,u),{inputs:m&&u?[$,m,u]:[$],outputs:[]});let v=[$,n];h>1&&d&&C.size(d.dims)>0&&v.push(d),m&&v.push(m),u&&v.push(u),e.compute(Um(h,$,n,d,p,_,m,u),{inputs:v,outputs:h>1?[0,2]:[0]})},Nm=(e,t)=>{let r=[t.batchSize,t.numHeads,t.sequenceLength,t.headSize],n=t.sequenceLength,o=t.inputHiddenSize,i=t.headSize,a=12,d={x:Math.ceil(t.headSize/a),y:Math.ceil(t.sequenceLength/a),z:t.batchSize*t.numHeads},l=[e.inputs[0],e.inputs[1],e.inputs[2]],p=[{type:12,data:n},{type:12,data:o},{type:12,data:i},{type:12,data:t.numHeads},{type:12,data:t.headSize},{type:12,data:t.hiddenSize},{type:12,data:t.hiddenSize+t.hiddenSize+t.vHiddenSize}],m=u=>{let h=M("output_q",l[0].dataType,r),_=M("output_k",l[0].dataType,r),y=M("output_v",l[0].dataType,r),g=E("input",l[0].dataType,l[0].dims),x=E("weight",l[1].dataType,l[1].dims),$=E("bias",l[2].dataType,l[2].dims),v=g.type.storage,S=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"hidden_size",type:"u32"},{name:"ldb",type:"u32"}];return`
4311
+ }`};return{name:"AttentionScore",shaderCache:{hint:`${n!==void 0};${e}`,inputDependencies:S},getRunData:()=>({outputs:T,dispatchGroup:x,programUniforms:$}),getShaderSource:A}},Ut=(e,t,r,n,o,i,a,d,l,p,m=void 0,u=void 0)=>{let h=Math.min(e.outputCount,1+(a?1:0)+(d?1:0)),_=h>1?p.pastSequenceLength:0,y=_+p.kvSequenceLength,g=l&&k.size(l.dims)>0?l:void 0,x=[t,r];h>1&&a&&k.size(a.dims)>0&&x.push(a),g&&x.push(g),m&&x.push(m),u&&x.push(u);let $=e.compute(Rm(h,t,r,a,g,p,_,m,u),{inputs:x,outputs:h>1?[-1,1]:[-1]})[0];e.compute(Mm($,p.batchSize,p.numHeads,_,p.sequenceLength,y,m,u),{inputs:m&&u?[$,m,u]:[$],outputs:[]});let v=[$,n];h>1&&d&&k.size(d.dims)>0&&v.push(d),m&&v.push(m),u&&v.push(u),e.compute(Um(h,$,n,d,p,_,m,u),{inputs:v,outputs:h>1?[0,2]:[0]})},Nm=(e,t)=>{let r=[t.batchSize,t.numHeads,t.sequenceLength,t.headSize],n=t.sequenceLength,o=t.inputHiddenSize,i=t.headSize,a=12,d={x:Math.ceil(t.headSize/a),y:Math.ceil(t.sequenceLength/a),z:t.batchSize*t.numHeads},l=[e.inputs[0],e.inputs[1],e.inputs[2]],p=[{type:12,data:n},{type:12,data:o},{type:12,data:i},{type:12,data:t.numHeads},{type:12,data:t.headSize},{type:12,data:t.hiddenSize},{type:12,data:t.hiddenSize+t.hiddenSize+t.vHiddenSize}],m=u=>{let h=M("output_q",l[0].dataType,r),_=M("output_k",l[0].dataType,r),y=M("output_v",l[0].dataType,r),g=E("input",l[0].dataType,l[0].dims),x=E("weight",l[1].dataType,l[1].dims),$=E("bias",l[2].dataType,l[2].dims),v=g.type.storage,S=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"hidden_size",type:"u32"},{name:"ldb",type:"u32"}];return`
4257
4312
  const TILE_SIZE = ${a}u;
4258
4313
  var<workgroup> tileInput: array<${v}, ${a*a}>;
4259
4314
  var<workgroup> tileWeightQ: array<${v}, ${a*a}>;
@@ -4308,7 +4363,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4308
4363
  output_k[outputIdx] = valueK;
4309
4364
  output_v[outputIdx] = valueV;
4310
4365
  }
4311
- }`};return e.compute({name:"AttentionPrepare",shaderCache:{inputDependencies:["type","type","type"]},getRunData:()=>({outputs:[{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0}],dispatchGroup:d,programUniforms:p}),getShaderSource:m},{inputs:l,outputs:[-1,-1,-1]})},zs=(e,t)=>{let r=Bm(e.inputs,t),[n,o,i]=Nm(e,r);return Rt(e,n,o,i,e.inputs[4],void 0,void 0,void 0,e.inputs[5],r)}});var Vm,Wm,Lm,Os,Ds=U(()=>{"use strict";We();te();oe();Se();ae();Vm=(e,t)=>{if(!e||e.length!==5)throw new Error("BatchNormalization requires 5 inputs");let r=(n,o,i)=>{let a=o.length;if(a!==n.length)throw new Error(`${i}: num dimensions != ${a}`);o.forEach((d,l)=>{if(d!==n[l])throw new Error(`${i}: dim[${l}] do not match`)})};if(e[0].dims.length>1){let n=t.format==="NHWC"?t.spatial?e[0].dims.slice(-1):e[0].dims.slice(-1).concat(e[0].dims.slice(1,e[0].dims.length-1)):e[0].dims.slice(1,t.spatial?2:void 0);r(e[1].dims,n,"Invalid input scale"),r(e[2].dims,n,"Invalid input B"),r(e[3].dims,n,"Invalid input mean"),r(e[4].dims,n,"Invalid input var")}else r(e[1].dims,[1],"Invalid input scale"),r(e[2].dims,[1],"Invalid input B"),r(e[3].dims,[1],"Invalid input mean"),r(e[4].dims,[1],"Invalid input var")},Wm=(e,t)=>{let{epsilon:r,spatial:n,format:o}=t,i=e[0].dims,a=n?me(i[i.length-1]):1,d=o==="NHWC"&&i.length>1?a:1,l=C.size(i)/a,p=n,m=p?i.length:i,u=E("x",e[0].dataType,e[0].dims,a),h=E("scale",e[1].dataType,e[1].dims,d),_=E("bias",e[2].dataType,e[2].dims,d),y=E("inputMean",e[3].dataType,e[3].dims,d),g=E("inputVar",e[4].dataType,e[4].dims,d),x=M("y",e[0].dataType,m,a),$=()=>{let S="";if(n)S=`let cOffset = ${i.length===1?"0u":o==="NHWC"?`outputIndices[${i.length-1}] / ${a}`:"outputIndices[1]"};`;else if(o==="NCHW")S=`
4366
+ }`};return e.compute({name:"AttentionPrepare",shaderCache:{inputDependencies:["type","type","type"]},getRunData:()=>({outputs:[{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0},{dims:r,dataType:e.inputs[0].dataType,gpuDataType:0}],dispatchGroup:d,programUniforms:p}),getShaderSource:m},{inputs:l,outputs:[-1,-1,-1]})},Ps=(e,t)=>{let r=Bm(e.inputs,t),[n,o,i]=Nm(e,r);return Ut(e,n,o,i,e.inputs[4],void 0,void 0,void 0,e.inputs[5],r)}});var Vm,Wm,Lm,zs,Os=U(()=>{"use strict";Le();J();ne();xe();ae();Vm=(e,t)=>{if(!e||e.length!==5)throw new Error("BatchNormalization requires 5 inputs");let r=(n,o,i)=>{let a=o.length;if(a!==n.length)throw new Error(`${i}: num dimensions != ${a}`);o.forEach((d,l)=>{if(d!==n[l])throw new Error(`${i}: dim[${l}] do not match`)})};if(e[0].dims.length>1){let n=t.format==="NHWC"?t.spatial?e[0].dims.slice(-1):e[0].dims.slice(-1).concat(e[0].dims.slice(1,e[0].dims.length-1)):e[0].dims.slice(1,t.spatial?2:void 0);r(e[1].dims,n,"Invalid input scale"),r(e[2].dims,n,"Invalid input B"),r(e[3].dims,n,"Invalid input mean"),r(e[4].dims,n,"Invalid input var")}else r(e[1].dims,[1],"Invalid input scale"),r(e[2].dims,[1],"Invalid input B"),r(e[3].dims,[1],"Invalid input mean"),r(e[4].dims,[1],"Invalid input var")},Wm=(e,t)=>{let{epsilon:r,spatial:n,format:o}=t,i=e[0].dims,a=n?me(i[i.length-1]):1,d=o==="NHWC"&&i.length>1?a:1,l=k.size(i)/a,p=n,m=p?i.length:i,u=E("x",e[0].dataType,e[0].dims,a),h=E("scale",e[1].dataType,e[1].dims,d),_=E("bias",e[2].dataType,e[2].dims,d),y=E("inputMean",e[3].dataType,e[3].dims,d),g=E("inputVar",e[4].dataType,e[4].dims,d),x=M("y",e[0].dataType,m,a),$=()=>{let S="";if(n)S=`let cOffset = ${i.length===1?"0u":o==="NHWC"?`outputIndices[${i.length-1}] / ${a}`:"outputIndices[1]"};`;else if(o==="NCHW")S=`
4312
4367
  ${x.indicesSet("outputIndices","0","0")}
4313
4368
  let cOffset = ${x.indicesToOffset("outputIndices")};`;else{S=`var cIndices = ${h.type.indices}(0);
4314
4369
  cIndices[0] = outputIndices[${i.length-1}];`;for(let T=1;T<h.rank;T++)S+=`cIndices[${T}] = outputIndices[${T}];`;S+=`let cOffset = ${h.indicesToOffset("cIndices")};`}return S},v=S=>`
@@ -4325,7 +4380,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4325
4380
  let x = ${u.getByOffset("global_idx")};
4326
4381
  let value = (x - inputMean) * inverseSqrt(inputVar + epsilon) * scale + bias;
4327
4382
  ${x.setByOffset("global_idx","value")}
4328
- }`;return{name:"BatchNormalization",shaderCache:{hint:`${t.epsilon}_${t.format}_${n}_${a}`,inputDependencies:p?["rank","type","type","type","type"]:void 0},getShaderSource:v,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:p?[{type:12,data:l},...N(i)]:[{type:12,data:l}]})}},Lm=e=>re(e),Os=(e,t)=>{let{inputs:r,outputCount:n}=e,o=Lm({...t,outputCount:n});if(ve.webgpu.validateInputContent&&Vm(r,o),t.trainingMode)throw new Error("BatchNormalization trainingMode is not supported yet.");e.compute(Wm(r,o))}});var Gm,Hm,Bs,Ms=U(()=>{"use strict";oe();ae();Gm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![320,640,1280].includes(e[0].dims[2]))throw new Error("number of channels should be 320, 640 or 1280");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Hm=e=>{let t=e[0].dims,r=e[0].dims[2],n=C.size(t)/4,o=e[0].dataType,i=E("input",o,t,4),a=E("bias",o,[r],4),d=E("residual",o,t,4),l=M("output",o,t,4);return{name:"BiasAdd",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)}}),getShaderSource:m=>`
4383
+ }`;return{name:"BatchNormalization",shaderCache:{hint:`${t.epsilon}_${t.format}_${n}_${a}`,inputDependencies:p?["rank","type","type","type","type"]:void 0},getShaderSource:v,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:p?[{type:12,data:l},...N(i)]:[{type:12,data:l}]})}},Lm=e=>ee(e),zs=(e,t)=>{let{inputs:r,outputCount:n}=e,o=Lm({...t,outputCount:n});if(_e.webgpu.validateInputContent&&Vm(r,o),t.trainingMode)throw new Error("BatchNormalization trainingMode is not supported yet.");e.compute(Wm(r,o))}});var Gm,Hm,Ds,Bs=U(()=>{"use strict";ne();ae();Gm=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![320,640,1280].includes(e[0].dims[2]))throw new Error("number of channels should be 320, 640 or 1280");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Hm=e=>{let t=e[0].dims,r=e[0].dims[2],n=k.size(t)/4,o=e[0].dataType,i=E("input",o,t,4),a=E("bias",o,[r],4),d=E("residual",o,t,4),l=M("output",o,t,4);return{name:"BiasAdd",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(n/64)}}),getShaderSource:m=>`
4329
4384
  const channels = ${r}u / 4;
4330
4385
  ${m.declareVariables(i,a,d,l)}
4331
4386
 
@@ -4334,7 +4389,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4334
4389
  let value = ${i.getByOffset("global_idx")}
4335
4390
  + ${a.getByOffset("global_idx % channels")} + ${d.getByOffset("global_idx")};
4336
4391
  ${l.setByOffset("global_idx","value")}
4337
- }`}},Bs=e=>{Gm(e.inputs),e.compute(Hm(e.inputs))}});var Fm,fe,Rs,Us,Ns,Vs,Ws,Ls,Gs,Hs,Fs,qm,qs,Ks,js,Ys,Xt,Zs,qr,Qs,Xs,Js,eu,tu,ru,nu,ou,iu,au,su,uu,du,lu,cu,pu,mu,fu,mo,fo,hu,gu,bu,Km,jm,yu,Kr=U(()=>{"use strict";te();oe();Se();ae();Fm=(e,t,r,n,o,i,a)=>{let d=Math.ceil(t/4),l="";typeof o=="string"?l=`${o}(a)`:l=o("a");let p=E("inputData",r,[d],4),m=M("outputData",n,[d],4),u=[{name:"vec_size",type:"u32"}];return a&&u.push(...a),`
4392
+ }`}},Ds=e=>{Gm(e.inputs),e.compute(Hm(e.inputs))}});var Fm,fe,Ms,Rs,Us,Ns,Vs,Ws,Ls,Gs,Hs,qm,Fs,qs,Ks,js,Qt,Ys,Fr,Zs,Qs,Xs,Js,eu,tu,ru,nu,ou,iu,au,su,uu,du,lu,cu,pu,mu,po,mo,fu,hu,gu,Km,jm,bu,qr=U(()=>{"use strict";J();ne();xe();ae();Fm=(e,t,r,n,o,i,a)=>{let d=Math.ceil(t/4),l="";typeof o=="string"?l=`${o}(a)`:l=o("a");let p=E("inputData",r,[d],4),m=M("outputData",n,[d],4),u=[{name:"vec_size",type:"u32"}];return a&&u.push(...a),`
4338
4393
  ${e.registerUniforms(u).declareVariables(p,m)}
4339
4394
 
4340
4395
  ${i??""}
@@ -4344,7 +4399,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4344
4399
 
4345
4400
  let a = ${p.getByOffset("global_idx")};
4346
4401
  ${m.setByOffset("global_idx",l)}
4347
- }`},fe=(e,t,r,n,o,i=e.dataType,a,d)=>{let l=[{type:12,data:Math.ceil(C.size(e.dims)/4)}];return a&&l.push(...a),{name:t,shaderCache:{hint:o,inputDependencies:["type"]},getShaderSource:p=>Fm(p,C.size(e.dims),e.dataType,i,r,n,d),getRunData:p=>({outputs:[{dims:e.dims,dataType:i}],dispatchGroup:{x:Math.ceil(C.size(p[0].dims)/64/4)},programUniforms:l})}},Rs=e=>{e.compute(fe(e.inputs[0],"Abs","abs"))},Us=e=>{e.compute(fe(e.inputs[0],"Acos","acos"))},Ns=e=>{e.compute(fe(e.inputs[0],"Acosh","acosh"))},Vs=e=>{e.compute(fe(e.inputs[0],"Asin","asin"))},Ws=e=>{e.compute(fe(e.inputs[0],"Asinh","asinh"))},Ls=e=>{e.compute(fe(e.inputs[0],"Atan","atan"))},Gs=e=>{e.compute(fe(e.inputs[0],"Atanh","atanh"))},Hs=e=>re(e),Fs=(e,t)=>{let r;switch(t.to){case 10:r="vec4<f16>";break;case 1:r="vec4<f32>";break;case 12:r="vec4<u32>";break;case 6:r="vec4<i32>";break;case 9:r="vec4<bool>";break;default:throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${t.to}`)}e.compute(fe(e.inputs[0],"Cast",r,void 0,t.cacheKey,t.to))},qm=e=>{let t,r,n=e.length>=2&&e[1].data!==0,o=e.length>=3&&e[2].data!==0;switch(e[0].dataType){case 1:t=n?e[1].getFloat32Array()[0]:-34028234663852886e22,r=o?e[2].getFloat32Array()[0]:34028234663852886e22;break;case 10:t=n?e[1].getUint16Array()[0]:64511,r=o?e[2].getUint16Array()[0]:31743;break;default:throw new Error("Unsupport data type")}return re({min:t,max:r})},qs=(e,t)=>{let r=t||qm(e.inputs),n=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Clip",o=>`clamp(${o}, vec4<${n}>(uniforms.min), vec4<${n}>(uniforms.max))`,void 0,r.cacheKey,void 0,[{type:e.inputs[0].dataType,data:r.min},{type:e.inputs[0].dataType,data:r.max}],[{name:"min",type:n},{name:"max",type:n}]),{inputs:[0]})},Ks=e=>{e.compute(fe(e.inputs[0],"Ceil","ceil"))},js=e=>{e.compute(fe(e.inputs[0],"Cos","cos"))},Ys=e=>{e.compute(fe(e.inputs[0],"Cosh","cosh"))},Xt=e=>re(e),Zs=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Elu",n=>`elu_vf32(${n})`,`
4402
+ }`},fe=(e,t,r,n,o,i=e.dataType,a,d)=>{let l=[{type:12,data:Math.ceil(k.size(e.dims)/4)}];return a&&l.push(...a),{name:t,shaderCache:{hint:o,inputDependencies:["type"]},getShaderSource:p=>Fm(p,k.size(e.dims),e.dataType,i,r,n,d),getRunData:p=>({outputs:[{dims:e.dims,dataType:i}],dispatchGroup:{x:Math.ceil(k.size(p[0].dims)/64/4)},programUniforms:l})}},Ms=e=>{e.compute(fe(e.inputs[0],"Abs","abs"))},Rs=e=>{e.compute(fe(e.inputs[0],"Acos","acos"))},Us=e=>{e.compute(fe(e.inputs[0],"Acosh","acosh"))},Ns=e=>{e.compute(fe(e.inputs[0],"Asin","asin"))},Vs=e=>{e.compute(fe(e.inputs[0],"Asinh","asinh"))},Ws=e=>{e.compute(fe(e.inputs[0],"Atan","atan"))},Ls=e=>{e.compute(fe(e.inputs[0],"Atanh","atanh"))},Gs=e=>ee(e),Hs=(e,t)=>{let r;switch(t.to){case 10:r="vec4<f16>";break;case 1:r="vec4<f32>";break;case 12:r="vec4<u32>";break;case 6:r="vec4<i32>";break;case 9:r="vec4<bool>";break;default:throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${t.to}`)}e.compute(fe(e.inputs[0],"Cast",r,void 0,t.cacheKey,t.to))},qm=e=>{let t,r,n=e.length>=2&&e[1].data!==0,o=e.length>=3&&e[2].data!==0;switch(e[0].dataType){case 1:t=n?e[1].getFloat32Array()[0]:-34028234663852886e22,r=o?e[2].getFloat32Array()[0]:34028234663852886e22;break;case 10:t=n?e[1].getUint16Array()[0]:64511,r=o?e[2].getUint16Array()[0]:31743;break;default:throw new Error("Unsupport data type")}return ee({min:t,max:r})},Fs=(e,t)=>{let r=t||qm(e.inputs),n=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Clip",o=>`clamp(${o}, vec4<${n}>(uniforms.min), vec4<${n}>(uniforms.max))`,void 0,r.cacheKey,void 0,[{type:e.inputs[0].dataType,data:r.min},{type:e.inputs[0].dataType,data:r.max}],[{name:"min",type:n},{name:"max",type:n}]),{inputs:[0]})},qs=e=>{e.compute(fe(e.inputs[0],"Ceil","ceil"))},Ks=e=>{e.compute(fe(e.inputs[0],"Cos","cos"))},js=e=>{e.compute(fe(e.inputs[0],"Cosh","cosh"))},Qt=e=>ee(e),Ys=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Elu",n=>`elu_vf32(${n})`,`
4348
4403
  const elu_alpha_ = ${r}(${t.alpha});
4349
4404
 
4350
4405
  fn elu_f32(a: ${r}) -> ${r} {
@@ -4353,7 +4408,7 @@ var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLas
4353
4408
 
4354
4409
  fn elu_vf32(v: vec4<${r}>) -> vec4<${r}> {
4355
4410
  return vec4(elu_f32(v.x), elu_f32(v.y), elu_f32(v.z), elu_f32(v.w));
4356
- }`,t.cacheKey))},qr=(e="f32")=>`
4411
+ }`,t.cacheKey))},Fr=(e="f32")=>`
4357
4412
  const r0: ${e} = 0.3275911;
4358
4413
  const r1: ${e} = 0.254829592;
4359
4414
  const r2: ${e} = -0.284496736;
@@ -4365,15 +4420,15 @@ fn erf_vf32(v: vec4<${e}>) -> vec4<${e}> {
4365
4420
  let absv = abs(v);
4366
4421
  let x = 1.0 / (1.0 + r0 * absv);
4367
4422
  return sign(v) * (1.0 - ((((r5 * x + r4) * x + r3) * x + r2) * x + r1) * x * exp(-absv * absv));
4368
- }`,Qs=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Erf",r=>`erf_vf32(${r})`,qr(t)))},Xs=e=>{e.compute(fe(e.inputs[0],"Exp","exp"))},Js=e=>{e.compute(fe(e.inputs[0],"Floor","floor"))},eu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Gelu",r=>`0.5 * ${r} * (1.0 + erf_vf32(${r} * 0.7071067811865475))`,qr(t)))},tu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"LeakyRelu",n=>`select(leaky_relu_alpha_ * ${n}, ${n}, ${n} >= vec4<${r}>(0.0))`,`const leaky_relu_alpha_ = ${r}(${t.alpha});`,t.cacheKey))},ru=e=>{e.compute(fe(e.inputs[0],"Not",t=>`!${t}`))},nu=e=>{e.compute(fe(e.inputs[0],"Neg",t=>`-${t}`))},ou=e=>{e.compute(fe(e.inputs[0],"Reciprocal",t=>`1.0/${t}`))},iu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Relu",r=>`select(vec4<${t}>(0.0), ${r}, ${r} > vec4<${t}>(0.0))`))},au=e=>{e.compute(fe(e.inputs[0],"Sigmoid",t=>`(1.0 / (1.0 + exp(-${t})))`))},su=e=>re(e),uu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"HardSigmoid",n=>`max(vec4<${r}>(0.0), min(vec4<${r}>(1.0), ${t.alpha} * ${n} + vec4<${r}>(${t.beta})))`,void 0,t.cacheKey))},du=e=>{e.compute(fe(e.inputs[0],"Sin","sin"))},lu=e=>{e.compute(fe(e.inputs[0],"Sinh","sinh"))},cu=e=>{e.compute(fe(e.inputs[0],"Sqrt","sqrt"))},pu=e=>{e.compute(fe(e.inputs[0],"Tan","tan"))},mu=e=>`sign(${e}) * (1 - exp(-2 * abs(${e}))) / (1 + exp(-2 * abs(${e})))`,fu=e=>{e.compute(fe(e.inputs[0],"Tanh",mu))},mo=(e="f32")=>`
4423
+ }`,Zs=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Erf",r=>`erf_vf32(${r})`,Fr(t)))},Qs=e=>{e.compute(fe(e.inputs[0],"Exp","exp"))},Xs=e=>{e.compute(fe(e.inputs[0],"Floor","floor"))},Js=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Gelu",r=>`0.5 * ${r} * (1.0 + erf_vf32(${r} * 0.7071067811865475))`,Fr(t)))},eu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"LeakyRelu",n=>`select(leaky_relu_alpha_ * ${n}, ${n}, ${n} >= vec4<${r}>(0.0))`,`const leaky_relu_alpha_ = ${r}(${t.alpha});`,t.cacheKey))},tu=e=>{e.compute(fe(e.inputs[0],"Not",t=>`!${t}`))},ru=e=>{e.compute(fe(e.inputs[0],"Neg",t=>`-${t}`))},nu=e=>{e.compute(fe(e.inputs[0],"Reciprocal",t=>`1.0/${t}`))},ou=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"Relu",r=>`select(vec4<${t}>(0.0), ${r}, ${r} > vec4<${t}>(0.0))`))},iu=e=>{e.compute(fe(e.inputs[0],"Sigmoid",t=>`(1.0 / (1.0 + exp(-${t})))`))},au=e=>ee(e),su=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"HardSigmoid",n=>`max(vec4<${r}>(0.0), min(vec4<${r}>(1.0), ${t.alpha} * ${n} + vec4<${r}>(${t.beta})))`,void 0,t.cacheKey))},uu=e=>{e.compute(fe(e.inputs[0],"Sin","sin"))},du=e=>{e.compute(fe(e.inputs[0],"Sinh","sinh"))},lu=e=>{e.compute(fe(e.inputs[0],"Sqrt","sqrt"))},cu=e=>{e.compute(fe(e.inputs[0],"Tan","tan"))},pu=e=>`sign(${e}) * (1 - exp(-2 * abs(${e}))) / (1 + exp(-2 * abs(${e})))`,mu=e=>{e.compute(fe(e.inputs[0],"Tanh",pu))},po=(e="f32")=>`
4369
4424
  const fast_gelu_a: ${e} = 0.5;
4370
4425
  const fast_gelu_b: ${e} = 0.7978845608028654;
4371
4426
  const fast_gelu_c: ${e} = 0.035677408136300125;
4372
4427
 
4373
4428
  fn tanh_v(v: vec4<${e}>) -> vec4<${e}> {
4374
- return ${mu("v")};
4429
+ return ${pu("v")};
4375
4430
  }
4376
- `,fo=e=>`(fast_gelu_a + fast_gelu_a * tanh_v(${e} * (fast_gelu_c * ${e} * ${e} + fast_gelu_b))) * ${e}`,hu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"FastGelu",fo,mo(t),void 0,e.inputs[0].dataType))},gu=(e,t)=>{let r=Ee(e.inputs[0].dataType);return e.compute(fe(e.inputs[0],"ThresholdedRelu",n=>`select(vec4<${r}>(0.0), ${n}, ${n} > thresholded_relu_alpha_)`,`const thresholded_relu_alpha_ = vec4<${r}>(${t.alpha});`,t.cacheKey)),0},bu=e=>{e.compute(fe(e.inputs[0],"Log","log"))},Km=(e,t)=>`
4431
+ `,mo=e=>`(fast_gelu_a + fast_gelu_a * tanh_v(${e} * (fast_gelu_c * ${e} * ${e} + fast_gelu_b))) * ${e}`,fu=e=>{let t=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"FastGelu",mo,po(t),void 0,e.inputs[0].dataType))},hu=(e,t)=>{let r=Ee(e.inputs[0].dataType);return e.compute(fe(e.inputs[0],"ThresholdedRelu",n=>`select(vec4<${r}>(0.0), ${n}, ${n} > thresholded_relu_alpha_)`,`const thresholded_relu_alpha_ = vec4<${r}>(${t.alpha});`,t.cacheKey)),0},gu=e=>{e.compute(fe(e.inputs[0],"Log","log"))},Km=(e,t)=>`
4377
4432
  const alpha = vec4<${e}>(${t});
4378
4433
  const one = ${e}(1.0);
4379
4434
  const zero = ${e}(0.0);
@@ -4390,13 +4445,13 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4390
4445
  }
4391
4446
  return x * x1;
4392
4447
  }
4393
- `,jm=e=>`quick_gelu_impl(${e})`,yu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"QuickGelu",jm,Km(r,t.alpha),t.cacheKey,e.inputs[0].dataType))}});var Ym,Zm,wu,vu=U(()=>{"use strict";oe();ae();Kr();Ym=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![2560,5120,10240].includes(e[0].dims[2]))throw new Error("hidden state should be 2560, 5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Zm=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let r=E("input",e[0].dataType,e[0].dims,4),n=E("bias",e[0].dataType,[e[0].dims[2]],4),o=M("output",e[0].dataType,t,4),i=C.size(t)/4,a=_e(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)}}),getShaderSource:l=>`
4448
+ `,jm=e=>`quick_gelu_impl(${e})`,bu=(e,t)=>{let r=Ee(e.inputs[0].dataType);e.compute(fe(e.inputs[0],"QuickGelu",jm,Km(r,t.alpha),t.cacheKey,e.inputs[0].dataType))}});var Ym,Zm,_u,wu=U(()=>{"use strict";ne();ae();qr();Ym=e=>{if(e[0].dims.length!==3)throw new Error("input should have 3 dimensions");if(![2560,5120,10240].includes(e[0].dims[2]))throw new Error("hidden state should be 2560, 5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},Zm=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let r=E("input",e[0].dataType,e[0].dims,4),n=E("bias",e[0].dataType,[e[0].dims[2]],4),o=M("output",e[0].dataType,t,4),i=k.size(t)/4,a=ye(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)}}),getShaderSource:l=>`
4394
4449
  const M_SQRT2 = sqrt(2.0);
4395
4450
  const halfChannels = ${e[0].dims[2]/4/2}u;
4396
4451
 
4397
4452
  ${l.declareVariables(r,n,o)}
4398
4453
 
4399
- ${qr(a)}
4454
+ ${Fr(a)}
4400
4455
 
4401
4456
  ${l.mainStart()}
4402
4457
  ${l.guardAgainstOutOfBoundsWorkgroupSizes(i)}
@@ -4408,12 +4463,12 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4408
4463
  let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1);
4409
4464
 
4410
4465
  ${o.setByOffset("global_idx","valueLeft * geluRight")}
4411
- }`}},wu=e=>{Ym(e.inputs),e.compute(Zm(e.inputs))}});var Qm,Xm,at,$u,xu,Su,Tu,Iu,Cu,Au,ku,Eu,Pu,zu=U(()=>{"use strict";te();oe();ae();Qm=(e,t,r,n,o,i,a,d,l,p,m,u)=>{let h,_;typeof d=="string"?h=_=(v,S)=>`${d}((${v}),(${S}))`:typeof d=="function"?h=_=d:(h=d.scalar,_=d.vector);let y=M("outputData",m,n.length,4),g=E("aData",l,t.length,4),x=E("bData",p,r.length,4),$;if(o)if(i){let v=C.size(t)===1,S=C.size(r)===1,T=t.length>0&&t[t.length-1]%4===0,A=r.length>0&&r[r.length-1]%4===0;v||S?$=y.setByOffset("global_idx",_(v?`${g.type.value}(${g.getByOffset("0")}.x)`:g.getByOffset("global_idx"),S?`${x.type.value}(${x.getByOffset("0")}.x)`:x.getByOffset("global_idx"))):$=`
4466
+ }`}},_u=e=>{Ym(e.inputs),e.compute(Zm(e.inputs))}});var Qm,Xm,at,vu,$u,xu,Su,Tu,Iu,Cu,Au,ku,Eu,Pu=U(()=>{"use strict";J();ne();ae();Qm=(e,t,r,n,o,i,a,d,l,p,m,u)=>{let h,_;typeof d=="string"?h=_=(v,S)=>`${d}((${v}),(${S}))`:typeof d=="function"?h=_=d:(h=d.scalar,_=d.vector);let y=M("outputData",m,n.length,4),g=E("aData",l,t.length,4),x=E("bData",p,r.length,4),$;if(o)if(i){let v=k.size(t)===1,S=k.size(r)===1,T=t.length>0&&t[t.length-1]%4===0,A=r.length>0&&r[r.length-1]%4===0;v||S?$=y.setByOffset("global_idx",_(v?`${g.type.value}(${g.getByOffset("0")}.x)`:g.getByOffset("global_idx"),S?`${x.type.value}(${x.getByOffset("0")}.x)`:x.getByOffset("global_idx"))):$=`
4412
4467
  let outputIndices = ${y.offsetToIndices("global_idx * 4u")};
4413
4468
  let offsetA = ${g.broadcastedIndicesToOffset("outputIndices",y)};
4414
4469
  let offsetB = ${x.broadcastedIndicesToOffset("outputIndices",y)};
4415
4470
  ${y.setByOffset("global_idx",_(a||T?g.getByOffset("offsetA / 4u"):`${g.type.value}(${g.getByOffset("offsetA / 4u")}[offsetA % 4u])`,a||A?x.getByOffset("offsetB / 4u"):`${x.type.value}(${x.getByOffset("offsetB / 4u")}[offsetB % 4u])`))}
4416
- `}else $=y.setByOffset("global_idx",_(g.getByOffset("global_idx"),x.getByOffset("global_idx")));else{if(!i)throw new Error("no necessary to use scalar implementation for element-wise binary op implementation.");let v=(S,T,A="")=>{let k=`aData[indexA${T}][componentA${T}]`,P=`bData[indexB${T}][componentB${T}]`;return`
4471
+ `}else $=y.setByOffset("global_idx",_(g.getByOffset("global_idx"),x.getByOffset("global_idx")));else{if(!i)throw new Error("no necessary to use scalar implementation for element-wise binary op implementation.");let v=(S,T,A="")=>{let C=`aData[indexA${T}][componentA${T}]`,P=`bData[indexB${T}][componentB${T}]`;return`
4417
4472
  let outputIndices${T} = ${y.offsetToIndices(`global_idx * 4u + ${T}u`)};
4418
4473
  let offsetA${T} = ${g.broadcastedIndicesToOffset(`outputIndices${T}`,y)};
4419
4474
  let offsetB${T} = ${x.broadcastedIndicesToOffset(`outputIndices${T}`,y)};
@@ -4421,7 +4476,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4421
4476
  let indexB${T} = offsetB${T} / 4u;
4422
4477
  let componentA${T} = offsetA${T} % 4u;
4423
4478
  let componentB${T} = offsetB${T} % 4u;
4424
- ${S}[${T}] = ${A}(${h(k,P)});
4479
+ ${S}[${T}] = ${A}(${h(C,P)});
4425
4480
  `};m===9?$=`
4426
4481
  var data = vec4<u32>(0);
4427
4482
  ${v("data",0,"u32")}
@@ -4441,7 +4496,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4441
4496
  ${e.mainStart()}
4442
4497
  ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
4443
4498
  ${$}
4444
- }`},Xm=(e,t,r,n,o,i,a=r.dataType)=>{let d=r.dims.map(g=>Number(g)??1),l=n.dims.map(g=>Number(g)??1),p=!C.areEqual(d,l),m=d,u=C.size(d),h=!1,_=!1,y=[p];if(p){let g=tt.calcShape(d,l,!1);if(!g)throw new Error("Can't perform binary op on the given tensors");m=g.slice(),u=C.size(m);let x=C.size(d)===1,$=C.size(l)===1,v=d.length>0&&d[d.length-1]%4===0,S=l.length>0&&l[l.length-1]%4===0;y.push(x),y.push($),y.push(v),y.push(S);let T=1;for(let A=1;A<m.length;A++){let k=d[d.length-A],P=l[l.length-A];if(k===P)T*=k;else break}T%4===0?(_=!0,h=!0):(x||$||v||S)&&(h=!0)}else h=!0;return y.push(h),{name:e,shaderCache:{hint:t+y.map(g=>g.toString()).join("_"),inputDependencies:["rank","rank"]},getShaderSource:g=>Qm(g,d,l,m,h,p,_,o,r.dataType,n.dataType,a,i),getRunData:()=>({outputs:[{dims:m,dataType:a}],dispatchGroup:{x:Math.ceil(u/64/4)},programUniforms:[{type:12,data:Math.ceil(C.size(m)/4)},...N(d,l,m)]})}},at=(e,t,r,n,o,i)=>{e.compute(Xm(t,o??"",e.inputs[0],e.inputs[1],r,n,i))},$u=e=>{at(e,"Add",(t,r)=>`${t}+${r}`)},xu=e=>{at(e,"Div",(t,r)=>`${t}/${r}`)},Su=e=>{at(e,"Equal",{scalar:(t,r)=>`u32(${t}==${r})`,vector:(t,r)=>`vec4<u32>(${t}==${r})`},void 0,void 0,9)},Tu=e=>{at(e,"Mul",(t,r)=>`${t}*${r}`)},Iu=e=>{let t=E("input",e.inputs[0].dataType,e.inputs[0].dims).type.value;at(e,"Pow",{scalar:(n,o)=>`pow_custom(${n},${o})`,vector:(n,o)=>`pow_vector_custom(${n},${o})`},`
4499
+ }`},Xm=(e,t,r,n,o,i,a=r.dataType)=>{let d=r.dims.map(g=>Number(g)??1),l=n.dims.map(g=>Number(g)??1),p=!k.areEqual(d,l),m=d,u=k.size(d),h=!1,_=!1,y=[p];if(p){let g=tt.calcShape(d,l,!1);if(!g)throw new Error("Can't perform binary op on the given tensors");m=g.slice(),u=k.size(m);let x=k.size(d)===1,$=k.size(l)===1,v=d.length>0&&d[d.length-1]%4===0,S=l.length>0&&l[l.length-1]%4===0;y.push(x),y.push($),y.push(v),y.push(S);let T=1;for(let A=1;A<m.length;A++){let C=d[d.length-A],P=l[l.length-A];if(C===P)T*=C;else break}T%4===0?(_=!0,h=!0):(x||$||v||S)&&(h=!0)}else h=!0;return y.push(h),{name:e,shaderCache:{hint:t+y.map(g=>g.toString()).join("_"),inputDependencies:["rank","rank"]},getShaderSource:g=>Qm(g,d,l,m,h,p,_,o,r.dataType,n.dataType,a,i),getRunData:()=>({outputs:[{dims:m,dataType:a}],dispatchGroup:{x:Math.ceil(u/64/4)},programUniforms:[{type:12,data:Math.ceil(k.size(m)/4)},...N(d,l,m)]})}},at=(e,t,r,n,o,i)=>{e.compute(Xm(t,o??"",e.inputs[0],e.inputs[1],r,n,i))},vu=e=>{at(e,"Add",(t,r)=>`${t}+${r}`)},$u=e=>{at(e,"Div",(t,r)=>`${t}/${r}`)},xu=e=>{at(e,"Equal",{scalar:(t,r)=>`u32(${t}==${r})`,vector:(t,r)=>`vec4<u32>(${t}==${r})`},void 0,void 0,9)},Su=e=>{at(e,"Mul",(t,r)=>`${t}*${r}`)},Tu=e=>{let t=E("input",e.inputs[0].dataType,e.inputs[0].dims).type.value;at(e,"Pow",{scalar:(n,o)=>`pow_custom(${n},${o})`,vector:(n,o)=>`pow_vector_custom(${n},${o})`},`
4445
4500
  fn pow_custom(a : ${t}, b : ${t}) -> ${t} {
4446
4501
  if (b == ${t}(0.0)) {
4447
4502
  return ${t}(1.0);
@@ -4454,7 +4509,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4454
4509
  // TODO: implement vectorized pow
4455
4510
  return vec4<${t}>(pow_custom(a.x, b.x), pow_custom(a.y, b.y), pow_custom(a.z, b.z), pow_custom(a.w, b.w));
4456
4511
  }
4457
- `)},Cu=e=>{at(e,"Sub",(t,r)=>`${t}-${r}`)},Au=e=>{at(e,"Greater",{scalar:(t,r)=>`u32(${t}>${r})`,vector:(t,r)=>`vec4<u32>(${t}>${r})`},void 0,void 0,9)},ku=e=>{at(e,"Less",{scalar:(t,r)=>`u32(${t}<${r})`,vector:(t,r)=>`vec4<u32>(${t}<${r})`},void 0,void 0,9)},Eu=e=>{at(e,"GreaterOrEqual",{scalar:(t,r)=>`u32(${t}>=${r})`,vector:(t,r)=>`vec4<u32>(${t}>=${r})`},void 0,void 0,9)},Pu=e=>{at(e,"LessOrEqual",{scalar:(t,r)=>`u32(${t}<=${r})`,vector:(t,r)=>`vec4<u32>(${t}<=${r})`},void 0,void 0,9)}});var ef,tf,rf,nf,Ou,Du,Bu=U(()=>{"use strict";te();oe();Se();ae();ef=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");let r=0,n=e[r],o=n.dataType,i=n.dims.length;e.forEach((a,d)=>{if(d!==r){if(a.dataType!==o)throw new Error("input tensors should be one type");if(a.dims.length!==i)throw new Error("input tensors should have the same shape");a.dims.forEach((l,p)=>{if(p!==t&&l!==n.dims[p])throw new Error("non concat dimensions must match")})}})},tf=(e,t)=>`
4512
+ `)},Iu=e=>{at(e,"Sub",(t,r)=>`${t}-${r}`)},Cu=e=>{at(e,"Greater",{scalar:(t,r)=>`u32(${t}>${r})`,vector:(t,r)=>`vec4<u32>(${t}>${r})`},void 0,void 0,9)},Au=e=>{at(e,"Less",{scalar:(t,r)=>`u32(${t}<${r})`,vector:(t,r)=>`vec4<u32>(${t}<${r})`},void 0,void 0,9)},ku=e=>{at(e,"GreaterOrEqual",{scalar:(t,r)=>`u32(${t}>=${r})`,vector:(t,r)=>`vec4<u32>(${t}>=${r})`},void 0,void 0,9)},Eu=e=>{at(e,"LessOrEqual",{scalar:(t,r)=>`u32(${t}<=${r})`,vector:(t,r)=>`vec4<u32>(${t}<=${r})`},void 0,void 0,9)}});var ef,tf,rf,nf,zu,Ou,Du=U(()=>{"use strict";J();ne();xe();ae();ef=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");let r=0,n=e[r],o=n.dataType,i=n.dims.length;e.forEach((a,d)=>{if(d!==r){if(a.dataType!==o)throw new Error("input tensors should be one type");if(a.dims.length!==i)throw new Error("input tensors should have the same shape");a.dims.forEach((l,p)=>{if(p!==t&&l!==n.dims[p])throw new Error("non concat dimensions must match")})}})},tf=(e,t)=>`
4458
4513
  fn calculateInputIndex(index: u32) -> u32 {
4459
4514
  let sizeInConcatAxis = array<u32, ${e}u>(${t});
4460
4515
  for (var i: u32 = 0u; i < ${e}; i += 1u ) {
@@ -4464,7 +4519,7 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4464
4519
  }
4465
4520
  return ${e}u;
4466
4521
  }`,rf=(e,t)=>{let r=e.length,n=[];for(let o=0;o<r;++o){let i=t.setByOffset("global_idx",e[o].getByIndices("indices"));r===1?n.push(i):o===0?n.push(`if (inputIndex == ${o}u) { ${i} }`):o===r-1?n.push(`else { ${i} }`):n.push(`else if (inputIndex == ${o}) { ${i} }`)}return n.join(`
4467
- `)},nf=(e,t,r,n)=>{let o=C.size(r),i=new Array(e.length),a=new Array(e.length),d=0,l=[],p=[],m=[{type:12,data:o}];for(let g=0;g<e.length;++g)d+=e[g].dims[t],i[g]=d,p.push(e[g].dims.length),a[g]=E(`input${g}`,n,p[g]),l.push("rank"),m.push({type:12,data:i[g]});for(let g=0;g<e.length;++g)m.push(...N(e[g].dims));m.push(...N(r));let u=M("output",n,r.length),h=u.indicesGet("indices",t),_=Array.from(Array(i.length).keys()).map(g=>`uniforms.sizeInConcatAxis${g}`).join(","),y=g=>`
4522
+ `)},nf=(e,t,r,n)=>{let o=k.size(r),i=new Array(e.length),a=new Array(e.length),d=0,l=[],p=[],m=[{type:12,data:o}];for(let g=0;g<e.length;++g)d+=e[g].dims[t],i[g]=d,p.push(e[g].dims.length),a[g]=E(`input${g}`,n,p[g]),l.push("rank"),m.push({type:12,data:i[g]});for(let g=0;g<e.length;++g)m.push(...N(e[g].dims));m.push(...N(r));let u=M("output",n,r.length),h=u.indicesGet("indices",t),_=Array.from(Array(i.length).keys()).map(g=>`uniforms.sizeInConcatAxis${g}`).join(","),y=g=>`
4468
4523
 
4469
4524
  ${(()=>{g.registerUniform("outputSize","u32");for(let x=0;x<e.length;x++)g.registerUniform(`sizeInConcatAxis${x}`,"u32");return g.declareVariables(...a,u)})()}
4470
4525
 
@@ -4482,11 +4537,11 @@ fn quick_gelu_impl(x: vec4<${e}>) -> vec4<${e}> {
4482
4537
  }
4483
4538
 
4484
4539
  ${rf(a,u)}
4485
- }`;return{name:"Concat",shaderCache:{hint:`${t}`,inputDependencies:l},getRunData:()=>({outputs:[{dims:r,dataType:n}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:m}),getShaderSource:y}},Ou=(e,t)=>{let r=e.inputs,n=r[0].dims,o=C.normalizeAxis(t.axis,n.length);ef(r,o);let i=n.slice();i[o]=r.reduce((d,l)=>d+(l.dims.length>o?l.dims[o]:0),0);let a=r.filter(d=>C.size(d.dims)>0);e.compute(nf(a,o,i,r[0].dataType),{inputs:a})},Du=e=>re({axis:e.axis})});var qe,Ke,je,jr,yt=U(()=>{"use strict";te();oe();qe=(e,t,r="f32")=>{switch(e.activation){case"Relu":return`value = max(value, ${t}(0.0));`;case"Sigmoid":return`value = (${t}(1.0) / (${t}(1.0) + exp(-value)));`;case"Clip":return`value = clamp(value, ${t}(${r}(uniforms.clip_min)), ${t}(${r}(uniforms.clip_max)));`;case"HardSigmoid":return`value = max(${t}(0.0), min(${t}(1.0), ${r}(uniforms.alpha) * value + ${r}(uniforms.beta)));`;case"LeakyRelu":return`value = select(${r}(uniforms.alpha) * value, value, value >= ${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value));
4540
+ }`;return{name:"Concat",shaderCache:{hint:`${t}`,inputDependencies:l},getRunData:()=>({outputs:[{dims:r,dataType:n}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:m}),getShaderSource:y}},zu=(e,t)=>{let r=e.inputs,n=r[0].dims,o=k.normalizeAxis(t.axis,n.length);ef(r,o);let i=n.slice();i[o]=r.reduce((d,l)=>d+(l.dims.length>o?l.dims[o]:0),0);let a=r.filter(d=>k.size(d.dims)>0);e.compute(nf(a,o,i,r[0].dataType),{inputs:a})},Ou=e=>ee({axis:e.axis})});var Ke,je,Ye,Kr,yt=U(()=>{"use strict";J();ne();Ke=(e,t,r="f32")=>{switch(e.activation){case"Relu":return`value = max(value, ${t}(0.0));`;case"Sigmoid":return`value = (${t}(1.0) / (${t}(1.0) + exp(-value)));`;case"Clip":return`value = clamp(value, ${t}(${r}(uniforms.clip_min)), ${t}(${r}(uniforms.clip_max)));`;case"HardSigmoid":return`value = max(${t}(0.0), min(${t}(1.0), ${r}(uniforms.alpha) * value + ${r}(uniforms.beta)));`;case"LeakyRelu":return`value = select(${r}(uniforms.alpha) * value, value, value >= ${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value));
4486
4541
  value = sign(value) * (1.0 - e2x) / (1.0 + e2x);
4487
- `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},Ke=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},je=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},jr=e=>{let t=e?.activation||"";if(t==="HardSigmoid"){let[r,n]=e?.activation_params||[.2,.5];return{activation:t,alpha:r,beta:n}}else if(t==="Clip"){let[r,n]=e?.activation_params||[es,ts];return{activation:t,clipMax:n,clipMin:r}}else if(t==="LeakyRelu"){let[r]=e?.activation_params||[.01];return{activation:t,alpha:r}}return{activation:t}}});var Ae,Mu,Yr=U(()=>{"use strict";Ae=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Mu=e=>`
4542
+ `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},je=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},Ye=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},Kr=e=>{let t=e?.activation||"";if(t==="HardSigmoid"){let[r,n]=e?.activation_params||[.2,.5];return{activation:t,alpha:r,beta:n}}else if(t==="Clip"){let[r,n]=e?.activation_params||[Ja,es];return{activation:t,clipMax:n,clipMin:r}}else if(t==="LeakyRelu"){let[r]=e?.activation_params||[.01];return{activation:t,alpha:r}}return{activation:t}}});var Ae,Bu,jr=U(()=>{"use strict";Ae=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Bu=e=>`
4488
4543
  ${e?"value = value + getBiasByOutputCoords(coords);":""}
4489
- `});var Ru,Uu=U(()=>{"use strict";Ru=e=>`
4544
+ `});var Mu,Ru=U(()=>{"use strict";Mu=e=>`
4490
4545
  fn getIndexFromCoords4D(coords : vec4<i32>, shape : vec4<i32>) -> i32 {
4491
4546
  return dot(coords, vec4<i32>(
4492
4547
  shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1));
@@ -4495,18 +4550,18 @@ fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
4495
4550
  return dot(coords, vec4<i32>(
4496
4551
  i32(${e}.x), i32(${e}.y), i32(${e}.z), 1));
4497
4552
  }
4498
- `});var Jt,Zr,Qr=U(()=>{"use strict";te();oe();ae();yt();Jt=(e,t,r,n,o)=>{let i=n-r;return`
4553
+ `});var Xt,Yr,Zr=U(()=>{"use strict";J();ne();ae();yt();Xt=(e,t,r,n,o)=>{let i=n-r;return`
4499
4554
  ${Array.from({length:r}).map((a,d)=>`
4500
- if (${F(t.shape,d,t.rank)} != 1) {
4501
- ${t.indicesSet(e,d,F(o,d+i,n))}
4555
+ if (${q(t.shape,d,t.rank)} != 1) {
4556
+ ${t.indicesSet(e,d,q(o,d+i,n))}
4502
4557
  } else {
4503
4558
  ${t.indicesSet(e,d,0)}
4504
4559
  }`).join("")}
4505
- `},Zr=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a[a.length-2],p=d[d.length-1],m=a[a.length-1],u=me(p),h=me(m),_=me(l),y=C.size(r)/u/_,g=e.length>2,x=n?n.slice(0,-2):r.slice(0,-2),v=[C.size(x),l,p],S=[{type:12,data:y},{type:12,data:l},{type:12,data:p},{type:12,data:m}];Ke(t,S),S.push(...N(x,a,d)),g&&S.push(...N(e[2].dims)),S.push(...N(v));let T=A=>{let k=Lr("batch_dims",e[0].dataType,x.length),P=E("a",e[0].dataType,a.length,h),D=E("b",e[1].dataType,d.length,u),R=M("output",e[0].dataType,v.length,u),G=_e(R.type.tensor),K=qe(t,R.type.value,G),j=[P,D],V="";if(g){let Y=o?u:1;j.push(E("bias",e[2].dataType,e[2].dims.length,Y)),V=`${o?`value += bias[col / ${Y}];`:`value += ${R.type.value}(bias[row + i]);`}`}let Q=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];je(t,Q);let se=()=>{let Y=`var a_data: ${P.type.value};`;for(let ee=0;ee<h;ee++)Y+=`
4506
- let b_data${ee} = b[(b_offset + (k + ${ee}) * uniforms.N + col) / ${u}];`;for(let ee=0;ee<_;ee++){Y+=`a_data = a[(a_offset + (row + ${ee}) * uniforms.K + k) / ${h}];`;for(let J=0;J<h;J++)Y+=`
4507
- values[${ee}] = fma(${D.type.value}(a_data${h===1?"":`[${J}]`}), b_data${J}, values[${ee}]);
4508
- `}return Y};return`
4509
- ${A.registerUniforms(Q).registerInternalVariables(k).declareVariables(...j,R)}
4560
+ `},Yr=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a[a.length-2],p=d[d.length-1],m=a[a.length-1],u=me(p),h=me(m),_=me(l),y=k.size(r)/u/_,g=e.length>2,x=n?n.slice(0,-2):r.slice(0,-2),v=[k.size(x),l,p],S=[{type:12,data:y},{type:12,data:l},{type:12,data:p},{type:12,data:m}];je(t,S),S.push(...N(x,a,d)),g&&S.push(...N(e[2].dims)),S.push(...N(v));let T=A=>{let C=Wr("batch_dims",e[0].dataType,x.length),P=E("a",e[0].dataType,a.length,h),D=E("b",e[1].dataType,d.length,u),R=M("output",e[0].dataType,v.length,u),H=ye(R.type.tensor),L=Ke(t,R.type.value,H),re=[P,D],V="";if(g){let j=o?u:1;re.push(E("bias",e[2].dataType,e[2].dims.length,j)),V=`${o?`value += bias[col / ${j}];`:`value += ${R.type.value}(bias[row + i]);`}`}let K=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];Ye(t,K);let we=()=>{let j=`var a_data: ${P.type.value};`;for(let Q=0;Q<h;Q++)j+=`
4561
+ let b_data${Q} = b[(b_offset + (k + ${Q}) * uniforms.N + col) / ${u}];`;for(let Q=0;Q<_;Q++){j+=`a_data = a[(a_offset + (row + ${Q}) * uniforms.K + k) / ${h}];`;for(let ie=0;ie<h;ie++)j+=`
4562
+ values[${Q}] = fma(${D.type.value}(a_data${h===1?"":`[${ie}]`}), b_data${ie}, values[${Q}]);
4563
+ `}return j};return`
4564
+ ${A.registerUniforms(K).registerInternalVariables(C).declareVariables(...re,R)}
4510
4565
  ${A.mainStart()}
4511
4566
  ${A.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
4512
4567
  let col = (global_idx % (uniforms.N / ${u})) * ${u};
@@ -4515,33 +4570,33 @@ fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
4515
4570
  let row = (index1 % stride1) * ${_};
4516
4571
  let batch = index1 / stride1;
4517
4572
 
4518
- ${r.length===2?"":`let batch_indices = ${k.offsetToIndices("batch")};`}
4573
+ ${r.length===2?"":`let batch_indices = ${C.offsetToIndices("batch")};`}
4519
4574
 
4520
4575
  var a_indices: ${P.type.indices};
4521
- ${Jt("a_indices",P,P.rank-2,k.rank,"batch_indices")}
4576
+ ${Xt("a_indices",P,P.rank-2,C.rank,"batch_indices")}
4522
4577
  ${P.indicesSet("a_indices",P.rank-2,0)}
4523
4578
  ${P.indicesSet("a_indices",P.rank-1,0)}
4524
4579
  let a_offset = ${P.indicesToOffset("a_indices")};
4525
4580
 
4526
4581
  var b_indices: ${D.type.indices};
4527
- ${Jt("b_indices",D,D.rank-2,k.rank,"batch_indices")}
4582
+ ${Xt("b_indices",D,D.rank-2,C.rank,"batch_indices")}
4528
4583
  ${D.indicesSet("b_indices",D.rank-2,0)}
4529
4584
  ${D.indicesSet("b_indices",D.rank-1,0)}
4530
4585
  let b_offset = ${D.indicesToOffset("b_indices")};
4531
4586
  var values: array<${R.type.value}, ${_}>;
4532
4587
  for (var k: u32 = 0u; k < uniforms.K; k = k + ${h}) {
4533
- ${se()}
4588
+ ${we()}
4534
4589
  }
4535
4590
  for (var i = 0u; i < ${_}u; i++) {
4536
4591
  var value = values[i];
4537
4592
  ${V}
4538
- ${K}
4593
+ ${L}
4539
4594
  let cur_indices = ${R.type.indices}(batch, row + i, col);
4540
4595
  let offset = ${R.indicesToOffset("cur_indices")};
4541
4596
  ${R.setByOffset(`offset / ${u}`,"value")};
4542
4597
  }
4543
4598
  }
4544
- `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${u};${h};${_};${o}`,inputDependencies:g?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(y/64)},programUniforms:S}),getShaderSource:T}}});var of,af,ho,Nu,sf,go,uf,er,Xr=U(()=>{"use strict";te();oe();ae();yt();Qr();Yr();of=(e,t)=>e?`
4599
+ `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${u};${h};${_};${o}`,inputDependencies:g?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(y/64)},programUniforms:S}),getShaderSource:T}}});var of,af,fo,Uu,sf,ho,uf,Jt,Qr=U(()=>{"use strict";J();ne();ae();yt();Zr();jr();of=(e,t)=>e?`
4545
4600
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4546
4601
  kStart + inputRow,
4547
4602
  globalRowStart / innerElementSize + inputCol${t?", batchIndices":""});
@@ -4566,7 +4621,7 @@ fn getOutputIndexFromCoords(coords : vec4<i32>) -> i32 {
4566
4621
  acc[i] = BCached1 * ACached.y + acc[i];
4567
4622
  acc[i] = BCached2 * ACached.z + acc[i];
4568
4623
  ${t===3?"":"acc[i] = BCached3 * ACached.w + acc[i];"}
4569
- }`,ho=(e,t,r="f32",n,o=!1,i=32,a=!1,d=32)=>{let l=t[1]*e[1],p=t[0]*e[0],m=o?l:i,u=o?i:l,h=m/t[0],_=i/t[1];if(!((o&&h===4&&e[1]===4||!o&&(h===3||h===4))&&m%t[0]===0&&i%t[1]===0&&e[0]===4))throw new Error(`If transposeA ${o} is true, innerElementSize ${h} and workPerThread[1] ${e[1]} must be 4.
4624
+ }`,fo=(e,t,r="f32",n,o=!1,i=32,a=!1,d=32)=>{let l=t[1]*e[1],p=t[0]*e[0],m=o?l:i,u=o?i:l,h=m/t[0],_=i/t[1];if(!((o&&h===4&&e[1]===4||!o&&(h===3||h===4))&&m%t[0]===0&&i%t[1]===0&&e[0]===4))throw new Error(`If transposeA ${o} is true, innerElementSize ${h} and workPerThread[1] ${e[1]} must be 4.
4570
4625
  Otherwise, innerElementSize ${h} must be 3 or 4.
4571
4626
  tileAWidth ${m} must be divisible by workgroupSize[0]${t[0]}. tileInner ${i} must be divisible by workgroupSize[1] ${t[1]}. colPerThread ${e[0]} must be 4.`);return`
4572
4627
  var<workgroup> mm_Asub: array<array<vec${h}<${r}>, ${m/h}>, ${u}>;
@@ -4631,7 +4686,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4631
4686
  for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
4632
4687
  mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]);
4633
4688
  }
4634
- }`},Nu=(e,t)=>e?`
4689
+ }`},Uu=(e,t)=>e?`
4635
4690
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4636
4691
  kStart + inputRow,
4637
4692
  globalRowStart + inputCol${t?", batchIndices":""});
@@ -4639,7 +4694,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4639
4694
  mm_Asub[inputRow][inputCol] = mm_readA(batch,
4640
4695
  globalRowStart + inputRow,
4641
4696
  kStart + inputCol${t?", batchIndices":""});
4642
- `,sf=e=>e?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];",go=(e,t,r="f32",n,o=!1,i=32,a=!1,d=32,l=!1)=>{let p=e[1]*t[1],m=e[0]*t[0],u=o?p:i,h=o?i:p;if(!(h%t[1]===0&&u%t[0]===0&&i%t[1]===0))throw new Error(`tileAHight ${h} must be divisible by workgroupSize[1]${t[1]}, tileAWidth ${u} must be divisible by workgroupSize[0]${t[0]}, tileInner ${i} must be divisible by workgroupSize[1]${t[1]}`);let _=h/t[1],y=u/t[0],g=i/t[1],x=l?`
4697
+ `,sf=e=>e?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];",ho=(e,t,r="f32",n,o=!1,i=32,a=!1,d=32,l=!1)=>{let p=e[1]*t[1],m=e[0]*t[0],u=o?p:i,h=o?i:p;if(!(h%t[1]===0&&u%t[0]===0&&i%t[1]===0))throw new Error(`tileAHight ${h} must be divisible by workgroupSize[1]${t[1]}, tileAWidth ${u} must be divisible by workgroupSize[0]${t[0]}, tileInner ${i} must be divisible by workgroupSize[1]${t[1]}`);let _=h/t[1],y=u/t[0],g=i/t[1],x=l?`
4643
4698
  let localRow = i32(localId.y);
4644
4699
  let localCol = i32(localId.x);
4645
4700
  let globalRowStart = i32(workgroupId.y) * ${p};
@@ -4650,7 +4705,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4650
4705
  // Load one tile of A into local memory.
4651
4706
  for (var inputRow = localRow; inputRow < ${h}; inputRow = inputRow + ${t[1]}) {
4652
4707
  for (var inputCol = localCol; inputCol < ${u}; inputCol = inputCol + ${t[0]}) {
4653
- ${Nu(o,n)}
4708
+ ${Uu(o,n)}
4654
4709
  }
4655
4710
  }
4656
4711
  // Load one tile of B into local memory.
@@ -4705,7 +4760,7 @@ for (var t = 0; t < num_tiles; t = t + 1) {
4705
4760
  for (var innerCol = 0; innerCol < ${y}; innerCol = innerCol + 1) {
4706
4761
  let inputRow = tileRowA + innerRow;
4707
4762
  let inputCol = tileColA + innerCol;
4708
- ${Nu(o,n)}
4763
+ ${Uu(o,n)}
4709
4764
  }
4710
4765
  }
4711
4766
 
@@ -4765,14 +4820,14 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4765
4820
  var acc : array<array<${r}, colPerThread>, rowPerThread>;
4766
4821
  ${x}
4767
4822
  }
4768
- `},uf=(e,t,r,n,o=!1)=>{let[i,a,d,l]=n,p=_e(n[0].type.tensor);return`
4823
+ `},uf=(e,t,r,n,o=!1)=>{let[i,a,d,l]=n,p=ye(n[0].type.tensor);return`
4769
4824
  fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${i.type.indices}) -> ${Ae(e,p)} {
4770
4825
  var value = ${Ae(e,p)}(0.0);
4771
4826
  let col = colIn * ${e};
4772
4827
  if(row < uniforms.dim_a_outer && col < uniforms.dim_inner)
4773
4828
  {
4774
4829
  var aIndices: ${a.type.indices};
4775
- ${Jt("aIndices",a,a.rank-2,i.rank,"batchIndices")}
4830
+ ${Xt("aIndices",a,a.rank-2,i.rank,"batchIndices")}
4776
4831
  ${a.indicesSet("aIndices",a.rank-2,"u32(row)")}
4777
4832
  ${a.indicesSet("aIndices",a.rank-1,"u32(colIn)")}
4778
4833
  value = ${a.getByIndices("aIndices")};
@@ -4786,7 +4841,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4786
4841
  if(row < uniforms.dim_inner && col < uniforms.dim_b_outer)
4787
4842
  {
4788
4843
  var bIndices: ${d.type.indices};
4789
- ${Jt("bIndices",d,d.rank-2,i.rank,"batchIndices")}
4844
+ ${Xt("bIndices",d,d.rank-2,i.rank,"batchIndices")}
4790
4845
  ${d.indicesSet("bIndices",d.rank-2,"u32(row)")}
4791
4846
  ${d.indicesSet("bIndices",d.rank-1,"u32(colIn)")}
4792
4847
  value = ${d.getByIndices("bIndices")};
@@ -4804,11 +4859,11 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4804
4859
  ${l.setByIndices("vec3<u32>(coords)","value")}
4805
4860
  }
4806
4861
  }
4807
- `},er=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a.slice(0,-2),p=d.slice(0,-2),m=n?n.slice(0,-2):r.slice(0,-2),u=C.size(m),h=a[a.length-2],_=a[a.length-1],y=d[d.length-1],g=_%4===0&&y%4===0,x=h<=8?[4,1,1]:[4,4,1],$=[8,8,1],v=[Math.ceil(y/$[0]/x[0]),Math.ceil(h/$[1]/x[1]),Math.ceil(u/$[2]/x[2])],S=g?4:1,T=[...l,h,_/S],A=T.length,k=[...p,_,y/S],P=k.length,D=[u,h,y/S],R=[{type:6,data:h},{type:6,data:y},{type:6,data:_}];Ke(t,R),R.push(...N(m,T,k));let G=["rank","rank"],K=e.length>2;K&&(R.push(...N(e[2].dims)),G.push("rank")),R.push(...N(D));let j=V=>{let Q=m.length,se=Lr("batchDims",e[0].dataType,Q,1),Y=_e(e[0].dataType),ee=E("a",e[0].dataType,A,S),J=E("b",e[1].dataType,P,S),ne=M("result",e[0].dataType,D.length,S),be=[ee,J];if(K){let q=o?S:1;be.push(E("bias",e[2].dataType,e[2].dims.length,q))}let Oe=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];je(t,Oe);let $e=_e(ne.type.tensor),le=qe(t,ne.type.value,$e),W=uf(S,K,le,[se,ee,J,ne],o);return`
4808
- ${V.registerUniforms(Oe).registerInternalVariables(se).declareVariables(...be,ne)}
4862
+ `},Jt=(e,t,r,n,o=!1,i)=>{let a=e[0].dims,d=e[1].dims,l=a.slice(0,-2),p=d.slice(0,-2),m=n?n.slice(0,-2):r.slice(0,-2),u=k.size(m),h=a[a.length-2],_=a[a.length-1],y=d[d.length-1],g=_%4===0&&y%4===0,x=h<=8?[4,1,1]:[4,4,1],$=[8,8,1],v=[Math.ceil(y/$[0]/x[0]),Math.ceil(h/$[1]/x[1]),Math.ceil(u/$[2]/x[2])],S=g?4:1,T=[...l,h,_/S],A=T.length,C=[...p,_,y/S],P=C.length,D=[u,h,y/S],R=[{type:6,data:h},{type:6,data:y},{type:6,data:_}];je(t,R),R.push(...N(m,T,C));let H=["rank","rank"],L=e.length>2;L&&(R.push(...N(e[2].dims)),H.push("rank")),R.push(...N(D));let re=V=>{let K=m.length,we=Wr("batchDims",e[0].dataType,K,1),j=ye(e[0].dataType),Q=E("a",e[0].dataType,A,S),ie=E("b",e[1].dataType,P,S),te=M("result",e[0].dataType,D.length,S),be=[Q,ie];if(L){let Y=o?S:1;be.push(E("bias",e[2].dataType,e[2].dims.length,Y))}let Oe=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];Ye(t,Oe);let ve=ye(te.type.tensor),de=Ke(t,te.type.value,ve),W=uf(S,L,de,[we,Q,ie,te],o);return`
4863
+ ${V.registerUniforms(Oe).registerInternalVariables(we).declareVariables(...be,te)}
4809
4864
  ${W}
4810
- ${g?ho(x,$,Y,se):go(x,$,Y,se)}
4811
- `};return{name:"MatMul",shaderCache:{hint:`${x};${t.activation};${g};${o}`,inputDependencies:G},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:v[0],y:v[1],z:v[2]},programUniforms:R}),getShaderSource:j}}});var df,Vu,Wu=U(()=>{"use strict";te();et();ae();yt();Yr();Uu();Xr();df=(e,t,r,n,o=!1,i,a=4,d=4,l=4,p="f32")=>{let m=G=>{switch(G){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${p}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${G} is not supported.`)}},u=G=>{switch(G){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${G} is not supported.`)}},h=e?`
4865
+ ${g?fo(x,$,j,we):ho(x,$,j,we)}
4866
+ `};return{name:"MatMul",shaderCache:{hint:`${x};${t.activation};${g};${o}`,inputDependencies:H},getRunData:()=>({outputs:[{dims:i?i(r):r,dataType:e[0].dataType}],dispatchGroup:{x:v[0],y:v[1],z:v[2]},programUniforms:R}),getShaderSource:re}}});var df,Nu,Vu=U(()=>{"use strict";J();et();ae();yt();jr();Ru();Qr();df=(e,t,r,n,o=!1,i,a=4,d=4,l=4,p="f32")=>{let m=H=>{switch(H){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${p}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${H} is not supported.`)}},u=H=>{switch(H){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${H} is not supported.`)}},h=e?`
4812
4867
  let coord = vec4<i32>(batch, xRow, xCol, xCh);
4813
4868
  `:`
4814
4869
  let coord = vec4<i32>(batch, xCh, xRow, xCol);
@@ -4867,8 +4922,8 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4867
4922
  if (row < uniforms.dim_inner && col < uniforms.dim_a_outer) {
4868
4923
  ${u(d)}
4869
4924
  }
4870
- return ${Ae(d,p)}(0.0);`,A=Ae(l,p),k=e?Ae(a,p):Ae(d,p),P=e?Ae(d,p):Ae(a,p),D=qe(i,A,p);return`
4871
- fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${k} {
4925
+ return ${Ae(d,p)}(0.0);`,A=Ae(l,p),C=e?Ae(a,p):Ae(d,p),P=e?Ae(d,p):Ae(a,p),D=Ke(i,A,p);return`
4926
+ fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${C} {
4872
4927
  ${e?S:T}
4873
4928
  }
4874
4929
 
@@ -4883,36 +4938,36 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4883
4938
  var value = valueIn;
4884
4939
  let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"};
4885
4940
  ${_}
4886
- ${Mu(o)}
4941
+ ${Bu(o)}
4887
4942
  ${D}
4888
4943
  setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
4889
4944
  }
4890
- }`},Vu=(e,t,r,n,o,i,a,d,l)=>{let p=t.format==="NHWC",m=p?e[0].dims[3]:e[0].dims[1],u=r[0],h=p?r[2]:r[3],_=p?r[1]:r[2],y=p?r[3]:r[1],g=p&&(m%4===0||m%3===0)&&y%4===0,x=p?y:h*_,$=p?h*_:y,v=[8,8,1],S=n<=8?[4,1,1]:[4,4,1],T=[Math.ceil(x/v[0]/S[0]),Math.ceil($/v[1]/S[1]),Math.ceil(u/v[2]/S[2])];ue("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${T}`);let A=g?p&&m%4!==0?3:4:1,k=v[1]*S[1],P=v[0]*S[0],D=Math.max(v[0]*A,v[1]),R=n%k===0,G=o%P===0,K=i%D===0,j=g?[A,4,4]:[1,1,1],V=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];Ke(t,V),V.push(...N(e[0].dims,e[1].dims));let Q=["rank","rank"];a&&(V.push(...N(e[2].dims)),Q.push("rank")),V.push(...N(r));let se=Y=>{let ee=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];je(t,ee);let J=g?4:1,ne=_e(e[0].dataType),be=`
4891
- fn setOutputAtIndex(flatIndex : i32, value : ${g?`vec4<${ne}>`:ne}) {
4892
- result[flatIndex] = ${g?`vec4<${ne}>`:ne}(value);
4945
+ }`},Nu=(e,t,r,n,o,i,a,d,l)=>{let p=t.format==="NHWC",m=p?e[0].dims[3]:e[0].dims[1],u=r[0],h=p?r[2]:r[3],_=p?r[1]:r[2],y=p?r[3]:r[1],g=p&&(m%4===0||m%3===0)&&y%4===0,x=p?y:h*_,$=p?h*_:y,v=[8,8,1],S=n<=8?[4,1,1]:[4,4,1],T=[Math.ceil(x/v[0]/S[0]),Math.ceil($/v[1]/S[1]),Math.ceil(u/v[2]/S[2])];se("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${T}`);let A=g?p&&m%4!==0?3:4:1,C=v[1]*S[1],P=v[0]*S[0],D=Math.max(v[0]*A,v[1]),R=n%C===0,H=o%P===0,L=i%D===0,re=g?[A,4,4]:[1,1,1],V=[{type:6,data:n},{type:6,data:o},{type:6,data:i},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];je(t,V),V.push(...N(e[0].dims,e[1].dims));let K=["rank","rank"];a&&(V.push(...N(e[2].dims)),K.push("rank")),V.push(...N(r));let we=j=>{let Q=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];Ye(t,Q);let ie=g?4:1,te=ye(e[0].dataType),be=`
4946
+ fn setOutputAtIndex(flatIndex : i32, value : ${g?`vec4<${te}>`:te}) {
4947
+ result[flatIndex] = ${g?`vec4<${te}>`:te}(value);
4893
4948
  }
4894
- fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${g?`vec4<${ne}>`:ne}) {
4949
+ fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${g?`vec4<${te}>`:te}) {
4895
4950
  let flatIndex = getOutputIndexFromCoords(vec4<i32>(d0, d1, d2, d3));
4896
4951
  setOutputAtIndex(flatIndex ${g?"/ 4":""}, value);
4897
- }`,Oe=E("x",e[0].dataType,e[0].dims.length,A===3?1:A),$e=E("w",e[1].dataType,e[1].dims.length,J),le=[Oe,$e],W=M("result",e[0].dataType,r.length,J);if(a){let q=E("bias",e[2].dataType,e[2].dims.length,J);le.push(q),be+=`
4898
- fn getBiasByOutputCoords(coords : vec4<i32>) -> ${g?`vec4<${ne}>`:ne} {
4952
+ }`,Oe=E("x",e[0].dataType,e[0].dims.length,A===3?1:A),ve=E("w",e[1].dataType,e[1].dims.length,ie),de=[Oe,ve],W=M("result",e[0].dataType,r.length,ie);if(a){let Y=E("bias",e[2].dataType,e[2].dims.length,ie);de.push(Y),be+=`
4953
+ fn getBiasByOutputCoords(coords : vec4<i32>) -> ${g?`vec4<${te}>`:te} {
4899
4954
  return bias[coords.${p?"w":"y"}${g?"/ 4":""}];
4900
4955
  }`}return`
4901
- ${Ru("uniforms.result_strides")}
4956
+ ${Mu("uniforms.result_strides")}
4902
4957
  //struct Uniforms { xShape : vec4<i32>, wShape : vec4<i32>, outShape : vec4<i32>,
4903
4958
  // outShapeStrides: vec3<i32>, filterDims : vec2<i32>, pad : vec2<i32>, stride : vec2<i32>,
4904
4959
  // dilation : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32 };
4905
- ${Y.registerUniforms(ee).declareVariables(...le,W)}
4960
+ ${j.registerUniforms(Q).declareVariables(...de,W)}
4906
4961
  ${be}
4907
- ${df(p,R,G,K,a,t,j[0],j[1],j[2],ne)}
4908
- ${g?ho(S,v,ne,void 0,!p,D):go(S,v,ne,void 0,!p,D,!1,void 0,d)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${A};${g};${R};${G};${K};${k};${P};${D}`,inputDependencies:Q},getRunData:()=>({outputs:[{dims:l?l(r):r,dataType:e[0].dataType}],dispatchGroup:{x:T[0],y:T[1],z:T[2]},programUniforms:V}),getShaderSource:se}}});var lf,Lu,Jr,cf,Gu,pf,Hu,Fu,qu=U(()=>{"use strict";te();et();oe();ae();yt();Yr();lf=e=>{let t=1;for(let r=0;r<e.length;r++)t*=e[r];return t},Lu=e=>typeof e=="number"?[e,e,e]:e,Jr=(e,t)=>t<=1?e:e+(e-1)*(t-1),cf=(e,t,r,n=1)=>{let o=Jr(t,n);return Math.floor((e[0]*(r-1)-r+o)/2)},Gu=(e,t,r,n,o)=>{o==null&&(o=cf(e,t[0],n[0]));let i=[0,0,0,r];for(let a=0;a<3;a++)e[a]+2*o>=t[a]&&(i[a]=Math.trunc((e[a]-t[a]+2*o)/n[a]+1));return i},pf=(e,t,r,n,o,i,a,d,l,p)=>{let m,u,h,_;if(e==="VALID"&&(e=0),typeof e=="number"){m={top:e,bottom:e,left:e,right:e,front:e,back:e};let y=Gu([t,r,n,1],[d,l,p],1,[o,i,a],e);u=y[0],h=y[1],_=y[2]}else if(Array.isArray(e)){if(!e.every((g,x,$)=>g===$[0]))throw Error(`Unsupported padding parameter: ${e}`);m={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let y=Gu([t,r,n,1],[d,l,p],1,[o,i,a],e[0]);u=y[0],h=y[1],_=y[2]}else if(e==="SAME_UPPER"){u=Math.ceil(t/o),h=Math.ceil(r/i),_=Math.ceil(n/a);let y=(u-1)*o+d-t,g=(h-1)*i+l-r,x=(_-1)*a+p-n,$=Math.floor(y/2),v=y-$,S=Math.floor(g/2),T=g-S,A=Math.floor(x/2),k=x-A;m={top:S,bottom:T,left:A,right:k,front:$,back:v}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:m,outDepth:u,outHeight:h,outWidth:_}},Hu=(e,t,r,n,o,i=!1,a="channelsLast")=>{let d,l,p,m,u;if(a==="channelsLast")[d,l,p,m,u]=e;else if(a==="channelsFirst")[d,u,l,p,m]=e;else throw new Error(`Unknown dataFormat ${a}`);let[h,,_,y,g]=t,[x,$,v]=Lu(r),[S,T,A]=Lu(n),k=Jr(_,S),P=Jr(y,T),D=Jr(g,A),{padInfo:R,outDepth:G,outHeight:K,outWidth:j}=pf(o,l,p,m,x,$,v,k,P,D),V=i?h*u:h,Q=[0,0,0,0,0];return a==="channelsFirst"?Q=[d,V,G,K,j]:a==="channelsLast"&&(Q=[d,G,K,j,V]),{batchSize:d,dataFormat:a,inDepth:l,inHeight:p,inWidth:m,inChannels:u,outDepth:G,outHeight:K,outWidth:j,outChannels:V,padInfo:R,strideDepth:x,strideHeight:$,strideWidth:v,filterDepth:_,filterHeight:y,filterWidth:g,effectiveFilterDepth:k,effectiveFilterHeight:P,effectiveFilterWidth:D,dilationDepth:S,dilationHeight:T,dilationWidth:A,inShape:e,outShape:Q,filterShape:t}},Fu=(e,t,r,n,o,i)=>{let a=i==="channelsLast",d=a?e[0].dims[3]:e[0].dims[1],l=!1,p=[64,1,1],m={x:r.map((v,S)=>S)},u=[Math.ceil(lf(m.x.map(v=>r[v]))/p[0]),1,1];ue("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${u}`);let h=l?a&&d%4!==0?3:4:1,_=C.size(r),y=[{type:12,data:_},{type:12,data:n},{type:12,data:o},{type:12,data:t.strides},{type:12,data:t.dilations}];Ke(t,y),y.push(...N(e[0].dims,e[1].dims));let g=["rank","rank"],x=e.length===3;x&&(y.push(...N(e[2].dims)),g.push("rank")),y.push(...N(r));let $=v=>{let S=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:n.length},{name:"pads",type:"u32",length:o.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];je(t,S);let T=l?4:1,A=_e(e[0].dataType),k=E("x",e[0].dataType,e[0].dims.length,h===3?1:h),P=E("W",e[1].dataType,e[1].dims.length,T),D=[k,P],R=M("result",e[0].dataType,r.length,T),G="";if(x){let V=E("bias",e[2].dataType,e[2].dims.length,T);D.push(V),G+=`
4962
+ ${df(p,R,H,L,a,t,re[0],re[1],re[2],te)}
4963
+ ${g?fo(S,v,te,void 0,!p,D):ho(S,v,te,void 0,!p,D,!1,void 0,d)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${A};${g};${R};${H};${L};${C};${P};${D}`,inputDependencies:K},getRunData:()=>({outputs:[{dims:l?l(r):r,dataType:e[0].dataType}],dispatchGroup:{x:T[0],y:T[1],z:T[2]},programUniforms:V}),getShaderSource:we}}});var lf,Wu,Xr,cf,Lu,pf,Gu,Hu,Fu=U(()=>{"use strict";J();et();ne();ae();yt();jr();lf=e=>{let t=1;for(let r=0;r<e.length;r++)t*=e[r];return t},Wu=e=>typeof e=="number"?[e,e,e]:e,Xr=(e,t)=>t<=1?e:e+(e-1)*(t-1),cf=(e,t,r,n=1)=>{let o=Xr(t,n);return Math.floor((e[0]*(r-1)-r+o)/2)},Lu=(e,t,r,n,o)=>{o==null&&(o=cf(e,t[0],n[0]));let i=[0,0,0,r];for(let a=0;a<3;a++)e[a]+2*o>=t[a]&&(i[a]=Math.trunc((e[a]-t[a]+2*o)/n[a]+1));return i},pf=(e,t,r,n,o,i,a,d,l,p)=>{let m,u,h,_;if(e==="VALID"&&(e=0),typeof e=="number"){m={top:e,bottom:e,left:e,right:e,front:e,back:e};let y=Lu([t,r,n,1],[d,l,p],1,[o,i,a],e);u=y[0],h=y[1],_=y[2]}else if(Array.isArray(e)){if(!e.every((g,x,$)=>g===$[0]))throw Error(`Unsupported padding parameter: ${e}`);m={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let y=Lu([t,r,n,1],[d,l,p],1,[o,i,a],e[0]);u=y[0],h=y[1],_=y[2]}else if(e==="SAME_UPPER"){u=Math.ceil(t/o),h=Math.ceil(r/i),_=Math.ceil(n/a);let y=(u-1)*o+d-t,g=(h-1)*i+l-r,x=(_-1)*a+p-n,$=Math.floor(y/2),v=y-$,S=Math.floor(g/2),T=g-S,A=Math.floor(x/2),C=x-A;m={top:S,bottom:T,left:A,right:C,front:$,back:v}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:m,outDepth:u,outHeight:h,outWidth:_}},Gu=(e,t,r,n,o,i=!1,a="channelsLast")=>{let d,l,p,m,u;if(a==="channelsLast")[d,l,p,m,u]=e;else if(a==="channelsFirst")[d,u,l,p,m]=e;else throw new Error(`Unknown dataFormat ${a}`);let[h,,_,y,g]=t,[x,$,v]=Wu(r),[S,T,A]=Wu(n),C=Xr(_,S),P=Xr(y,T),D=Xr(g,A),{padInfo:R,outDepth:H,outHeight:L,outWidth:re}=pf(o,l,p,m,x,$,v,C,P,D),V=i?h*u:h,K=[0,0,0,0,0];return a==="channelsFirst"?K=[d,V,H,L,re]:a==="channelsLast"&&(K=[d,H,L,re,V]),{batchSize:d,dataFormat:a,inDepth:l,inHeight:p,inWidth:m,inChannels:u,outDepth:H,outHeight:L,outWidth:re,outChannels:V,padInfo:R,strideDepth:x,strideHeight:$,strideWidth:v,filterDepth:_,filterHeight:y,filterWidth:g,effectiveFilterDepth:C,effectiveFilterHeight:P,effectiveFilterWidth:D,dilationDepth:S,dilationHeight:T,dilationWidth:A,inShape:e,outShape:K,filterShape:t}},Hu=(e,t,r,n,o,i)=>{let a=i==="channelsLast",d=a?e[0].dims[3]:e[0].dims[1],l=!1,p=[64,1,1],m={x:r.map((v,S)=>S)},u=[Math.ceil(lf(m.x.map(v=>r[v]))/p[0]),1,1];se("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${u}`);let h=l?a&&d%4!==0?3:4:1,_=k.size(r),y=[{type:12,data:_},{type:12,data:n},{type:12,data:o},{type:12,data:t.strides},{type:12,data:t.dilations}];je(t,y),y.push(...N(e[0].dims,e[1].dims));let g=["rank","rank"],x=e.length===3;x&&(y.push(...N(e[2].dims)),g.push("rank")),y.push(...N(r));let $=v=>{let S=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:n.length},{name:"pads",type:"u32",length:o.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];Ye(t,S);let T=l?4:1,A=ye(e[0].dataType),C=E("x",e[0].dataType,e[0].dims.length,h===3?1:h),P=E("W",e[1].dataType,e[1].dims.length,T),D=[C,P],R=M("result",e[0].dataType,r.length,T),H="";if(x){let V=E("bias",e[2].dataType,e[2].dims.length,T);D.push(V),H+=`
4909
4964
  fn getBiasByOutputCoords(coords : array<u32, 5>) -> ${l?`vec4<${A}>`:A} {
4910
- return bias[${a?F("coords",4,5):F("coords",1,5)}${l?"/ 4":""}];
4911
- }`}let K=Ae(h,A),j=qe(t,K,A);return`
4912
- ${G}
4965
+ return bias[${a?q("coords",4,5):q("coords",1,5)}${l?"/ 4":""}];
4966
+ }`}let L=Ae(h,A),re=Ke(t,L,A);return`
4967
+ ${H}
4913
4968
  fn getX(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 {
4914
4969
  let aIndices = array<u32, 5>(d0, d1, d2, d3, d4);
4915
- return ${k.getByIndices("aIndices")};
4970
+ return ${C.getByIndices("aIndices")};
4916
4971
  }
4917
4972
  fn getW(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 {
4918
4973
  let aIndices = array<u32, 5>(d0, d1, d2, d3, d4);
@@ -4922,18 +4977,18 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
4922
4977
  ${v.mainStart()}
4923
4978
  ${v.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
4924
4979
  let coords = ${R.offsetToIndices("global_idx")};
4925
- let batch = ${F("coords",0,k.rank)};
4926
- let d2 = ${a?F("coords",k.rank-1,k.rank):F("coords",1,k.rank)};
4927
- let xFRCCorner = vec3<u32>(${a?F("coords",1,k.rank):F("coords",2,k.rank)},
4928
- ${a?F("coords",2,k.rank):F("coords",3,k.rank)},
4929
- ${a?F("coords",3,k.rank):F("coords",4,k.rank)}) * uniforms.strides - uniforms.pads;
4980
+ let batch = ${q("coords",0,C.rank)};
4981
+ let d2 = ${a?q("coords",C.rank-1,C.rank):q("coords",1,C.rank)};
4982
+ let xFRCCorner = vec3<u32>(${a?q("coords",1,C.rank):q("coords",2,C.rank)},
4983
+ ${a?q("coords",2,C.rank):q("coords",3,C.rank)},
4984
+ ${a?q("coords",3,C.rank):q("coords",4,C.rank)}) * uniforms.strides - uniforms.pads;
4930
4985
  let xFCorner = xFRCCorner.x;
4931
4986
  let xRCorner = xFRCCorner.y;
4932
4987
  let xCCorner = xFRCCorner.z;
4933
- let xShapeY = ${a?F("uniforms.x_shape",1,k.rank):F("uniforms.x_shape",2,k.rank)};
4934
- let xShapeZ = ${a?F("uniforms.x_shape",2,k.rank):F("uniforms.x_shape",3,k.rank)};
4935
- let xShapeW = ${a?F("uniforms.x_shape",3,k.rank):F("uniforms.x_shape",4,k.rank)};
4936
- let xShapeU = ${a?F("uniforms.x_shape",4,k.rank):F("uniforms.x_shape",1,k.rank)};
4988
+ let xShapeY = ${a?q("uniforms.x_shape",1,C.rank):q("uniforms.x_shape",2,C.rank)};
4989
+ let xShapeZ = ${a?q("uniforms.x_shape",2,C.rank):q("uniforms.x_shape",3,C.rank)};
4990
+ let xShapeW = ${a?q("uniforms.x_shape",3,C.rank):q("uniforms.x_shape",4,C.rank)};
4991
+ let xShapeU = ${a?q("uniforms.x_shape",4,C.rank):q("uniforms.x_shape",1,C.rank)};
4937
4992
  let inputDepthNearestVec4 = (xShapeU / 4) * 4;
4938
4993
  let inputDepthVec4Remainder = xShapeU % 4;
4939
4994
 
@@ -5011,9 +5066,9 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5011
5066
  }
5012
5067
  }
5013
5068
  ${x?"value = value + getBiasByOutputCoords(coords)":""};
5014
- ${j}
5069
+ ${re}
5015
5070
  result[global_idx] = f32(value);
5016
- }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${a};${h};${x}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:u[0],y:u[1],z:u[2]},programUniforms:y}),getShaderSource:$}}});var Ku,ju,Yu=U(()=>{"use strict";te();oe();ae();yt();Ku=(e,t,r,n)=>{let o=e.length>2,i=o?"value += b[output_channel];":"",a=e[0].dims,d=e[1].dims,l=t.format==="NHWC",p=l?r[3]:r[1],m=p/t.group,u=l&&m>=4?me(p):1,h=C.size(r)/u,_=[{type:12,data:h},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:m}];Ke(t,_),_.push(...N(a,[d[0],d[1],d[2],d[3]/u]));let y=o?["rank","rank","rank"]:["rank","rank"];_.push(...N([r[0],r[1],r[2],r[3]/u]));let g=x=>{let $=M("output",e[0].dataType,r.length,u),v=_e($.type.tensor),S=qe(t,$.type.value,v),T=E("x",e[0].dataType,a.length),A=E("w",e[1].dataType,d.length,u),k=[T,A];o&&k.push(E("b",e[2].dataType,e[2].dims,u));let P=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];je(t,P);let D=l?`
5071
+ }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${a};${h};${x}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:u[0],y:u[1],z:u[2]},programUniforms:y}),getShaderSource:$}}});var qu,Ku,ju=U(()=>{"use strict";J();ne();ae();yt();qu=(e,t,r,n)=>{let o=e.length>2,i=o?"value += b[output_channel];":"",a=e[0].dims,d=e[1].dims,l=t.format==="NHWC",p=l?r[3]:r[1],m=p/t.group,u=l&&m>=4?me(p):1,h=k.size(r)/u,_=[{type:12,data:h},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:m}];je(t,_),_.push(...N(a,[d[0],d[1],d[2],d[3]/u]));let y=o?["rank","rank","rank"]:["rank","rank"];_.push(...N([r[0],r[1],r[2],r[3]/u]));let g=x=>{let $=M("output",e[0].dataType,r.length,u),v=ye($.type.tensor),S=Ke(t,$.type.value,v),T=E("x",e[0].dataType,a.length),A=E("w",e[1].dataType,d.length,u),C=[T,A];o&&C.push(E("b",e[2].dataType,e[2].dims,u));let P=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];Ye(t,P);let D=l?`
5017
5072
  for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[0]; wHeight++) {
5018
5073
  let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0];
5019
5074
 
@@ -5058,7 +5113,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5058
5113
  }
5059
5114
  }
5060
5115
  `;return`
5061
- ${x.registerUniforms(P).declareVariables(...k,$)}
5116
+ ${x.registerUniforms(P).declareVariables(...C,$)}
5062
5117
 
5063
5118
  ${x.mainStart()}
5064
5119
  ${x.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5075,8 +5130,8 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5075
5130
  ${i}
5076
5131
  ${S}
5077
5132
  ${$.setByOffset("global_idx","value")}
5078
- }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${u}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:_}),getShaderSource:g}},ju=(e,t,r,n)=>{let o=e.length>2,i=me(r[3]),a=me(r[2]),d=C.size(r)/i/a,l=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/i],p=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/i],m=[r[0],r[1],r[2],r[3]/i],u=[{type:12,data:d},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];Ke(t,u),u.push(...N(l,p,m));let h=(a-1)*t.strides[1]+p[1],_=y=>{let g=M("output",e[0].dataType,m.length,i),x=_e(g.type.tensor),$=qe(t,g.type.value,x),v=E("x",e[0].dataType,l.length,i),S=E("w",e[1].dataType,p.length,i),T=[v,S];o&&T.push(E("b",e[2].dataType,e[2].dims,i));let A=o?"value += b[output_channel];":"",k=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return je(t,k),`
5079
- ${y.registerUniforms(k).declareVariables(...T,g)}
5133
+ }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${u}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:_}),getShaderSource:g}},Ku=(e,t,r,n)=>{let o=e.length>2,i=me(r[3]),a=me(r[2]),d=k.size(r)/i/a,l=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/i],p=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/i],m=[r[0],r[1],r[2],r[3]/i],u=[{type:12,data:d},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];je(t,u),u.push(...N(l,p,m));let h=(a-1)*t.strides[1]+p[1],_=y=>{let g=M("output",e[0].dataType,m.length,i),x=ye(g.type.tensor),$=Ke(t,g.type.value,x),v=E("x",e[0].dataType,l.length,i),S=E("w",e[1].dataType,p.length,i),T=[v,S];o&&T.push(E("b",e[2].dataType,e[2].dims,i));let A=o?"value += b[output_channel];":"",C=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return Ye(t,C),`
5134
+ ${y.registerUniforms(C).declareVariables(...T,g)}
5080
5135
  ${y.mainStart()}
5081
5136
  ${y.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5082
5137
  let width0 = uniforms.output_shape[3];
@@ -5120,19 +5175,19 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5120
5175
  ${$}
5121
5176
  ${g.set("batch","row","col + i","output_channel","value")};
5122
5177
  }
5123
- }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${i};${a};${h};${p[0]};${p[1]}`,inputDependencies:o?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:u}),getShaderSource:_}}});var mf,bo,ff,yo,_o,Zu,hf,gf,wo,Qu=U(()=>{"use strict";oe();Wu();qu();Xr();Yu();yt();Qr();dt();mf=(e,t,r,n,o,i)=>{let a=e[0],d=e.slice(i?1:2,i?3:4),l=d.length,p=t[0],u=t.slice(2).map((y,g)=>y+(y-1)*(r[g]-1)),_=d.map((y,g)=>y+n[g]+n[g+l]).map((y,g)=>Math.floor((y-u[g]+o[g])/o[g]));return _.splice(0,0,a),_.splice(i?3:1,0,p),_},bo=[2,3,1,0],ff=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[1]*t.group;if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let o=e[0].dims.length-2;if(t.dilations.length!==o)throw new Error(`dilations should be ${o}D`);if(t.strides.length!==o)throw new Error(`strides should be ${o}D`);if(t.pads.length!==o*2)throw new Error(`pads should be ${o*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},yo=(e,t)=>{let r=e.kernelShape.slice();r.length<t[1].dims.length-2&&r.push(...Array(t[1].dims.length-2-r.length).fill(0));for(let i=2;i<t[1].dims.length;++i)r[i-2]===0&&(r[i-2]=t[1].dims[i]);let n=e.pads.slice();At.adjustPadsBasedOnAutoPad(t[0].dims,e.strides,e.dilations,r,n,e.format==="NHWC",e.autoPad);let o=Object.assign({},e);return Object.assign(o,{kernelShape:r,pads:n}),o},_o=e=>{let t=jr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],o=e.dilations,i=e.group,a=e.kernel_shape,d=e.pads,l=e.strides,p=e.w_is_const();return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,pads:d,strides:l,wIsConst:p,...t,cacheKey:`${e.format};${t.activation};`}},Zu=(e,t,r,n)=>{let o=r.format==="NHWC",i=mf(t[0].dims,t[1].dims,r.dilations,r.pads,r.strides,o);if(r.group!==1){let k=[t[0]];if(o){let D=e.kernelCustomData.wT??e.compute(Pe(t[1],bo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=D),k.push(D)}else k.push(t[1]);t.length===3&&k.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&o&&t[1].dims[0]===r.group&&t[1].dims[1]===1&&r.dilations[0]===1&&r.dilations[1]===1?e.compute(ju(k,r,i,n),{inputs:k}):e.compute(Ku(k,r,i,n),{inputs:k});return}let a=t.length===3,d=t[0].dims[o?1:2],l=t[0].dims[o?2:3],p=t[0].dims[o?3:1],m=t[1].dims[2],u=t[1].dims[3],h=i[o?1:2],_=i[o?2:3],y=i[o?3:1],g=o&&m===d&&u===l&&r.pads[0]===0&&r.pads[1]===0;if(g||m===1&&u===1&&r.dilations[0]===1&&r.dilations[1]===1&&r.strides[0]===1&&r.strides[1]===1&&r.pads[0]===0&&r.pads[1]===0){let k=i[0],P,D,R,G=[];if(o){let V=e.kernelCustomData.wT??e.compute(Pe(t[1],bo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];if(r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=V),g){let Q=d*l*p;P=t[0].reshape([1,k,Q]),D=V.reshape([1,Q,y]),R=[1,k,y]}else P=t[0].reshape([k,d*l,p]),D=V.reshape([1,p,y]),R=[k,h*_,y];G.push(P),G.push(D)}else P=t[0].reshape([k,p,d*l]),D=t[1].reshape([1,y,p]),R=[k,y,h*_],G.push(D),G.push(P);a&&G.push(t[2]);let K=R[2],j=G[0].dims[G[0].dims.length-1];K<8&&j<8?e.compute(Zr(G,r,i,R,o,n),{inputs:G}):e.compute(er(G,r,i,R,o,n),{inputs:G});return}let x=!0,$=e.kernelCustomData.wT??e.compute(Pe(t[1],bo),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=$);let v=[t[0],$];a&&v.push(t[2]);let S=o?h*_:y,T=o?y:h*_,A=m*u*p;e.compute(Vu(v,r,i,S,T,A,a,x,n),{inputs:v})},hf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=[0,t.pads[0],0,t.pads[1]],i=[1].concat(t.strides),a=[1].concat(t.dilations),d=[1].concat(t.kernelShape),l=yo({...t,pads:o,strides:i,dilations:a,kernelShape:d},n);Zu(e,n,l,p=>r?[p[0],p[2],p[3]]:[p[0],p[1],p[3]])},gf=(e,t,r)=>{let n=r.format==="NHWC"?"channelsLast":"channelsFirst",o=yo(r,t),i=r.autoPad==="NOTSET"?r.pads:r.autoPad,a=Hu(t[0].dims,t[1].dims,r.strides,r.dilations,i,!1,n);e.compute(Fu(t,o,a.outShape,[a.filterDepth,a.filterHeight,a.filterWidth],[a.padInfo.front,a.padInfo.top,a.padInfo.left],n))},wo=(e,t)=>{if(ff(e.inputs,t),e.inputs[0].dims.length===3)hf(e,t);else if(e.inputs[0].dims.length===5)gf(e,e.inputs,t);else{let r=yo(t,e.inputs);Zu(e,e.inputs,r)}}});var Xu,Ju=U(()=>{"use strict";te();et();oe();ae();Xu=(e,t,r)=>{let n=e.length>2,o=t.outputShape,i=t.format==="NHWC",a=t.group,d=e[1].dims,l=d[2]/a,p=d[3],m=i?me(l):1,u=i?me(p):1,h=i?p===1?m:u:1,_=C.size(o)/u,y=[Math.ceil(_/64),1,1];ue("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${y}`);let g=["rank","rank"],x=[t.strides[0],t.strides[1]],$=[t.kernelShape[i?1:2],t.kernelShape[i?2:3]],v=[t.dilations[0],t.dilations[1]],S=[$[0]+(t.dilations[0]<=1?0:(t.kernelShape[i?1:2]-1)*(t.dilations[0]-1)),$[1]+(t.dilations[1]<=1?0:(t.kernelShape[i?2:3]-1)*(t.dilations[1]-1))],T=[S[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),S[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],A=[{type:12,data:_},{type:12,data:x},{type:12,data:$},{type:12,data:v},{type:12,data:S},{type:6,data:T},{type:12,data:l},{type:12,data:p},...N(e[0].dims,e[1].dims)];n&&(A.push(...N(e[2].dims)),g.push("rank")),A.push(...N(o));let k=P=>{let D=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:x.length},{name:"filter_dims",type:"u32",length:$.length},{name:"dilations",type:"u32",length:$.length},{name:"effective_filter_dims",type:"u32",length:S.length},{name:"pads",type:"i32",length:T.length},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],R=_e(e[0].dataType),G=i?1:2,K=i?2:3,j=i?3:1,V=E("W",e[1].dataType,e[1].dims.length,h),Q=E("Dy",e[0].dataType,e[0].dims.length,m),se=[Q,V];n&&se.push(E("bias",e[2].dataType,[o[j]].length,u));let Y=M("result",e[0].dataType,o.length,u),ee=()=>{let ne="";if(m===1)ne+=`
5178
+ }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${i};${a};${h};${p[0]};${p[1]}`,inputDependencies:o?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(r):r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(d/64)},programUniforms:u}),getShaderSource:_}}});var mf,go,ff,bo,yo,Yu,hf,gf,_o,Zu=U(()=>{"use strict";ne();Vu();Fu();Qr();ju();yt();Zr();dt();mf=(e,t,r,n,o,i)=>{let a=e[0],d=e.slice(i?1:2,i?3:4),l=d.length,p=t[0],u=t.slice(2).map((y,g)=>y+(y-1)*(r[g]-1)),_=d.map((y,g)=>y+n[g]+n[g+l]).map((y,g)=>Math.floor((y-u[g]+o[g])/o[g]));return _.splice(0,0,a),_.splice(i?3:1,0,p),_},go=[2,3,1,0],ff=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[1]*t.group;if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let o=e[0].dims.length-2;if(t.dilations.length!==o)throw new Error(`dilations should be ${o}D`);if(t.strides.length!==o)throw new Error(`strides should be ${o}D`);if(t.pads.length!==o*2)throw new Error(`pads should be ${o*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},bo=(e,t)=>{let r=e.kernelShape.slice();r.length<t[1].dims.length-2&&r.push(...Array(t[1].dims.length-2-r.length).fill(0));for(let i=2;i<t[1].dims.length;++i)r[i-2]===0&&(r[i-2]=t[1].dims[i]);let n=e.pads.slice();kt.adjustPadsBasedOnAutoPad(t[0].dims,e.strides,e.dilations,r,n,e.format==="NHWC",e.autoPad);let o=Object.assign({},e);return Object.assign(o,{kernelShape:r,pads:n}),o},yo=e=>{let t=Kr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],o=e.dilations,i=e.group,a=e.kernel_shape,d=e.pads,l=e.strides,p=e.w_is_const();return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,pads:d,strides:l,wIsConst:p,...t,cacheKey:`${e.format};${t.activation};`}},Yu=(e,t,r,n)=>{let o=r.format==="NHWC",i=mf(t[0].dims,t[1].dims,r.dilations,r.pads,r.strides,o);if(r.group!==1){let C=[t[0]];if(o){let D=e.kernelCustomData.wT??e.compute(Pe(t[1],go),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=D),C.push(D)}else C.push(t[1]);t.length===3&&C.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&o&&t[1].dims[0]===r.group&&t[1].dims[1]===1&&r.dilations[0]===1&&r.dilations[1]===1?e.compute(Ku(C,r,i,n),{inputs:C}):e.compute(qu(C,r,i,n),{inputs:C});return}let a=t.length===3,d=t[0].dims[o?1:2],l=t[0].dims[o?2:3],p=t[0].dims[o?3:1],m=t[1].dims[2],u=t[1].dims[3],h=i[o?1:2],_=i[o?2:3],y=i[o?3:1],g=o&&m===d&&u===l&&r.pads[0]===0&&r.pads[1]===0;if(g||m===1&&u===1&&r.dilations[0]===1&&r.dilations[1]===1&&r.strides[0]===1&&r.strides[1]===1&&r.pads[0]===0&&r.pads[1]===0){let C=i[0],P,D,R,H=[];if(o){let V=e.kernelCustomData.wT??e.compute(Pe(t[1],go),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];if(r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=V),g){let K=d*l*p;P=t[0].reshape([1,C,K]),D=V.reshape([1,K,y]),R=[1,C,y]}else P=t[0].reshape([C,d*l,p]),D=V.reshape([1,p,y]),R=[C,h*_,y];H.push(P),H.push(D)}else P=t[0].reshape([C,p,d*l]),D=t[1].reshape([1,y,p]),R=[C,y,h*_],H.push(D),H.push(P);a&&H.push(t[2]);let L=R[2],re=H[0].dims[H[0].dims.length-1];L<8&&re<8?e.compute(Yr(H,r,i,R,o,n),{inputs:H}):e.compute(Jt(H,r,i,R,o,n),{inputs:H});return}let x=!0,$=e.kernelCustomData.wT??e.compute(Pe(t[1],go),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=$);let v=[t[0],$];a&&v.push(t[2]);let S=o?h*_:y,T=o?y:h*_,A=m*u*p;e.compute(Nu(v,r,i,S,T,A,a,x,n),{inputs:v})},hf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=[0,t.pads[0],0,t.pads[1]],i=[1].concat(t.strides),a=[1].concat(t.dilations),d=[1].concat(t.kernelShape),l=bo({...t,pads:o,strides:i,dilations:a,kernelShape:d},n);Yu(e,n,l,p=>r?[p[0],p[2],p[3]]:[p[0],p[1],p[3]])},gf=(e,t,r)=>{let n=r.format==="NHWC"?"channelsLast":"channelsFirst",o=bo(r,t),i=r.autoPad==="NOTSET"?r.pads:r.autoPad,a=Gu(t[0].dims,t[1].dims,r.strides,r.dilations,i,!1,n);e.compute(Hu(t,o,a.outShape,[a.filterDepth,a.filterHeight,a.filterWidth],[a.padInfo.front,a.padInfo.top,a.padInfo.left],n))},_o=(e,t)=>{if(ff(e.inputs,t),e.inputs[0].dims.length===3)hf(e,t);else if(e.inputs[0].dims.length===5)gf(e,e.inputs,t);else{let r=bo(t,e.inputs);Yu(e,e.inputs,r)}}});var Qu,Xu=U(()=>{"use strict";J();et();ne();ae();Qu=(e,t,r)=>{let n=e.length>2,o=t.outputShape,i=t.format==="NHWC",a=t.group,d=e[1].dims,l=d[2]/a,p=d[3],m=i?me(l):1,u=i?me(p):1,h=i?p===1?m:u:1,_=k.size(o)/u,y=[Math.ceil(_/64),1,1];se("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${y}`);let g=["rank","rank"],x=[t.strides[0],t.strides[1]],$=[t.kernelShape[i?1:2],t.kernelShape[i?2:3]],v=[t.dilations[0],t.dilations[1]],S=[$[0]+(t.dilations[0]<=1?0:(t.kernelShape[i?1:2]-1)*(t.dilations[0]-1)),$[1]+(t.dilations[1]<=1?0:(t.kernelShape[i?2:3]-1)*(t.dilations[1]-1))],T=[S[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),S[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],A=[{type:12,data:_},{type:12,data:x},{type:12,data:$},{type:12,data:v},{type:12,data:S},{type:6,data:T},{type:12,data:l},{type:12,data:p},...N(e[0].dims,e[1].dims)];n&&(A.push(...N(e[2].dims)),g.push("rank")),A.push(...N(o));let C=P=>{let D=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:x.length},{name:"filter_dims",type:"u32",length:$.length},{name:"dilations",type:"u32",length:$.length},{name:"effective_filter_dims",type:"u32",length:S.length},{name:"pads",type:"i32",length:T.length},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],R=ye(e[0].dataType),H=i?1:2,L=i?2:3,re=i?3:1,V=E("W",e[1].dataType,e[1].dims.length,h),K=E("Dy",e[0].dataType,e[0].dims.length,m),we=[K,V];n&&we.push(E("bias",e[2].dataType,[o[re]].length,u));let j=M("result",e[0].dataType,o.length,u),Q=()=>{let te="";if(m===1)te+=`
5124
5179
  let w_offset = ${V.indicesToOffset(`${V.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)};
5125
5180
  let wValue = ${V.getByOffset(`w_offset / ${h}`)};
5126
- dotProd = dotProd + xValue * wValue;`;else if(p===1)ne+=`
5181
+ dotProd = dotProd + xValue * wValue;`;else if(p===1)te+=`
5127
5182
  let wValue = ${V.getByOffset(`${V.indicesToOffset(`${V.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)} / ${h}`)};
5128
- dotProd = dotProd + dot(xValue, wValue);`;else for(let be=0;be<m;be++)ne+=`
5183
+ dotProd = dotProd + dot(xValue, wValue);`;else for(let be=0;be<m;be++)te+=`
5129
5184
  let wValue${be} = ${V.getByOffset(`${V.indicesToOffset(`${V.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel + ${be}, wOutChannel)`)} / ${h}`)};
5130
- dotProd = dotProd + xValue[${be}] * wValue${be};`;return ne},J=`
5131
- let outputIndices = ${Y.offsetToIndices(`global_idx * ${u}`)};
5132
- let batch = ${Y.indicesGet("outputIndices",0)};
5133
- let d1 = ${Y.indicesGet("outputIndices",j)};
5134
- let r = ${Y.indicesGet("outputIndices",G)};
5135
- let c = ${Y.indicesGet("outputIndices",K)};
5185
+ dotProd = dotProd + xValue[${be}] * wValue${be};`;return te},ie=`
5186
+ let outputIndices = ${j.offsetToIndices(`global_idx * ${u}`)};
5187
+ let batch = ${j.indicesGet("outputIndices",0)};
5188
+ let d1 = ${j.indicesGet("outputIndices",re)};
5189
+ let r = ${j.indicesGet("outputIndices",H)};
5190
+ let c = ${j.indicesGet("outputIndices",L)};
5136
5191
  let dyCorner = vec2<i32>(i32(r), i32(c)) - uniforms.pads;
5137
5192
  let dyRCorner = dyCorner.x;
5138
5193
  let dyCCorner = dyCorner.y;
@@ -5140,47 +5195,57 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5140
5195
  let wOutChannel = d1 - groupId * uniforms.output_channels_per_group;
5141
5196
  // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).
5142
5197
  // ? = to be determined. : = across all values in that axis.
5143
- var dotProd = ${Y.type.value}(0.0);
5144
- for (var wR: u32 = 0; wR < uniforms.effective_filter_dims.x; wR = wR + 1) {
5198
+ var dotProd = ${j.type.value}(0.0);
5199
+ var wR: u32 = 0;
5200
+ if (uniforms.dilations.x == 1) {
5201
+ // Minimum wR >= 0 that satisfies (dyRCorner + wR) % (uniforms.strides.x) == 0
5202
+ wR = u32(((dyRCorner + i32(uniforms.strides.x) - 1) / i32(uniforms.strides.x)) * i32(uniforms.strides.x) - dyRCorner);
5203
+ }
5204
+ for (; wR < uniforms.effective_filter_dims.x; wR = wR + 1) {
5145
5205
  if (wR % uniforms.dilations.x != 0) {
5146
5206
  continue;
5147
5207
  }
5148
5208
  let dyR = (${R}(dyRCorner) + ${R}(wR)) / ${R}(uniforms.strides[0]);
5149
5209
  let wRPerm = uniforms.filter_dims.x - 1 - wR / uniforms.dilations.x;
5150
- if (dyR < 0.0 || dyR >= ${R}(uniforms.Dy_shape[${G}]) || fract(dyR) > 0.0 ||
5210
+ if (dyR < 0.0 || dyR >= ${R}(uniforms.Dy_shape[${H}]) || fract(dyR) > 0.0 ||
5151
5211
  wRPerm < 0) {
5152
5212
  continue;
5153
5213
  }
5154
- wR = wR + uniforms.strides[0] - 1;
5155
5214
  let idyR: u32 = u32(dyR);
5215
+ var wC: u32 = 0;
5216
+ if (uniforms.dilations.y == 1) {
5217
+ // Minimum wC >= 0 that satisfies (dyCCorner + wC) % (uniforms.strides.y) == 0
5218
+ wC = u32(((dyCCorner + i32(uniforms.strides.y) - 1) / i32(uniforms.strides.y)) * i32(uniforms.strides.y) - dyCCorner);
5219
+ }
5156
5220
 
5157
- for (var wC: u32 = 0; wC < uniforms.effective_filter_dims.y; wC = wC + 1) {
5221
+ for (; wC < uniforms.effective_filter_dims.y; wC = wC + 1) {
5158
5222
  if (wC % uniforms.dilations.y != 0) {
5159
5223
  continue;
5160
5224
  }
5161
5225
  let dyC = (${R}(dyCCorner) + ${R}(wC)) / ${R}(uniforms.strides.y);
5162
5226
  let wCPerm = uniforms.filter_dims.y - 1 - wC / uniforms.dilations.y;
5163
- if (dyC < 0.0 || dyC >= ${R}(uniforms.Dy_shape[${K}]) ||
5227
+ if (dyC < 0.0 || dyC >= ${R}(uniforms.Dy_shape[${L}]) ||
5164
5228
  fract(dyC) > 0.0 || wCPerm < 0) {
5165
5229
  continue;
5166
5230
  }
5167
- wC = wC + uniforms.strides.y - 1;
5168
5231
  let idyC: u32 = u32(dyC);
5169
5232
  var inputChannel = groupId * uniforms.input_channels_per_group;
5170
5233
  for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group; d2 = d2 + ${m}) {
5171
- let xValue = ${i?Q.getByOffset(`${Q.indicesToOffset(`${Q.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${m}`):Q.get("batch","inputChannel","idyR","idyC")};
5172
- ${ee()}
5234
+ let xValue = ${i?K.getByOffset(`${K.indicesToOffset(`${K.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${m}`):K.get("batch","inputChannel","idyR","idyC")};
5235
+ ${Q()}
5173
5236
  inputChannel = inputChannel + ${m};
5174
5237
  }
5238
+ wC = wC + uniforms.strides.y - 1;
5175
5239
  }
5240
+ wR = wR + uniforms.strides[0] - 1;
5176
5241
  }
5177
5242
  let value = dotProd${n?` + bias[d1 / ${u}]`:""};
5178
- ${Y.setByOffset("global_idx","value")};
5243
+ ${j.setByOffset("global_idx","value")};
5179
5244
  `;return`
5180
- ${P.registerUniforms(D).declareVariables(...se,Y)}
5245
+ ${P.registerUniforms(D).declareVariables(...we,j)}
5181
5246
  ${P.mainStart()}
5182
5247
  ${P.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")};
5183
- ${J}}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};${m}${h}${u}${p===1}`,inputDependencies:g},getRunData:()=>({dispatchGroup:{x:y[0],y:y[1],z:y[2]},outputs:[{dims:r?r(o):o,dataType:e[0].dataType}],programUniforms:A}),getShaderSource:k}}});var bf,yf,_f,ed,td,wf,rd,vf,nd,od=U(()=>{"use strict";Ju();yt();dt();bf=(e,t,r,n,o,i)=>(e-1)*t+r+(n-1)*o+1-i,yf=(e,t,r,n,o)=>{let i=Math.floor(e/2);t==="SAME_UPPER"?(r[n]=i,r[o]=e-i):t==="SAME_LOWER"&&(r[n]=e-i,r[o]=i)},_f=(e,t,r,n,o,i,a,d,l,p)=>{let m=e.length-2,u=p.length===0;l.length<m&&l.push(...Array(m-l.length).fill(0));let h=e[0],_=t[d?3:1]*o;for(let y=0,g=e.length-m-(d?1:0);y<m;++y,++g){let x=e[g],$=u?x*a[y]:p[y],v=bf(x,a[y],i[y],t[g],r[y],$);yf(v,n,i,y,y+m),u&&p.push(a[y]*(x-1)+l[y]+(t[g]-1)*r[y]+1-i[y]-i[y+m])}p.splice(0,0,h),p.splice(d?3:1,0,_)},ed=(e,t)=>{let r=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((u,h)=>u*h,1)===0){r.length=0;for(let u=2;u<t[1].dims.length;++u)r.push(t[1].dims[u])}let n=e.format==="NHWC";r.splice(0,0,t[1].dims[0]),r.splice(n?3:1,0,t[1].dims[1]);let o=e.pads.slice(),i=e.outputShape.slice(),a=e.outputPadding.slice(),d=t[0].dims,l=e.dilations.slice();if(l.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;l=new Array(u).fill(1)}let p=e.strides.slice();if(p.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;p=new Array(u).fill(1)}_f(d,r,l,e.autoPad,e.group,o,p,n,a,i);let m=Object.assign({},e);return Object.assign(m,{kernelShape:r,pads:o,outputPadding:a,outputShape:i,dilations:l,strides:p}),m},td=e=>{let t=jr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],o=e.dilations,i=e.group,a=e.kernelShape,d=e.pads,l=e.strides,p=e.wIsConst(),m=e.outputPadding,u=e.outputShape;return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,outputPadding:m,outputShape:u,pads:d,strides:l,wIsConst:p,...t,cacheKey:`${e.format};${t.activation};`}},wf=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[0];if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let o=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==o))throw new Error("invalid bias");let i=e[0].dims.length-2;if(t.dilations.reduce((m,u)=>m+u,0)>0&&t.dilations.length!==i)throw new Error(`dilations should be ${i}D`);if(t.strides.reduce((m,u)=>m+u,0)>0&&t.strides.length!==i)throw new Error(`strides should be ${i}D`);if(t.pads.reduce((m,u)=>m+u,0)>0&&t.pads.length!==i*2)throw new Error(`pads should be ${i*2}D`);if(t.outputPadding.length!==i&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${i}D`);if(t.kernelShape.reduce((m,u)=>m+u,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},rd=(e,t,r,n)=>{let o=e.kernelCustomData.wT??e.compute(Pe(t[1],[2,3,0,1]),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=o);let i=[t[0],o];t.length===3&&i.push(t[2]),e.compute(Xu(i,r,n),{inputs:i})},vf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=t.kernelShape;(o.length===0||o[0]===0)&&(o=[e.inputs[1].dims[2]]);let i=t.dilations;(i.length===0||i[0]===0)&&(i=[1]);let a=t.strides;(a.length===0||a[0]===0)&&(a=[1]);let d=t.pads;d.length===0&&(d=[0,0]),d=[0,d[0],0,d[1]],a=[1].concat(a),i=[1].concat(i),o=[1].concat(o);let l=ed({...t,pads:d,strides:a,dilations:i,kernelShape:o},n);rd(e,n,l,p=>r?[p[0],p[2],p[3]]:[p[0],p[1],p[3]])},nd=(e,t)=>{if(wf(e.inputs,t),e.inputs[0].dims.length===3)vf(e,t);else{let r=ed(t,e.inputs);rd(e,e.inputs,r)}}});var $f,id,ad,sd=U(()=>{"use strict";te();oe();Se();ae();$f=(e,t,r,n)=>{let o=C.size(t),i=t.length,a=E("input",e,i),d=M("output",e,i),l=r.dataType===6?r.getInt32Array()[0]:Number(r.getBigInt64Array()[0]),p=C.normalizeAxis(l,i),m=u=>{let h=` i32(${a.indicesGet("inputIndices","uniforms.axis")}) `,_=F("uniforms.input_shape","uniforms.axis",i),y=n.reverse?h+(n.exclusive?" + 1":""):"0",g=n.reverse?_:h+(n.exclusive?"":" + 1");return`
5248
+ ${ie}}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};${m}${h}${u}${p===1}`,inputDependencies:g},getRunData:()=>({dispatchGroup:{x:y[0],y:y[1],z:y[2]},outputs:[{dims:r?r(o):o,dataType:e[0].dataType}],programUniforms:A}),getShaderSource:C}}});var bf,yf,_f,Ju,ed,wf,td,vf,rd,nd=U(()=>{"use strict";Xu();yt();dt();bf=(e,t,r,n,o,i)=>(e-1)*t+r+(n-1)*o+1-i,yf=(e,t,r,n,o)=>{let i=Math.floor(e/2);t==="SAME_UPPER"?(r[n]=i,r[o]=e-i):t==="SAME_LOWER"&&(r[n]=e-i,r[o]=i)},_f=(e,t,r,n,o,i,a,d,l,p)=>{let m=e.length-2,u=p.length===0;l.length<m&&l.push(...Array(m-l.length).fill(0));let h=e[0],_=t[d?3:1]*o;for(let y=0,g=e.length-m-(d?1:0);y<m;++y,++g){let x=e[g],$=u?x*a[y]:p[y],v=bf(x,a[y],i[y],t[g],r[y],$);yf(v,n,i,y,y+m),u&&p.push(a[y]*(x-1)+l[y]+(t[g]-1)*r[y]+1-i[y]-i[y+m])}p.splice(0,0,h),p.splice(d?3:1,0,_)},Ju=(e,t)=>{let r=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((u,h)=>u*h,1)===0){r.length=0;for(let u=2;u<t[1].dims.length;++u)r.push(t[1].dims[u])}let n=e.format==="NHWC";r.splice(0,0,t[1].dims[0]),r.splice(n?3:1,0,t[1].dims[1]);let o=e.pads.slice(),i=e.outputShape.slice(),a=e.outputPadding.slice(),d=t[0].dims,l=e.dilations.slice();if(l.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;l=new Array(u).fill(1)}let p=e.strides.slice();if(p.reduce((u,h)=>u+h,0)===0){let u=t[0].dims.length-2;p=new Array(u).fill(1)}_f(d,r,l,e.autoPad,e.group,o,p,n,a,i);let m=Object.assign({},e);return Object.assign(m,{kernelShape:r,pads:o,outputPadding:a,outputShape:i,dilations:l,strides:p}),m},ed=e=>{let t=Kr(e),r=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],o=e.dilations,i=e.group,a=e.kernelShape,d=e.pads,l=e.strides,p=e.wIsConst(),m=e.outputPadding,u=e.outputShape;return{autoPad:n,format:r,dilations:o,group:i,kernelShape:a,outputPadding:m,outputShape:u,pads:d,strides:l,wIsConst:p,...t,cacheKey:`${e.format};${t.activation};`}},wf=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let r=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[0];if(r!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let o=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==o))throw new Error("invalid bias");let i=e[0].dims.length-2;if(t.dilations.reduce((m,u)=>m+u,0)>0&&t.dilations.length!==i)throw new Error(`dilations should be ${i}D`);if(t.strides.reduce((m,u)=>m+u,0)>0&&t.strides.length!==i)throw new Error(`strides should be ${i}D`);if(t.pads.reduce((m,u)=>m+u,0)>0&&t.pads.length!==i*2)throw new Error(`pads should be ${i*2}D`);if(t.outputPadding.length!==i&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${i}D`);if(t.kernelShape.reduce((m,u)=>m+u,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},td=(e,t,r,n)=>{let o=e.kernelCustomData.wT??e.compute(Pe(t[1],[2,3,0,1]),{inputs:[1],outputs:[r.wIsConst?-2:-1]})[0];r.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=o);let i=[t[0],o];t.length===3&&i.push(t[2]),e.compute(Qu(i,r,n),{inputs:i})},vf=(e,t)=>{let r=t.format==="NHWC",n=[e.inputs[0].reshape(r?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let o=t.kernelShape;(o.length===0||o[0]===0)&&(o=[e.inputs[1].dims[2]]);let i=t.dilations;(i.length===0||i[0]===0)&&(i=[1]);let a=t.strides;(a.length===0||a[0]===0)&&(a=[1]);let d=t.pads;d.length===0&&(d=[0,0]),d=[0,d[0],0,d[1]],a=[1].concat(a),i=[1].concat(i),o=[1].concat(o);let l=t.outputPadding;l=[0].concat(l);let p=Ju({...t,pads:d,strides:a,dilations:i,kernelShape:o,outputPadding:l},n);td(e,n,p,m=>r?[m[0],m[2],m[3]]:[m[0],m[1],m[3]])},rd=(e,t)=>{if(wf(e.inputs,t),e.inputs[0].dims.length===3)vf(e,t);else{let r=Ju(t,e.inputs);td(e,e.inputs,r)}}});var $f,od,id,ad=U(()=>{"use strict";J();ne();xe();ae();$f=(e,t,r,n)=>{let o=k.size(t),i=t.length,a=E("input",e,i),d=M("output",e,i),l=r.dataType===6?r.getInt32Array()[0]:Number(r.getBigInt64Array()[0]),p=k.normalizeAxis(l,i),m=u=>{let h=` i32(${a.indicesGet("inputIndices","uniforms.axis")}) `,_=q("uniforms.input_shape","uniforms.axis",i),y=n.reverse?h+(n.exclusive?" + 1":""):"0",g=n.reverse?_:h+(n.exclusive?"":" + 1");return`
5184
5249
  ${u.registerUniform("outputSize","u32").registerUniform("axis","u32").declareVariables(a,d)}
5185
5250
  ${u.mainStart()}
5186
5251
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
@@ -5193,7 +5258,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5193
5258
  sum = sum + ${a.getByIndices("inputIndices")};
5194
5259
  }
5195
5260
  ${d.setByOffset("global_idx","sum")};
5196
- }`};return{name:"CumSum",shaderCache:{hint:n.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:[{type:12,data:o},{type:12,data:p},...N(t,t)]}),getShaderSource:m}},id=(e,t)=>{let r=e.inputs[0].dims,n=e.inputs[0].dataType,o=e.inputs[1];e.compute($f(n,r,o,t),{inputs:[0]})},ad=e=>{let t=e.exclusive===1,r=e.reverse===1;return re({exclusive:t,reverse:r})}});var xf,Sf,Tf,ud,dd,ld=U(()=>{"use strict";te();oe();Se();ae();xf=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},Sf=(e,t,r,n)=>{let o=[];o.push(`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
5261
+ }`};return{name:"CumSum",shaderCache:{hint:n.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(o/64)},programUniforms:[{type:12,data:o},{type:12,data:p},...N(t,t)]}),getShaderSource:m}},od=(e,t)=>{let r=e.inputs[0].dims,n=e.inputs[0].dataType,o=e.inputs[1];e.compute($f(n,r,o,t),{inputs:[0]})},id=e=>{let t=e.exclusive===1,r=e.reverse===1;return ee({exclusive:t,reverse:r})}});var xf,Sf,Tf,sd,ud,dd=U(()=>{"use strict";J();ne();xe();ae();xf=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},Sf=(e,t,r,n)=>{let o=[];o.push(`fn perm(i: ${n.type.indices}) -> ${r.type.indices} {
5197
5262
  var a: ${r.type.indices};`);for(let i=0;i<t;++i)o.push(r.indicesSet("a",e[i],`i[${i}]`));return o.push("return a;}"),o.join(`
5198
5263
  `)},Tf=(e,t)=>{let r,n,o,i,a,d,l=t.format==="NHWC",p=t.blocksize,m=t.mode==="DCR";l?([r,n,o,i]=e.dims,a=m?[r,n,o,p,p,i/p**2]:[r,n,o,i/p**2,p,p],d=m?[0,1,3,2,4,5]:[0,1,4,2,5,3]):([r,n,o,i]=[e.dims[0],e.dims[2],e.dims[3],e.dims[1]],a=m?[r,p,p,i/p**2,n,o]:[r,i/p**2,p,p,n,o],d=m?[0,3,4,1,5,2]:[0,1,4,2,5,3]);let u=e.reshape(a),h=u.dims.length,_=e.dataType,y=E("a",_,h),g=M("output",_,h),x=$=>`
5199
5264
  ${$.registerUniform("output_size","u32").declareVariables(y,g)}
@@ -5207,18 +5272,18 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5207
5272
  let aIndices = perm(indices);
5208
5273
 
5209
5274
  ${g.setByOffset("global_idx",y.getByIndices("aIndices"))}
5210
- }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:$=>{let v=l?[r,n*p,o*p,i/p**2]:[r,i/p**2,n*p,o*p],S=C.size(v),T=u.dims,A=C.sortBasedOnPerm(T,d);return{outputs:[{dims:v,dataType:$[0].dataType}],dispatchGroup:{x:Math.ceil(S/64)},programUniforms:[{type:12,data:S},...N(T,A)]}},getShaderSource:x}},ud=(e,t)=>{xf(e.inputs),e.compute(Tf(e.inputs[0],t))},dd=e=>re({blocksize:e.blocksize,mode:e.mode,format:e.format})});var vo,en,cd,If,Cf,$o,xo,pd,Af,md,fd,hd=U(()=>{"use strict";te();oe();Se();ae();vo="[a-zA-Z]|\\.\\.\\.",en="("+vo+")+",cd="^"+en+"$",If="("+en+",)*"+en,Cf="^"+If+"$",$o=class{constructor(t=-1){this.symbolToIndices=new Map,this.inputIndex=t}addSymbol(t,r){let n=this.symbolToIndices.get(t);n===void 0?n=[r]:n.push(r),this.symbolToIndices.set(t,n)}},xo=class{constructor(t,r){this.equation=r;this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[n,o]=r.includes("->")?r.split("->",2):[r,""];if(!n.match(RegExp(Cf)))throw new Error("Invalid LHS term");if(n.split(",").forEach((d,l)=>{let p=t[l].dims.slice();if(!d.match(RegExp(cd)))throw new Error("Invalid LHS term");let m=this.processTerm(d,!0,p,l);this.lhs.push(m)}),o==="")o+=[...this.symbolToInfo.entries()].filter(([d,l])=>l.count===1||d==="...").map(([d])=>d).join("");else if(!o.match(RegExp(en)))throw new Error("Invalid RHS");o.match(RegExp(vo,"g"))?.forEach(d=>{if(d==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let l=this.symbolToInfo.get(d);if(l===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(l.dimValue)}}),this.rhs=this.processTerm(o,!1,this.outputDims)}addSymbol(t,r,n){let o=this.symbolToInfo.get(t);if(o!==void 0){if(o.dimValue!==r&&o.count!==1)throw new Error("Dimension mismatch");o.count++,o.inputIndices.push(n)}else o={count:1,dimValue:r,inputIndices:[n]};this.symbolToInfo.set(t,o)}processTerm(t,r,n,o=-1){let i=n.length,a=!1,d=[],l=0;if(!t.match(RegExp(cd))&&!r&&t!=="")throw new Error("Invalid LHS term");let p=t.match(RegExp(vo,"g")),m=new $o(o);return p?.forEach((u,h)=>{if(u==="..."){if(a)throw new Error("Only one ellipsis is allowed per input term");a=!0;let _=i-p.length+1;if(_<0)throw new Error("Ellipsis out of bounds");if(d=n.slice(l,l+_),this.hasEllipsis){if(this.ellipsisDims.length!==d.length||this.ellipsisDims.toString()!==d.toString())throw new Error("Ellipsis dimensions mismatch")}else if(r)this.hasEllipsis=!0,this.ellipsisDims=d;else throw new Error("Ellipsis must be specified in the LHS");for(let y=0;y<d.length;y++){let g=String.fromCharCode("0".charCodeAt(0)+y);m.addSymbol(g,h+y),this.addSymbol(g,n[l++],o)}}else m.addSymbol(u,h+(this.hasEllipsis?this.ellipsisDims.length-1:0)),this.addSymbol(u,n[l++],o)}),m}},pd=e=>e+"_max",Af=(e,t,r,n)=>{let i=e.map(m=>m.length).map((m,u)=>E(`input${u}`,t,m)),a=C.size(n),d=M("output",t,n.length),l=[...r.symbolToInfo.keys()].filter(m=>!r.rhs.symbolToIndices.has(m)),p=m=>{let u=[],h="var prod = 1.0;",_="var sum = 0.0;",y="sum += prod;",g=[],x=[],$=[],v=[],S=r.symbolToInfo.size===r.rhs.symbolToIndices.size;r.symbolToInfo.forEach((A,k)=>{if(r.rhs.symbolToIndices.has(k)){let P=r.rhs.symbolToIndices.get(k)?.[0];P!==void 0&&r.lhs.forEach((D,R)=>{if(A.inputIndices.includes(R)){let G=D.symbolToIndices.get(k);if(G===void 0)throw new Error("Invalid symbol error");G.forEach(K=>{u.push(`${i[R].indicesSet(`input${R}Indices`,K,d.indicesGet("outputIndices",P))}`)})}})}else r.lhs.forEach((P,D)=>{if(A.inputIndices.includes(D)){let R=P.symbolToIndices.get(k);if(R===void 0)throw new Error("Invalid symbol error");R.forEach(G=>{g.push(`${i[D].indicesSet(`input${D}Indices`,G,`${k}`)}`)}),v.push(`prod *= ${i[D].getByIndices(`input${D}Indices`)};`)}}),x.push(`for(var ${k}: u32 = 0; ${k} < uniforms.${pd(k)}; ${k}++) {`),$.push("}")});let T=S?[...u,`let sum = ${i.map((A,k)=>A.getByIndices(`input${k}Indices`)).join(" * ")};`]:[...u,_,...x,...g,h,...v,y,...$];return`
5211
- ${m.registerUniforms(l.map(A=>({name:`${pd(A)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...i,d)}
5275
+ }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:$=>{let v=l?[r,n*p,o*p,i/p**2]:[r,i/p**2,n*p,o*p],S=k.size(v),T=u.dims,A=k.sortBasedOnPerm(T,d);return{outputs:[{dims:v,dataType:$[0].dataType}],dispatchGroup:{x:Math.ceil(S/64)},programUniforms:[{type:12,data:S},...N(T,A)]}},getShaderSource:x}},sd=(e,t)=>{xf(e.inputs),e.compute(Tf(e.inputs[0],t))},ud=e=>ee({blocksize:e.blocksize,mode:e.mode,format:e.format})});var wo,Jr,ld,If,Cf,vo,$o,cd,Af,pd,md,fd=U(()=>{"use strict";J();ne();xe();ae();wo="[a-zA-Z]|\\.\\.\\.",Jr="("+wo+")+",ld="^"+Jr+"$",If="("+Jr+",)*"+Jr,Cf="^"+If+"$",vo=class{constructor(t=-1){this.symbolToIndices=new Map,this.inputIndex=t}addSymbol(t,r){let n=this.symbolToIndices.get(t);n===void 0?n=[r]:n.push(r),this.symbolToIndices.set(t,n)}},$o=class{constructor(t,r){this.equation=r;this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[n,o]=r.includes("->")?r.split("->",2):[r,""];if(!n.match(RegExp(Cf)))throw new Error("Invalid LHS term");if(n.split(",").forEach((d,l)=>{let p=t[l].dims.slice();if(!d.match(RegExp(ld)))throw new Error("Invalid LHS term");let m=this.processTerm(d,!0,p,l);this.lhs.push(m)}),o==="")o+=[...this.symbolToInfo.entries()].filter(([d,l])=>l.count===1||d==="...").map(([d])=>d).join("");else if(!o.match(RegExp(Jr)))throw new Error("Invalid RHS");o.match(RegExp(wo,"g"))?.forEach(d=>{if(d==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let l=this.symbolToInfo.get(d);if(l===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(l.dimValue)}}),this.rhs=this.processTerm(o,!1,this.outputDims)}addSymbol(t,r,n){let o=this.symbolToInfo.get(t);if(o!==void 0){if(o.dimValue!==r&&o.count!==1)throw new Error("Dimension mismatch");o.count++,o.inputIndices.push(n)}else o={count:1,dimValue:r,inputIndices:[n]};this.symbolToInfo.set(t,o)}processTerm(t,r,n,o=-1){let i=n.length,a=!1,d=[],l=0;if(!t.match(RegExp(ld))&&!r&&t!=="")throw new Error("Invalid LHS term");let p=t.match(RegExp(wo,"g")),m=new vo(o);return p?.forEach((u,h)=>{if(u==="..."){if(a)throw new Error("Only one ellipsis is allowed per input term");a=!0;let _=i-p.length+1;if(_<0)throw new Error("Ellipsis out of bounds");if(d=n.slice(l,l+_),this.hasEllipsis){if(this.ellipsisDims.length!==d.length||this.ellipsisDims.toString()!==d.toString())throw new Error("Ellipsis dimensions mismatch")}else if(r)this.hasEllipsis=!0,this.ellipsisDims=d;else throw new Error("Ellipsis must be specified in the LHS");for(let y=0;y<d.length;y++){let g=String.fromCharCode("0".charCodeAt(0)+y);m.addSymbol(g,h+y),this.addSymbol(g,n[l++],o)}}else m.addSymbol(u,h+(this.hasEllipsis?this.ellipsisDims.length-1:0)),this.addSymbol(u,n[l++],o)}),m}},cd=e=>e+"_max",Af=(e,t,r,n)=>{let i=e.map(m=>m.length).map((m,u)=>E(`input${u}`,t,m)),a=k.size(n),d=M("output",t,n.length),l=[...r.symbolToInfo.keys()].filter(m=>!r.rhs.symbolToIndices.has(m)),p=m=>{let u=[],h="var prod = 1.0;",_="var sum = 0.0;",y="sum += prod;",g=[],x=[],$=[],v=[],S=r.symbolToInfo.size===r.rhs.symbolToIndices.size;r.symbolToInfo.forEach((A,C)=>{if(r.rhs.symbolToIndices.has(C)){let P=r.rhs.symbolToIndices.get(C)?.[0];P!==void 0&&r.lhs.forEach((D,R)=>{if(A.inputIndices.includes(R)){let H=D.symbolToIndices.get(C);if(H===void 0)throw new Error("Invalid symbol error");H.forEach(L=>{u.push(`${i[R].indicesSet(`input${R}Indices`,L,d.indicesGet("outputIndices",P))}`)})}})}else r.lhs.forEach((P,D)=>{if(A.inputIndices.includes(D)){let R=P.symbolToIndices.get(C);if(R===void 0)throw new Error("Invalid symbol error");R.forEach(H=>{g.push(`${i[D].indicesSet(`input${D}Indices`,H,`${C}`)}`)}),v.push(`prod *= ${i[D].getByIndices(`input${D}Indices`)};`)}}),x.push(`for(var ${C}: u32 = 0; ${C} < uniforms.${cd(C)}; ${C}++) {`),$.push("}")});let T=S?[...u,`let sum = ${i.map((A,C)=>A.getByIndices(`input${C}Indices`)).join(" * ")};`]:[...u,_,...x,...g,h,...v,y,...$];return`
5276
+ ${m.registerUniforms(l.map(A=>({name:`${cd(A)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...i,d)}
5212
5277
 
5213
5278
  ${m.mainStart()}
5214
5279
  ${m.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
5215
5280
  var outputIndices = ${d.offsetToIndices("global_idx")};
5216
- ${i.map((A,k)=>`var input${k}Indices: ${i[k].type.indices};`).join(`
5281
+ ${i.map((A,C)=>`var input${C}Indices: ${i[C].type.indices};`).join(`
5217
5282
  `)}
5218
5283
  ${T.join(`
5219
5284
  `)};
5220
5285
  ${d.setByOffset("global_idx","sum")};
5221
- }`};return{name:"Einsum",shaderCache:{hint:r.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let m=l.filter(h=>r.symbolToInfo.has(h)).map(h=>({type:12,data:r.symbolToInfo.get(h)?.dimValue||0}));m.push({type:12,data:a});let u=e.map((h,_)=>[...N(h)]).reduce((h,_)=>h.concat(_),m);return u.push(...N(n)),{outputs:[{dims:n,dataType:t}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:u}},getShaderSource:p}},md=(e,t)=>{let r=new xo(e.inputs,t.equation),n=r.outputDims,o=e.inputs.map((i,a)=>i.dims);e.compute(Af(o,e.inputs[0].dataType,r,n))},fd=e=>{let t=e.equation.replace(/\s+/g,"");return re({equation:t})}});var kf,gd,Ef,Pf,bd,yd=U(()=>{"use strict";te();oe();ae();kf=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=r.length<t.length?0:r.length-t.length,o=t.length<r.length?0:t.length-r.length;for(;n<r.length&&o<t.length;++n,++o)if(r[n]!==t[o]&&r[n]!==1&&t[o]!==1)throw new Error("Expand requires shape to be broadcastable to input")},gd=(e,t)=>{let r=e.length-t.length,n=[];for(let o=0;o<r;++o)n.push(e[o]);for(let o=0;o<t.length;++o)n.push(t[o]===1?e[o+r]:t[o]);return n},Ef=(e,t)=>e.length>t.length?gd(e,t):gd(t,e),Pf=e=>{let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=Ef(t,r),o=e[0].dataType,i=o===9||C.size(t)===1,a=o===9||t.length>0&&t[t.length-1]%4===0?4:1,d=i||n.length>0&&n[n.length-1]%4===0?4:1,l=Math.ceil(C.size(n)/d),p=u=>{let h=E("input",o,t.length,a),_=M("output",o,n.length,d),y;if(o===9){let g=(x,$,v="")=>`
5286
+ }`};return{name:"Einsum",shaderCache:{hint:r.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let m=l.filter(h=>r.symbolToInfo.has(h)).map(h=>({type:12,data:r.symbolToInfo.get(h)?.dimValue||0}));m.push({type:12,data:a});let u=e.map((h,_)=>[...N(h)]).reduce((h,_)=>h.concat(_),m);return u.push(...N(n)),{outputs:[{dims:n,dataType:t}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:u}},getShaderSource:p}},pd=(e,t)=>{let r=new $o(e.inputs,t.equation),n=r.outputDims,o=e.inputs.map((i,a)=>i.dims);e.compute(Af(o,e.inputs[0].dataType,r,n))},md=e=>{let t=e.equation.replace(/\s+/g,"");return ee({equation:t})}});var kf,hd,Ef,Pf,gd,bd=U(()=>{"use strict";J();ne();ae();kf=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=r.length<t.length?0:r.length-t.length,o=t.length<r.length?0:t.length-r.length;for(;n<r.length&&o<t.length;++n,++o)if(r[n]!==t[o]&&r[n]!==1&&t[o]!==1)throw new Error("Expand requires shape to be broadcastable to input")},hd=(e,t)=>{let r=e.length-t.length,n=[];for(let o=0;o<r;++o)n.push(e[o]);for(let o=0;o<t.length;++o)n.push(t[o]===1?e[o+r]:t[o]);return n},Ef=(e,t)=>e.length>t.length?hd(e,t):hd(t,e),Pf=e=>{let t=e[0].dims,r=Array.from(e[1].getBigInt64Array(),Number),n=Ef(t,r),o=e[0].dataType,i=o===9||k.size(t)===1,a=o===9||t.length>0&&t[t.length-1]%4===0?4:1,d=i||n.length>0&&n[n.length-1]%4===0?4:1,l=Math.ceil(k.size(n)/d),p=u=>{let h=E("input",o,t.length,a),_=M("output",o,n.length,d),y;if(o===9){let g=(x,$,v="")=>`
5222
5287
  let outputIndices${$} = ${_.offsetToIndices(`outputOffset + ${$}u`)};
5223
5288
  let offset${$} = ${h.broadcastedIndicesToOffset(`outputIndices${$}`,_)};
5224
5289
  let index${$} = offset${$} / 4u;
@@ -5241,28 +5306,28 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5241
5306
  ${u.registerUniform("vec_size","u32").declareVariables(h,_)}
5242
5307
  ${u.mainStart()}
5243
5308
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
5244
- ${y}`},m=[{type:12,data:l},...N(t,n)];return{name:"Expand",shaderCache:{hint:`${n.length};${a}${d}`,inputDependencies:["rank"]},getShaderSource:p,getRunData:()=>({outputs:[{dims:n,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:m})}},bd=e=>{kf(e.inputs),e.compute(Pf(e.inputs),{inputs:[0]})}});var zf,_d,wd=U(()=>{"use strict";te();oe();ae();Kr();zf=e=>{let t=e[0].dataType,r=C.size(e[0].dims),n=C.size(e[1].dims),o=n%4===0,i=a=>{let d=E("x",t,[1],4),l=E("bias",t,[1],4),p=M("y",t,[1],4),m=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],u=_=>`
5309
+ ${y}`},m=[{type:12,data:l},...N(t,n)];return{name:"Expand",shaderCache:{hint:`${n.length};${a}${d}`,inputDependencies:["rank"]},getShaderSource:p,getRunData:()=>({outputs:[{dims:n,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:m})}},gd=e=>{kf(e.inputs),e.compute(Pf(e.inputs),{inputs:[0]})}});var zf,yd,_d=U(()=>{"use strict";J();ne();ae();qr();zf=e=>{let t=e[0].dataType,r=k.size(e[0].dims),n=k.size(e[1].dims),o=n%4===0,i=a=>{let d=E("x",t,[1],4),l=E("bias",t,[1],4),p=M("y",t,[1],4),m=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],u=_=>`
5245
5310
  let bias${_}_offset: u32 = (global_idx * 4 + ${_}) % uniforms.bias_size;
5246
5311
  let bias${_} = ${l.getByOffset(`bias${_}_offset / 4`)}[bias${_}_offset % 4];`,h=o?`
5247
5312
  let bias = ${l.getByOffset("global_idx % (uniforms.bias_size / 4)")};`:`${u(0)}${u(1)}${u(2)}${u(3)}
5248
5313
  let bias = ${d.type.value}(bias0, bias1, bias2, bias3);`;return`${a.registerUniforms(m).declareVariables(d,l,p)}
5249
5314
 
5250
- ${mo(Ee(t))}
5315
+ ${po(Ee(t))}
5251
5316
 
5252
- ${a.mainStart(kt)}
5317
+ ${a.mainStart(Et)}
5253
5318
  ${a.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_vec_size")}
5254
5319
 
5255
5320
  let x = ${d.getByOffset("global_idx")};
5256
5321
  ${h}
5257
5322
  let x_in = x + bias;
5258
- ${p.setByOffset("global_idx",fo("x_in"))}
5259
- }`};return{name:"FastGeluWithBias",shaderCache:{hint:`${o}`,inputDependencies:["type","type"]},getShaderSource:i,getRunData:a=>({outputs:[{dims:a[0].dims,dataType:a[0].dataType}],programUniforms:[{type:12,data:Math.ceil(r/4)},{type:12,data:n}],dispatchGroup:{x:Math.ceil(r/kt/4)}})}},_d=e=>{e.inputs.length<2||C.size(e.inputs[1].dims)===0?hu(e):e.compute(zf(e.inputs))}});var Of,Df,vd,$d,xd=U(()=>{"use strict";te();oe();Se();ae();Of=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},Df=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=C.normalizeAxis(t.axis,o),a=r.slice(0);a.splice(i,1,...n);let d=r[i],l=e[0].dataType===9?4:1,p=Math.ceil(C.size(a)/l),m=[{type:12,data:p},{type:6,data:d},{type:12,data:i},...N(e[0].dims,e[1].dims,a)],u=h=>{let _=E("data",e[0].dataType,e[0].dims.length,l),y=E("inputIndices",e[1].dataType,e[1].dims.length),g=M("output",e[0].dataType,a.length,l),x=v=>{let S=n.length,T=`var indicesIndices${v} = ${y.type.indices}(0);`;for(let A=0;A<S;A++)T+=`${S>1?`indicesIndices${v}[${A}]`:`indicesIndices${v}`} = ${a.length>1?`outputIndices${v}[uniforms.axis + ${A}]`:`outputIndices${v}`};`;T+=`
5323
+ ${p.setByOffset("global_idx",mo("x_in"))}
5324
+ }`};return{name:"FastGeluWithBias",shaderCache:{hint:`${o}`,inputDependencies:["type","type"]},getShaderSource:i,getRunData:a=>({outputs:[{dims:a[0].dims,dataType:a[0].dataType}],programUniforms:[{type:12,data:Math.ceil(r/4)},{type:12,data:n}],dispatchGroup:{x:Math.ceil(r/Et/4)}})}},yd=e=>{e.inputs.length<2||k.size(e.inputs[1].dims)===0?fu(e):e.compute(zf(e.inputs))}});var Of,Df,wd,vd,$d=U(()=>{"use strict";J();ne();xe();ae();Of=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},Df=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.axis,o),a=r.slice(0);a.splice(i,1,...n);let d=r[i],l=e[0].dataType===9?4:1,p=Math.ceil(k.size(a)/l),m=[{type:12,data:p},{type:6,data:d},{type:12,data:i},...N(e[0].dims,e[1].dims,a)],u=h=>{let _=E("data",e[0].dataType,e[0].dims.length,l),y=E("inputIndices",e[1].dataType,e[1].dims.length),g=M("output",e[0].dataType,a.length,l),x=v=>{let S=n.length,T=`var indicesIndices${v} = ${y.type.indices}(0);`;for(let A=0;A<S;A++)T+=`${S>1?`indicesIndices${v}[${A}]`:`indicesIndices${v}`} = ${a.length>1?`outputIndices${v}[uniforms.axis + ${A}]`:`outputIndices${v}`};`;T+=`
5260
5325
  var idx${v} = ${y.getByIndices(`indicesIndices${v}`)};
5261
5326
  if (idx${v} < 0) {
5262
5327
  idx${v} = idx${v} + uniforms.axisDimLimit;
5263
5328
  }
5264
5329
  var dataIndices${v} : ${_.type.indices};
5265
- `;for(let A=0,k=0;A<o;A++)A===i?(T+=`${o>1?`dataIndices${v}[${A}]`:`dataIndices${v}`} = u32(idx${v});`,k+=S):(T+=`${o>1?`dataIndices${v}[${A}]`:`dataIndices${v}`} = ${a.length>1?`outputIndices${v}[${k}]`:`outputIndices${v}`};`,k++);return T},$;if(e[0].dataType===9){let v=(S,T,A="")=>`
5330
+ `;for(let A=0,C=0;A<o;A++)A===i?(T+=`${o>1?`dataIndices${v}[${A}]`:`dataIndices${v}`} = u32(idx${v});`,C+=S):(T+=`${o>1?`dataIndices${v}[${A}]`:`dataIndices${v}`} = ${a.length>1?`outputIndices${v}[${C}]`:`outputIndices${v}`};`,C++);return T},$;if(e[0].dataType===9){let v=(S,T,A="")=>`
5266
5331
  let outputIndices${T} = ${g.offsetToIndices(`outputOffset + ${T}u`)};
5267
5332
  ${x(T)};
5268
5333
  let offset${T} = ${_.indicesToOffset(`dataIndices${T}`)};
@@ -5287,7 +5352,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5287
5352
  ${h.mainStart()}
5288
5353
  ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
5289
5354
  ${$}
5290
- }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:a,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:m}),getShaderSource:u}},vd=e=>re({axis:e.axis}),$d=(e,t)=>{let r=e.inputs;Of(r),e.compute(Df(e.inputs,t))}});var Bf,Sd,Td,Id=U(()=>{"use strict";te();oe();ae();Bf=(e,t,r,n,o,i,a,d,l)=>{let p=[{type:12,data:i},{type:12,data:n},{type:12,data:o},{type:12,data:r},{type:12,data:a},{type:12,data:d},{type:12,data:l}],m=[i];p.push(...N(t.dims,m));let u=h=>{let _=E("indices_data",t.dataType,t.dims.length),y=M("input_slice_offsets_data",12,1,1),g=[_,y],x=[{name:"output_size",type:"u32"},{name:"batch_dims",type:"u32"},{name:"input_dims",type:"u32",length:o.length},{name:"sizes_from_slice_dims_data",type:"u32",length:r.length},{name:"num_slices_per_batch",type:"u32"},{name:"input_batch_stride",type:"u32"},{name:"num_slice_dims",type:"u32"}];return`
5355
+ }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:a,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:m}),getShaderSource:u}},wd=e=>ee({axis:e.axis}),vd=(e,t)=>{let r=e.inputs;Of(r),e.compute(Df(e.inputs,t))}});var Bf,xd,Sd,Td=U(()=>{"use strict";J();ne();ae();Bf=(e,t,r,n,o,i,a,d,l)=>{let p=[{type:12,data:i},{type:12,data:n},{type:12,data:o},{type:12,data:r},{type:12,data:a},{type:12,data:d},{type:12,data:l}],m=[i];p.push(...N(t.dims,m));let u=h=>{let _=E("indices_data",t.dataType,t.dims.length),y=M("input_slice_offsets_data",12,1,1),g=[_,y],x=[{name:"output_size",type:"u32"},{name:"batch_dims",type:"u32"},{name:"input_dims",type:"u32",length:o.length},{name:"sizes_from_slice_dims_data",type:"u32",length:r.length},{name:"num_slices_per_batch",type:"u32"},{name:"input_batch_stride",type:"u32"},{name:"num_slice_dims",type:"u32"}];return`
5291
5356
  ${h.registerUniforms(x).declareVariables(...g)}
5292
5357
  ${h.mainStart()}
5293
5358
  ${h.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5306,13 +5371,13 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5306
5371
  }
5307
5372
 
5308
5373
  input_slice_offsets_data[global_idx] = base_offset + u32(relative_slice_offset);
5309
- }`};return e.compute({name:"computeSliceOffsets",shaderCache:{hint:`${o.length}_${r.length}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:m,dataType:e.inputs[1].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:p}),getShaderSource:u},{inputs:[t],outputs:[-1]})[0]},Sd=(e,t)=>{let r=e.inputs,n=r[0].dims,o=r[0].dataType,i=r[1].dims,a=i[i.length-1],d=C.sizeToDimension(i,i.length-1),l=C.sizeFromDimension(n,t.batchDims+a),p=C.sizeToDimension(n,t.batchDims),m=C.sizeFromDimension(n,t.batchDims),u=d/p,h=new Array(a),_=l;for(let T=0;T<a;++T)h[a-1-T]=_,_*=n[t.batchDims+a-1-T];let y=Bf(e,r[1],h,t.batchDims,n,d,u,m,a),g=t.batchDims+a;if(g>n.length)throw new Error("last dimension of indices must not be larger than rank of input tensor");let x=i.slice(0,-1).concat(n.slice(g)),$=C.size(x),v=[{type:12,data:$},{type:12,data:l},...N(r[0].dims,y.dims,x)],S=T=>{let A=E("data",r[0].dataType,r[0].dims.length),k=E("slice_offsets",12,y.dims.length),P=M("output",r[0].dataType,x.length);return`
5310
- ${T.registerUniform("output_size","u32").registerUniform("slice_size","u32").declareVariables(A,k,P)}
5374
+ }`};return e.compute({name:"computeSliceOffsets",shaderCache:{hint:`${o.length}_${r.length}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:m,dataType:e.inputs[1].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:p}),getShaderSource:u},{inputs:[t],outputs:[-1]})[0]},xd=(e,t)=>{let r=e.inputs,n=r[0].dims,o=r[0].dataType,i=r[1].dims,a=i[i.length-1],d=k.sizeToDimension(i,i.length-1),l=k.sizeFromDimension(n,t.batchDims+a),p=k.sizeToDimension(n,t.batchDims),m=k.sizeFromDimension(n,t.batchDims),u=d/p,h=new Array(a),_=l;for(let T=0;T<a;++T)h[a-1-T]=_,_*=n[t.batchDims+a-1-T];let y=Bf(e,r[1],h,t.batchDims,n,d,u,m,a),g=t.batchDims+a;if(g>n.length)throw new Error("last dimension of indices must not be larger than rank of input tensor");let x=i.slice(0,-1).concat(n.slice(g)),$=k.size(x),v=[{type:12,data:$},{type:12,data:l},...N(r[0].dims,y.dims,x)],S=T=>{let A=E("data",r[0].dataType,r[0].dims.length),C=E("slice_offsets",12,y.dims.length),P=M("output",r[0].dataType,x.length);return`
5375
+ ${T.registerUniform("output_size","u32").registerUniform("slice_size","u32").declareVariables(A,C,P)}
5311
5376
  ${T.mainStart()}
5312
5377
  ${T.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5313
5378
  let slice_offset = slice_offsets[global_idx / uniforms.slice_size];
5314
5379
  output[global_idx] = data[u32(slice_offset) + global_idx % uniforms.slice_size];
5315
- }`};e.compute({name:"GatherND",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:x,dataType:o}],dispatchGroup:{x:Math.ceil($/64)},programUniforms:v}),getShaderSource:S},{inputs:[r[0],y]})},Td=e=>({batchDims:e.batch_dims,cacheKey:""})});var Mf,Rf,Cd,Ad,kd=U(()=>{"use strict";te();oe();Se();ae();Mf=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let r=C.normalizeAxis(t.quantizeAxis,e[0].dims.length),n=t.blockSize,o=e[0],i=e[2],a=e.length===4?e[3]:void 0;if(i.dims.length!==o.dims.length||!o.dims.map((d,l)=>l===r?Math.ceil(d/n)===i.dims[l]:d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(a){if(a.dataType!==o.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(a.dims.length!==i.dims.length||!a.dims.map((d,l)=>d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},Rf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=C.normalizeAxis(t.gatherAxis,o),a=C.normalizeAxis(t.quantizeAxis,o),d=r.slice(0);d.splice(i,1,...n);let l=C.size(d),p=e[2].dataType,u=e[0].dataType===22,h=[{type:12,data:l},{type:12,data:a},{type:12,data:i},{type:12,data:t.blockSize},...N(...e.map((y,g)=>y.dims),d)],_=y=>{let g=E("data",e[0].dataType,e[0].dims.length),x=E("inputIndices",e[1].dataType,e[1].dims.length),$=E("scales",e[2].dataType,e[2].dims.length),v=e.length>3?E("zeroPoint",e[3].dataType,e[3].dims.length):void 0,S=M("output",p,d.length),T=[g,x,$];v&&T.push(v);let A=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5380
+ }`};e.compute({name:"GatherND",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:x,dataType:o}],dispatchGroup:{x:Math.ceil($/64)},programUniforms:v}),getShaderSource:S},{inputs:[r[0],y]})},Sd=e=>({batchDims:e.batch_dims,cacheKey:""})});var Mf,Rf,Id,Cd,Ad=U(()=>{"use strict";J();ne();xe();ae();Mf=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let r=k.normalizeAxis(t.quantizeAxis,e[0].dims.length),n=t.blockSize,o=e[0],i=e[2],a=e.length===4?e[3]:void 0;if(i.dims.length!==o.dims.length||!o.dims.map((d,l)=>l===r?Math.ceil(d/n)===i.dims[l]:d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(a){if(a.dataType!==o.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(a.dims.length!==i.dims.length||!a.dims.map((d,l)=>d===i.dims[l]).reduce((d,l)=>d&&l,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},Rf=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r.length,i=k.normalizeAxis(t.gatherAxis,o),a=k.normalizeAxis(t.quantizeAxis,o),d=r.slice(0);d.splice(i,1,...n);let l=k.size(d),p=e[2].dataType,u=e[0].dataType===22,h=[{type:12,data:l},{type:12,data:a},{type:12,data:i},{type:12,data:t.blockSize},...N(...e.map((y,g)=>y.dims),d)],_=y=>{let g=E("data",e[0].dataType,e[0].dims.length),x=E("inputIndices",e[1].dataType,e[1].dims.length),$=E("scales",e[2].dataType,e[2].dims.length),v=e.length>3?E("zeroPoint",e[3].dataType,e[3].dims.length):void 0,S=M("output",p,d.length),T=[g,x,$];v&&T.push(v);let A=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return`
5316
5381
  ${y.registerUniforms(A).declareVariables(...T,S)}
5317
5382
  ${y.mainStart()}
5318
5383
  let output_indices = ${S.offsetToIndices("global_idx")};
@@ -5357,8 +5422,8 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5357
5422
  let zero_point = zero_point_vec[zero_point_index / 2];`:"var zero_point = 0")()};
5358
5423
  let dequantized_data = ${Ee(p)}(quantized_data - zero_point) * scale;
5359
5424
  ${S.setByOffset("global_idx","dequantized_data")};
5360
- }`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((y,g)=>g!==1).map(y=>y.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(y,g)=>"rank")},getRunData:()=>({outputs:[{dims:d,dataType:p}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:h}),getShaderSource:_}},Cd=(e,t)=>{let r=e.inputs;Mf(r,t),e.compute(Rf(e.inputs,t))},Ad=e=>re({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})});var Uf,Nf,Ed,Pd,zd=U(()=>{"use strict";te();oe();Se();ae();Uf=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and
5361
- indices input tensors be of same rank.`)},Nf=(e,t)=>{let r=e[0].dims,n=e[0].dataType,o=r.length,i=e[1].dims,a=e[1].dataType,d=C.normalizeAxis(t.axis,o),l=r[d],p=i.slice(0),m=C.size(p),u=E("input",n,o),h=E("indicesInput",a,i.length),_=M("output",n,p.length),y=[{type:12,data:m},{type:6,data:l},{type:12,data:d}];return y.push(...N(r,i,p)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:p,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:y}),getShaderSource:$=>`
5425
+ }`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((y,g)=>g!==1).map(y=>y.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(y,g)=>"rank")},getRunData:()=>({outputs:[{dims:d,dataType:p}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:h}),getShaderSource:_}},Id=(e,t)=>{let r=e.inputs;Mf(r,t),e.compute(Rf(e.inputs,t))},Cd=e=>ee({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})});var Uf,Nf,kd,Ed,Pd=U(()=>{"use strict";J();ne();xe();ae();Uf=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and
5426
+ indices input tensors be of same rank.`)},Nf=(e,t)=>{let r=e[0].dims,n=e[0].dataType,o=r.length,i=e[1].dims,a=e[1].dataType,d=k.normalizeAxis(t.axis,o),l=r[d],p=i.slice(0),m=k.size(p),u=E("input",n,o),h=E("indicesInput",a,i.length),_=M("output",n,p.length),y=[{type:12,data:m},{type:6,data:l},{type:12,data:d}];return y.push(...N(r,i,p)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:p,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:y}),getShaderSource:$=>`
5362
5427
  ${$.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(u,h,_)}
5363
5428
  ${$.mainStart()}
5364
5429
  ${$.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
@@ -5374,8 +5439,8 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5374
5439
  let value = ${u.getByIndices("inputIndices")};
5375
5440
 
5376
5441
  ${_.setByOffset("global_idx","value")};
5377
- }`}},Ed=e=>re({axis:e.axis}),Pd=(e,t)=>{let r=e.inputs;Uf(r),e.compute(Nf(e.inputs,t))}});var Vf,Wf,Od,Dd,Bd=U(()=>{"use strict";te();oe();ae();Vf=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},Wf=(e,t)=>{let r=e[0].dims.slice(),n=e[1].dims.slice(),[o,i,a]=Vr.getShapeOfGemmResult(r,t.transA,n,t.transB,e.length===3?e[2].dims:void 0),d=[o,i];if(!d)throw new Error("Can't use gemm on the given tensors");let l=16,p=Math.ceil(i/l),m=Math.ceil(o/l),u=!0,h=C.size(d),_=[{type:12,data:u?p:h},{type:12,data:o},{type:12,data:i},{type:12,data:a},{type:1,data:t.alpha},{type:1,data:t.beta}],y=["type","type"];e.length===3&&(_.push(...N(e[2].dims)),y.push("rank")),_.push(...N(d));let g=$=>{let v="";t.transA&&t.transB?v="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?v="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?v="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(v="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let S=t.alpha===1?"":"value *= uniforms.alpha;",T=E("a",e[0].dataType,e[0].dims),A=E("b",e[1].dataType,e[1].dims),k=T.type.value,P=null,D=[T,A];e.length===3&&(P=E("c",e[2].dataType,e[2].dims.length),D.push(P));let R=M("output",e[0].dataType,d.length);D.push(R);let G=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return`
5378
- ${$.registerUniforms(G).declareVariables(...D)}
5442
+ }`}},kd=e=>ee({axis:e.axis}),Ed=(e,t)=>{let r=e.inputs;Uf(r),e.compute(Nf(e.inputs,t))}});var Vf,Wf,zd,Od,Dd=U(()=>{"use strict";J();ne();ae();Vf=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},Wf=(e,t)=>{let r=e[0].dims.slice(),n=e[1].dims.slice(),[o,i,a]=Nr.getShapeOfGemmResult(r,t.transA,n,t.transB,e.length===3?e[2].dims:void 0),d=[o,i];if(!d)throw new Error("Can't use gemm on the given tensors");let l=16,p=Math.ceil(i/l),m=Math.ceil(o/l),u=!0,h=k.size(d),_=[{type:12,data:u?p:h},{type:12,data:o},{type:12,data:i},{type:12,data:a},{type:1,data:t.alpha},{type:1,data:t.beta}],y=["type","type"];e.length===3&&(_.push(...N(e[2].dims)),y.push("rank")),_.push(...N(d));let g=$=>{let v="";t.transA&&t.transB?v="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?v="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?v="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(v="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let S=t.alpha===1?"":"value *= uniforms.alpha;",T=E("a",e[0].dataType,e[0].dims),A=E("b",e[1].dataType,e[1].dims),C=T.type.value,P=null,D=[T,A];e.length===3&&(P=E("c",e[2].dataType,e[2].dims.length),D.push(P));let R=M("output",e[0].dataType,d.length);D.push(R);let H=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return`
5443
+ ${$.registerUniforms(H).declareVariables(...D)}
5379
5444
 
5380
5445
  ${$.mainStart()}
5381
5446
  ${$.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5383,15 +5448,15 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5383
5448
  let m = global_idx / uniforms.N;
5384
5449
  let n = global_idx % uniforms.N;
5385
5450
 
5386
- var value = ${k}(0);
5451
+ var value = ${C}(0);
5387
5452
  for (var k: u32 = 0u; k < uniforms.K; k++) {
5388
5453
  ${v}
5389
5454
  }
5390
5455
 
5391
5456
  ${S}
5392
- ${(()=>P!=null?`let cOffset = ${P.broadcastedIndicesToOffset("vec2(m, n)",R)}; value += ${k}(uniforms.beta) * ${P.getByOffset("cOffset")};`:"")()}
5457
+ ${(()=>P!=null?`let cOffset = ${P.broadcastedIndicesToOffset("vec2(m, n)",R)}; value += ${C}(uniforms.beta) * ${P.getByOffset("cOffset")};`:"")()}
5393
5458
  output[global_idx] = value;
5394
- }`},x=$=>{let v=E("a",e[0].dataType,e[0].dims),S=E("b",e[1].dataType,e[1].dims),T=null,A=[v,S];e.length===3&&(T=E("c",e[2].dataType,e[2].dims.length),A.push(T));let k=M("output",e[0].dataType,d.length);A.push(k);let P=[{name:"num_tile_n",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}],D="",R="";t.transA&&t.transB?(R=`
5459
+ }`},x=$=>{let v=E("a",e[0].dataType,e[0].dims),S=E("b",e[1].dataType,e[1].dims),T=null,A=[v,S];e.length===3&&(T=E("c",e[2].dataType,e[2].dims.length),A.push(T));let C=M("output",e[0].dataType,d.length);A.push(C);let P=[{name:"num_tile_n",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}],D="",R="";t.transA&&t.transB?(R=`
5395
5460
  var col = tile_row_start + local_id.x;
5396
5461
  var row = k_start + local_id.y;
5397
5462
  if (col < uniforms.M && row < uniforms.K) {
@@ -5455,7 +5520,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5455
5520
  } else {
5456
5521
  tile_b[local_id.y][local_id.x] = ${S.type.value}(0);
5457
5522
  }
5458
- `,D="value += tile_a[local_id.y][k] * tile_b[k][local_id.x];");let G=t.alpha===1?"":"value *= uniforms.alpha;";return`
5523
+ `,D="value += tile_a[local_id.y][k] * tile_b[k][local_id.x];");let H=t.alpha===1?"":"value *= uniforms.alpha;";return`
5459
5524
  ${$.registerUniforms(P).declareVariables(...A)}
5460
5525
  var<workgroup> tile_a: array<array<${v.type.storage}, ${l}>, ${l}>;
5461
5526
  var<workgroup> tile_b: array<array<${S.type.storage}, ${l}>, ${l}>;
@@ -5464,7 +5529,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5464
5529
  let tile_row_start = (workgroup_index / uniforms.num_tile_n) * ${l};
5465
5530
  let num_tiles = (uniforms.K - 1) / ${l} + 1;
5466
5531
  var k_start = 0u;
5467
- var value = ${k.type.value}(0);
5532
+ var value = ${C.type.value}(0);
5468
5533
  for (var t: u32 = 0u; t < num_tiles; t++) {
5469
5534
  ${R}
5470
5535
  k_start = k_start + ${l};
@@ -5476,14 +5541,14 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5476
5541
  workgroupBarrier();
5477
5542
  }
5478
5543
 
5479
- ${G}
5544
+ ${H}
5480
5545
  let m = tile_row_start + local_id.y;
5481
5546
  let n = tile_col_start + local_id.x;
5482
- ${(()=>T!=null?`let cOffset = ${T.broadcastedIndicesToOffset("vec2(m, n)",k)}; value += ${k.type.value}(uniforms.beta) * ${T.getByOffset("cOffset")};`:"")()}
5547
+ ${(()=>T!=null?`let cOffset = ${T.broadcastedIndicesToOffset("vec2(m, n)",C)}; value += ${C.type.value}(uniforms.beta) * ${T.getByOffset("cOffset")};`:"")()}
5483
5548
  if (m < uniforms.M && n < uniforms.N) {
5484
5549
  output[m * uniforms.N + n] = value;
5485
5550
  }
5486
- }`};return u?{name:"GemmShared",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:p*m},programUniforms:_}),getShaderSource:x}:{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:_}),getShaderSource:g}},Od=e=>{let t=e.transA,r=e.transB,n=e.alpha,o=e.beta;return{transA:t,transB:r,alpha:n,beta:o,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},Dd=(e,t)=>{Vf(e.inputs),e.compute(Wf(e.inputs,t))}});var lt,_t,Ut,Nt,Lf,Gf,Hf,Ff,qf,Kf,jf,Yf,Md,Rd,Ud=U(()=>{"use strict";te();oe();Se();ae();[lt,_t,Ut,Nt]=[0,1,2,3],Lf=e=>{if(e[0].dims.length!==4)throw new Error("only 4-D tensor is supported.");if(e[0].dims.length!==e[1].dims.length)throw new Error("input dimensions must be equal to grid dimensions");if(e[0].dims.length-2!==e[1].dims[e[1].dims.length-1])throw new Error(`last dimension of grid must be equal to ${e[0].dims.length-2}`);if(e[0].dims[0]!==e[1].dims[0])throw new Error("grid batch size must match input batch size")},Gf=`
5551
+ }`};return u?{name:"GemmShared",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:p*m},programUniforms:_}),getShaderSource:x}:{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:d,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:_}),getShaderSource:g}},zd=e=>{let t=e.transA,r=e.transB,n=e.alpha,o=e.beta;return{transA:t,transB:r,alpha:n,beta:o,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},Od=(e,t)=>{Vf(e.inputs),e.compute(Wf(e.inputs,t))}});var lt,_t,Nt,Vt,Lf,Gf,Hf,Ff,qf,Kf,jf,Yf,Bd,Md,Rd=U(()=>{"use strict";J();ne();xe();ae();[lt,_t,Nt,Vt]=[0,1,2,3],Lf=e=>{if(e[0].dims.length!==4)throw new Error("only 4-D tensor is supported.");if(e[0].dims.length!==e[1].dims.length)throw new Error("input dimensions must be equal to grid dimensions");if(e[0].dims.length-2!==e[1].dims[e[1].dims.length-1])throw new Error(`last dimension of grid must be equal to ${e[0].dims.length-2}`);if(e[0].dims[0]!==e[1].dims[0])throw new Error("grid batch size must match input batch size")},Gf=`
5487
5552
  fn gs_get_cubic_coeffs(x: f32) -> vec4<f32> {
5488
5553
  let cubic_alpha = -0.75f;
5489
5554
  let x_abs = abs(x);
@@ -5549,15 +5614,15 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5549
5614
  indices[${lt}] = batch;
5550
5615
  indices[${_t}] = channel;`+(()=>{switch(r.paddingMode){case"zeros":return`
5551
5616
  if (r >= 0 && r < H && c >=0 && c < W) {
5552
- indices[${Ut}] = u32(r);
5553
- indices[${Nt}] = u32(c);
5617
+ indices[${Nt}] = u32(r);
5618
+ indices[${Vt}] = u32(c);
5554
5619
  }
5555
5620
  `;case"border":return`
5556
- indices[${Ut}] = u32(clamp(r, 0, H - 1));
5557
- indices[${Nt}] = u32(clamp(c, 0, W - 1));
5621
+ indices[${Nt}] = u32(clamp(r, 0, H - 1));
5622
+ indices[${Vt}] = u32(clamp(c, 0, W - 1));
5558
5623
  `;case"reflection":return`
5559
- indices[${Ut}] = gs_reflect(r, border[1], border[3]);
5560
- indices[${Nt}] = gs_reflect(c, border[0], border[2]);
5624
+ indices[${Nt}] = gs_reflect(r, border[1], border[3]);
5625
+ indices[${Vt}] = gs_reflect(c, border[0], border[2]);
5561
5626
  `;default:throw new Error(`padding mode ${r.paddingMode} is not supported`)}})()+`
5562
5627
  return ${e.getByIndices("indices")};
5563
5628
  }
@@ -5592,7 +5657,7 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5592
5657
  let dx = x - f32(x0 + 1);
5593
5658
  let dy = y - f32(y0 + 1);
5594
5659
  let result = gs_bicubic_interpolate(p, dx, dy);
5595
- `;default:throw new Error(`mode ${r.mode} is not supported`)}})()+`${e.setByOffset("global_idx","result")}`,Yf=(e,t)=>{let r=E("x",e[0].dataType,e[0].dims.length),n=[e[1].dims[0],e[1].dims[1],e[1].dims[2]],o=E("grid",e[1].dataType,n.length,2),i=[e[0].dims[0],e[0].dims[1],e[1].dims[1],e[1].dims[2]];t.format==="NHWC"&&(i=[e[0].dims[0],e[1].dims[1],e[1].dims[2],e[0].dims[3]],[lt,_t,Ut,Nt]=[0,3,1,2]);let a=M("output",e[0].dataType,i.length),d=r.type.value,l=C.size(i),p=[{type:12,data:l},...N(e[0].dims,n,i)],m=u=>`
5660
+ `;default:throw new Error(`mode ${r.mode} is not supported`)}})()+`${e.setByOffset("global_idx","result")}`,Yf=(e,t)=>{let r=E("x",e[0].dataType,e[0].dims.length),n=[e[1].dims[0],e[1].dims[1],e[1].dims[2]],o=E("grid",e[1].dataType,n.length,2),i=[e[0].dims[0],e[0].dims[1],e[1].dims[1],e[1].dims[2]];t.format==="NHWC"&&(i=[e[0].dims[0],e[1].dims[1],e[1].dims[2],e[0].dims[3]],[lt,_t,Nt,Vt]=[0,3,1,2]);let a=M("output",e[0].dataType,i.length),d=r.type.value,l=k.size(i),p=[{type:12,data:l},...N(e[0].dims,n,i)],m=u=>`
5596
5661
  ${u.registerUniform("output_size","u32").declareVariables(r,o,a)}
5597
5662
  ${Gf}
5598
5663
  ${Hf(d)}
@@ -5602,8 +5667,8 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5602
5667
 
5603
5668
  ${u.mainStart()}
5604
5669
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5605
- let H_in = i32(uniforms.x_shape[${Ut}]);
5606
- let W_in = i32(uniforms.x_shape[${Nt}]);
5670
+ let H_in = i32(uniforms.x_shape[${Nt}]);
5671
+ let W_in = i32(uniforms.x_shape[${Vt}]);
5607
5672
 
5608
5673
  ${t.alignCorners===0?`
5609
5674
  let x_min = -0.5;
@@ -5619,23 +5684,23 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
5619
5684
  let border = vec4<f32>(x_min, y_min, x_max, y_max);
5620
5685
 
5621
5686
  let indices = ${a.offsetToIndices("global_idx")};
5622
- var grid_indices = vec3<u32>(indices[${lt}], indices[${Ut}], indices[${Nt}]);
5687
+ var grid_indices = vec3<u32>(indices[${lt}], indices[${Nt}], indices[${Vt}]);
5623
5688
  let nxy = ${o.getByIndices("grid_indices")};
5624
5689
  var x = gs_denormalize(f32(nxy[0]), W_in);
5625
5690
  var y = gs_denormalize(f32(nxy[1]), H_in);
5626
5691
 
5627
5692
  ${jf(a,d,t)}
5628
- }`;return{name:"GridSample",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:["type","type"]},getRunData:u=>{let h=C.size(i);return{outputs:[{dims:i,dataType:u[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:p}},getShaderSource:m}},Md=(e,t)=>{Lf(e.inputs),e.compute(Yf(e.inputs,t))},Rd=e=>re({alignCorners:e.align_corners,mode:e.mode,paddingMode:e.padding_mode,format:e.format})});var Be,Xf,Vd,Nd,Jf,tr,Wd,So=U(()=>{"use strict";te();oe();Se();Nr();Fr();ae();dt();Be=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,Xf=(e,t)=>{let r=e[0],n=Be(e,1),o=Be(e,2),i=Be(e,3),a=Be(e,4),d=Be(e,5),l=Be(e,6),p=Be(e,7);if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let m=r.dims[0],u=r.dims[1],h=r.dims.length===3?r.dims[2]:t.numHeads*r.dims[4],_=u,y=0,g=0,x=Math.floor(h/t.numHeads);if(l&&p&&C.size(l.dims)&&C.size(p.dims)){if(l.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(l.dims[0]!==m||l.dims[1]!==t.numHeads||l.dims[3]!==x)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(p.dims[0]!==m||p.dims[1]!==t.numHeads||p.dims[3]!==x)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(l.dims[2]!==p.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(p.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');y=l.dims[2],g=l.dims[2]}else if(l&&C.size(l.dims)||p&&C.size(p.dims))throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let $;if(n&&C.size(n.dims)>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(n.dims[2]!==r.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');$=2,_=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==x)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');$=5,_=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==x)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');$=0,_=n.dims[2]}}else{if(r.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(r.dims[2]!==t.numHeads||r.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');$=3}if(i&&C.size(i.dims)>0){if(i.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(n&&n.dims.length===5&&n.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let v=y+_,S=0;if(a&&C.size(a.dims)>0){S=8;let P=a.dims;throw P.length===1?P[0]===m?S=1:P[0]===3*m+2&&(S=3):P.length===2&&P[0]===m&&P[1]===v&&(S=5),S===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let T=!1,A=h;if(o&&C.size(o.dims)>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(_!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');A=o.dims[2]}else{if(_!==o.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');A=o.dims[1]*o.dims[3],T=!0}}let k=!1;if(a&&C.size(a.dims)>0)throw new Error("Key padding mask is not supported");if(d&&C.size(d.dims)>0){if(d.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(d.dims[0]!==m||d.dims[1]!==t.numHeads||d.dims[2]!==u||d.dims[3]!==v)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:m,sequenceLength:u,pastSequenceLength:y,kvSequenceLength:_,totalSequenceLength:v,maxSequenceLength:g,inputHiddenSize:0,hiddenSize:h,vHiddenSize:A,headSize:x,vHeadSize:Math.floor(A/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:S,scale:t.scale,broadcastResPosBias:k,passPastInKv:T,qkvFormat:$}},Vd=e=>re({...e}),Nd=re({perm:[0,2,1,3]}),Jf=(e,t,r,n,o,i,a)=>{let d=[n,o,i],l=C.size(d),p=[{type:12,data:l},{type:12,data:a},{type:12,data:i}],m=u=>{let h=M("qkv_with_bias",t.dataType,d),_=E("qkv",t.dataType,d),y=E("bias",r.dataType,d),g=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return`
5693
+ }`;return{name:"GridSample",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:["type","type"]},getRunData:u=>{let h=k.size(i);return{outputs:[{dims:i,dataType:u[0].dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:p}},getShaderSource:m}},Bd=(e,t)=>{Lf(e.inputs),e.compute(Yf(e.inputs,t))},Md=e=>ee({alignCorners:e.align_corners,mode:e.mode,paddingMode:e.padding_mode,format:e.format})});var Me,Xf,Nd,Ud,Jf,er,Vd,xo=U(()=>{"use strict";J();ne();xe();Ur();Hr();ae();dt();Me=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,Xf=(e,t)=>{let r=e[0],n=Me(e,1),o=Me(e,2),i=Me(e,3),a=Me(e,4),d=Me(e,5),l=Me(e,6),p=Me(e,7);if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let m=r.dims[0],u=r.dims[1],h=r.dims.length===3?r.dims[2]:t.numHeads*r.dims[4],_=u,y=0,g=0,x=Math.floor(h/t.numHeads);if(l&&p&&k.size(l.dims)&&k.size(p.dims)){if(l.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(l.dims[0]!==m||l.dims[1]!==t.numHeads||l.dims[3]!==x)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(p.dims[0]!==m||p.dims[1]!==t.numHeads||p.dims[3]!==x)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(l.dims[2]!==p.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(p.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');y=l.dims[2],g=l.dims[2]}else if(l&&k.size(l.dims)||p&&k.size(p.dims))throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let $;if(n&&k.size(n.dims)>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(n.dims[2]!==r.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');$=2,_=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==x)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');$=5,_=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==x)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');$=0,_=n.dims[2]}}else{if(r.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(r.dims[2]!==t.numHeads||r.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');$=3}if(i&&k.size(i.dims)>0){if(i.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(n&&n.dims.length===5&&n.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let v=y+_,S=0;if(a&&k.size(a.dims)>0){S=8;let P=a.dims;throw P.length===1?P[0]===m?S=1:P[0]===3*m+2&&(S=3):P.length===2&&P[0]===m&&P[1]===v&&(S=5),S===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let T=!1,A=h;if(o&&k.size(o.dims)>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(_!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');A=o.dims[2]}else{if(_!==o.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');A=o.dims[1]*o.dims[3],T=!0}}let C=!1;if(a&&k.size(a.dims)>0)throw new Error("Key padding mask is not supported");if(d&&k.size(d.dims)>0){if(d.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(d.dims[0]!==m||d.dims[1]!==t.numHeads||d.dims[2]!==u||d.dims[3]!==v)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:m,sequenceLength:u,pastSequenceLength:y,kvSequenceLength:_,totalSequenceLength:v,maxSequenceLength:g,inputHiddenSize:0,hiddenSize:h,vHiddenSize:A,headSize:x,vHeadSize:Math.floor(A/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:S,scale:t.scale,broadcastResPosBias:C,passPastInKv:T,qkvFormat:$}},Nd=e=>ee({...e}),Ud=ee({perm:[0,2,1,3]}),Jf=(e,t,r,n,o,i,a)=>{let d=[n,o,i],l=k.size(d),p=[{type:12,data:l},{type:12,data:a},{type:12,data:i}],m=u=>{let h=M("qkv_with_bias",t.dataType,d),_=E("qkv",t.dataType,d),y=E("bias",r.dataType,d),g=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return`
5629
5694
  ${u.registerUniforms(g).declareVariables(_,y,h)}
5630
5695
  ${u.mainStart()}
5631
5696
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
5632
5697
  let bias_offset_idx = (global_idx % uniforms.hidden_size) + uniforms.bias_offset;
5633
5698
 
5634
5699
  qkv_with_bias[global_idx] = qkv[global_idx] + bias[bias_offset_idx];
5635
- }`};return e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:d,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:p}),getShaderSource:m},{inputs:[t,r],outputs:[-1]})[0]},tr=(e,t,r,n,o,i,a,d)=>{let l=i;if(a&&C.size(a.dims)>0){if(n===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return l=Jf(e,i,a,t,n,r*o,d),l=l.reshape([t,n,r,o]),r===1||n===1?l:e.compute(Pe(l,Nd.perm),{inputs:[l],outputs:[-1]})[0]}else return i.dims.length===3&&(l=i.reshape([t,n,r,o])),r===1||n===1?l:e.compute(Pe(l,Nd.perm),{inputs:[l],outputs:[-1]})[0]},Wd=(e,t)=>{let r=Xf(e.inputs,t),n=e.inputs[0],o=Be(e.inputs,1),i=Be(e.inputs,2),a=Be(e.inputs,3),d=Be(e.inputs,4),l=Be(e.inputs,5),p=Be(e.inputs,6),m=Be(e.inputs,7);if(n.dims.length===5)throw new Error("Packed QKV is not implemented");if(o?.dims.length===5)throw new Error("Packed KV is not implemented");let u=o&&i&&o.dims.length===4&&i.dims.length===4,h=tr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,n,a,0);if(u)return Rt(e,h,o,i,d,void 0,p,m,l,r);if(!o||!i)throw new Error("key and value must be provided");let _=tr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.headSize,o,a,r.hiddenSize),y=tr(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.vHeadSize,i,a,2*r.hiddenSize);Rt(e,h,_,y,d,void 0,p,m,l,r)}});var eh,th,rh,nh,To,Ld,Gd,Io=U(()=>{"use strict";te();oe();Se();ae();eh=e=>{if(!e||e.length<1)throw new Error("too few inputs")},th=(e,t)=>{let r=[],n=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(o=>r.push(Number(o))),n=r.length),re({numOutputs:n,axis:t.axis,splitSizes:r})},rh=e=>`
5700
+ }`};return e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:d,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(l/64)},programUniforms:p}),getShaderSource:m},{inputs:[t,r],outputs:[-1]})[0]},er=(e,t,r,n,o,i,a,d)=>{let l=i;if(a&&k.size(a.dims)>0){if(n===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return l=Jf(e,i,a,t,n,r*o,d),l=l.reshape([t,n,r,o]),r===1||n===1?l:e.compute(Pe(l,Ud.perm),{inputs:[l],outputs:[-1]})[0]}else return i.dims.length===3&&(l=i.reshape([t,n,r,o])),r===1||n===1?l:e.compute(Pe(l,Ud.perm),{inputs:[l],outputs:[-1]})[0]},Vd=(e,t)=>{let r=Xf(e.inputs,t),n=e.inputs[0],o=Me(e.inputs,1),i=Me(e.inputs,2),a=Me(e.inputs,3),d=Me(e.inputs,4),l=Me(e.inputs,5),p=Me(e.inputs,6),m=Me(e.inputs,7);if(n.dims.length===5)throw new Error("Packed QKV is not implemented");if(o?.dims.length===5)throw new Error("Packed KV is not implemented");let u=o&&i&&o.dims.length===4&&i.dims.length===4,h=er(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,n,a,0);if(u)return Ut(e,h,o,i,d,void 0,p,m,l,r);if(!o||!i)throw new Error("key and value must be provided");let _=er(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.headSize,o,a,r.hiddenSize),y=er(e,r.batchSize,r.numHeads,r.kvSequenceLength,r.vHeadSize,i,a,2*r.hiddenSize);Ut(e,h,_,y,d,void 0,p,m,l,r)}});var eh,th,rh,nh,So,Wd,Ld,To=U(()=>{"use strict";J();ne();xe();ae();eh=e=>{if(!e||e.length<1)throw new Error("too few inputs")},th=(e,t)=>{let r=[],n=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(o=>r.push(Number(o))),n=r.length),ee({numOutputs:n,axis:t.axis,splitSizes:r})},rh=e=>`
5636
5701
  fn calculateOutputIndex(index: u32) -> u32 {
5637
5702
  for (var i: u32 = 0u; i < ${e}u; i += 1u ) {
5638
- if (index < ${F("uniforms.size_in_split_axis","i",e)}) {
5703
+ if (index < ${q("uniforms.size_in_split_axis","i",e)}) {
5639
5704
  return i;
5640
5705
  }
5641
5706
  }
@@ -5644,7 +5709,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
5644
5709
  fn writeBufferData(output_number: u32, indices: ${e[0].type.indices}, global_idx: u32) {
5645
5710
  ${r.join(`
5646
5711
  `)}
5647
- }`},To=(e,t)=>{let r=e[0].dims,n=C.size(r),o=e[0].dataType,i=C.normalizeAxis(t.axis,r.length),a=new Array(t.numOutputs),d=E("input",o,r.length),l=new Array(t.numOutputs),p=[],m=[],u=0,h=[{type:12,data:n}];for(let y=0;y<t.numOutputs;y++){u+=t.splitSizes[y],l[y]=u;let g=r.slice();g[i]=t.splitSizes[y],m.push(g),a[y]=M(`output${y}`,o,g.length),p.push({dims:m[y],dataType:e[0].dataType})}h.push({type:12,data:l},...N(r,...m));let _=y=>`
5712
+ }`},So=(e,t)=>{let r=e[0].dims,n=k.size(r),o=e[0].dataType,i=k.normalizeAxis(t.axis,r.length),a=new Array(t.numOutputs),d=E("input",o,r.length),l=new Array(t.numOutputs),p=[],m=[],u=0,h=[{type:12,data:n}];for(let y=0;y<t.numOutputs;y++){u+=t.splitSizes[y],l[y]=u;let g=r.slice();g[i]=t.splitSizes[y],m.push(g),a[y]=M(`output${y}`,o,g.length),p.push({dims:m[y],dataType:e[0].dataType})}h.push({type:12,data:l},...N(r,...m));let _=y=>`
5648
5713
  ${y.registerUniform("input_size","u32").registerUniform("size_in_split_axis","u32",l.length).declareVariables(d,...a)}
5649
5714
  ${rh(l.length)}
5650
5715
  ${nh(a)}
@@ -5656,11 +5721,11 @@ fn calculateOutputIndex(index: u32) -> u32 {
5656
5721
  var index = ${d.indicesGet("indices",i)};
5657
5722
  let output_number = calculateOutputIndex(index);
5658
5723
  if (output_number != 0) {
5659
- index -= ${F("uniforms.size_in_split_axis","output_number - 1u",l.length)};
5724
+ index -= ${q("uniforms.size_in_split_axis","output_number - 1u",l.length)};
5660
5725
  ${d.indicesSet("indices",i,"index")};
5661
5726
  }
5662
5727
  writeBufferData(output_number, indices, global_idx);
5663
- }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:_,getRunData:()=>({outputs:p,dispatchGroup:{x:Math.ceil(n/64)},programUniforms:h})}},Ld=(e,t)=>{eh(e.inputs);let r=e.inputs.length===1?t:th(e.inputs,t);e.compute(To(e.inputs,r),{inputs:[0]})},Gd=e=>{let t=e.axis,r=e.splitSizes,n=e.numOutputs<0?r.length:e.numOutputs;if(n!==r.length)throw new Error("numOutputs and splitSizes lengh must be equal");return re({axis:t,numOutputs:n,splitSizes:r})}});var oh,ih,Hd,Fd,qd=U(()=>{"use strict";Se();Fr();So();Io();dt();oh=(e,t)=>{if(t.doRotary)throw new Error("GroupQuerryAttention do_rotary attribute is not supported");if(t.doRotary&&e.length<=7)throw new Error("cos_cache and sin_cache inputs are required if do_rotary is specified");let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4];if(t.localWindowSize!==-1)throw new Error("Local attention is not supported");if(t.softcap!==0)throw new Error("Softcap is not supported");if(t.rotaryInterleaved!==0)throw new Error("Rotary interleaved is not supported");if(t.smoothSoftmax)throw new Error("Smooth softmax is not supported");if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let d=!1,l=r.dims[0],p=r.dims[1],m=r.dims.length===3?d?r.dims[2]/3:r.dims[2]:t.numHeads*r.dims[4],u=p,h=0,_=!n||n.dims.length===0,y=Math.floor(_?m/(t.numHeads+2*t.kvNumHeads):m/t.numHeads);_&&(m=y*t.numHeads);let g=i&&i.dims.length!==0,x=a&&a.dims.length!==0;if(g&&i.dims.length===4&&i.dims[0]===l&&i.dims[1]!==t.kvNumHeads&&i.dims[2]===t.kvNumHeads&&i.dims[3]===y)throw new Error("BSNH pastKey/pastValue is not supported");if(g&&x){if(i.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(a.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');h=i.dims[2]}else if(g||x)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v=1;if(n&&n.dims.length>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(r.dims[2]%n.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');u=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==y)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');u=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==y)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');u=n.dims[2]}}else{if(r.dims.length!==3&&r.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(r.dims.length===5&&(r.dims[2]!==t.numHeads||r.dims[3]!==3))throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}let S=0,T=!1,A=t.kvNumHeads?y*t.kvNumHeads:m;if(o&&o.dims.length>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(u!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');A=o.dims[2]}else{if(u!==o.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');A=o.dims[1]*o.dims[3],T=!0}}let k=e.length>4?e[5]:void 0;if(k&&k.dims.length!==1&&k.dims[0]!==l)throw new Error('Input "seqlens" is expected to have 1 dimension and the same dim 0 as batch_size');let P=-1,D=-1,R=!1;return{batchSize:l,sequenceLength:p,pastSequenceLength:h,kvSequenceLength:u,totalSequenceLength:P,maxSequenceLength:D,inputHiddenSize:0,hiddenSize:m,vHiddenSize:A,headSize:y,vHeadSize:Math.floor(A/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:S,scale:t.scale,broadcastResPosBias:R,passPastInKv:T,qkvFormat:v}},ih=re({perm:[0,2,1,3]}),Hd=(e,t,r)=>{let n=t,o=r.kvNumHeads;return t.dims.length===3&&r.kvSequenceLength!==0&&(n=t.reshape([r.batchSize,r.kvSequenceLength,o,r.headSize]),n=e.compute(Pe(n,ih.perm),{inputs:[n],outputs:[-1]})[0]),n},Fd=(e,t)=>{let r=oh(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(e.inputs[1]?.dims.length===5)throw new Error("Packed KV is not implemented");let n=e.inputs[0],o=e.inputs[1]&&e.inputs[1].dims.length>0?e.inputs[1]:void 0,i=e.inputs[2]&&e.inputs[2].dims.length>0?e.inputs[2]:void 0,a=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,d=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,l=e.inputs.length>4?e.inputs[5]:void 0,p=e.inputs.length>5?e.inputs[6]:void 0,m=r.kvNumHeads?r.kvNumHeads:r.numHeads,u=re({axis:2,numOutputs:3,splitSizes:[r.numHeads*r.headSize,m*r.headSize,m*r.headSize]}),[h,_,y]=!o&&!i?e.compute(To([n],u),{inputs:[n],outputs:[-1,-1,-1]}):[n,o,i],g=tr(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,h,void 0,0);Rt(e,g,Hd(e,_,r),Hd(e,y,r),void 0,void 0,a,d,void 0,r,l,p)}});var Kd,ah,sh,jd,Yd=U(()=>{"use strict";te();oe();dt();ae();Kd=(e,t,r,n,o,i,a,d)=>{let l=me(i),p=l===1?"f32":`vec${l}f`,m=l===1?"vec2f":`mat2x${l}f`,u=o*a,h=64;u===1&&(h=256);let _=[o,a,i/l],y=[o,a,2],g=["rank","type","type"],x=[];x.push(...N(_,y));let $=v=>{let S=E("x",t.dataType,3,l),T=E("scale",r.dataType,r.dims),A=E("bias",n.dataType,n.dims),k=M("output",1,3,2),P=[S,T,A,k];return`
5728
+ }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:_,getRunData:()=>({outputs:p,dispatchGroup:{x:Math.ceil(n/64)},programUniforms:h})}},Wd=(e,t)=>{eh(e.inputs);let r=e.inputs.length===1?t:th(e.inputs,t);e.compute(So(e.inputs,r),{inputs:[0]})},Ld=e=>{let t=e.axis,r=e.splitSizes,n=e.numOutputs<0?r.length:e.numOutputs;if(n!==r.length)throw new Error("numOutputs and splitSizes lengh must be equal");return ee({axis:t,numOutputs:n,splitSizes:r})}});var oh,ih,Gd,Hd,Fd=U(()=>{"use strict";xe();Hr();xo();To();dt();oh=(e,t)=>{if(t.doRotary)throw new Error("GroupQuerryAttention do_rotary attribute is not supported");if(t.doRotary&&e.length<=7)throw new Error("cos_cache and sin_cache inputs are required if do_rotary is specified");let r=e[0],n=e[1],o=e[2],i=e[3],a=e[4];if(t.localWindowSize!==-1)throw new Error("Local attention is not supported");if(t.softcap!==0)throw new Error("Softcap is not supported");if(t.rotaryInterleaved!==0)throw new Error("Rotary interleaved is not supported");if(t.smoothSoftmax)throw new Error("Smooth softmax is not supported");if(r.dims.length!==3&&r.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let d=!1,l=r.dims[0],p=r.dims[1],m=r.dims.length===3?d?r.dims[2]/3:r.dims[2]:t.numHeads*r.dims[4],u=p,h=0,_=!n||n.dims.length===0,y=Math.floor(_?m/(t.numHeads+2*t.kvNumHeads):m/t.numHeads);_&&(m=y*t.numHeads);let g=i&&i.dims.length!==0,x=a&&a.dims.length!==0;if(g&&i.dims.length===4&&i.dims[0]===l&&i.dims[1]!==t.kvNumHeads&&i.dims[2]===t.kvNumHeads&&i.dims[3]===y)throw new Error("BSNH pastKey/pastValue is not supported");if(g&&x){if(i.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(a.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');h=i.dims[2]}else if(g||x)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let v=1;if(n&&n.dims.length>0){if(r.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(r.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(r.dims[2]%n.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');u=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==y)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(o)throw new Error('Expect "value" be none when "key" has packed kv format.');u=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==y)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');u=n.dims[2]}}else{if(r.dims.length!==3&&r.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(r.dims.length===5&&(r.dims[2]!==t.numHeads||r.dims[3]!==3))throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');v=3}let S=0,T=!1,A=t.kvNumHeads?y*t.kvNumHeads:m;if(o&&o.dims.length>0){if(o.dims.length!==3&&o.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(r.dims[0]!==o.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(o.dims.length===3){if(u!==o.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');A=o.dims[2]}else{if(u!==o.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');A=o.dims[1]*o.dims[3],T=!0}}let C=e.length>4?e[5]:void 0;if(C&&C.dims.length!==1&&C.dims[0]!==l)throw new Error('Input "seqlens" is expected to have 1 dimension and the same dim 0 as batch_size');let P=-1,D=-1,R=!1;return{batchSize:l,sequenceLength:p,pastSequenceLength:h,kvSequenceLength:u,totalSequenceLength:P,maxSequenceLength:D,inputHiddenSize:0,hiddenSize:m,vHiddenSize:A,headSize:y,vHeadSize:Math.floor(A/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:S,scale:t.scale,broadcastResPosBias:R,passPastInKv:T,qkvFormat:v}},ih=ee({perm:[0,2,1,3]}),Gd=(e,t,r)=>{let n=t,o=r.kvNumHeads;return t.dims.length===3&&r.kvSequenceLength!==0&&(n=t.reshape([r.batchSize,r.kvSequenceLength,o,r.headSize]),n=e.compute(Pe(n,ih.perm),{inputs:[n],outputs:[-1]})[0]),n},Hd=(e,t)=>{let r=oh(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(e.inputs[1]?.dims.length===5)throw new Error("Packed KV is not implemented");let n=e.inputs[0],o=e.inputs[1]&&e.inputs[1].dims.length>0?e.inputs[1]:void 0,i=e.inputs[2]&&e.inputs[2].dims.length>0?e.inputs[2]:void 0,a=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,d=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,l=e.inputs.length>4?e.inputs[5]:void 0,p=e.inputs.length>5?e.inputs[6]:void 0,m=r.kvNumHeads?r.kvNumHeads:r.numHeads,u=ee({axis:2,numOutputs:3,splitSizes:[r.numHeads*r.headSize,m*r.headSize,m*r.headSize]}),[h,_,y]=!o&&!i?e.compute(So([n],u),{inputs:[n],outputs:[-1,-1,-1]}):[n,o,i],g=er(e,r.batchSize,r.numHeads,r.sequenceLength,r.headSize,h,void 0,0);Ut(e,g,Gd(e,_,r),Gd(e,y,r),void 0,void 0,a,d,void 0,r,l,p)}});var qd,ah,sh,Kd,jd=U(()=>{"use strict";J();ne();dt();ae();qd=(e,t,r,n,o,i,a,d)=>{let l=me(i),p=l===1?"f32":`vec${l}f`,m=l===1?"vec2f":`mat2x${l}f`,u=o*a,h=64;u===1&&(h=256);let _=[o,a,i/l],y=[o,a,2],g=["rank","type","type"],x=[];x.push(...N(_,y));let $=v=>{let S=E("x",t.dataType,3,l),T=E("scale",r.dataType,r.dims),A=E("bias",n.dataType,n.dims),C=M("output",1,3,2),P=[S,T,A,C];return`
5664
5729
  var<workgroup> workgroup_shared : array<${m}, ${h}>;
5665
5730
  const workgroup_size = ${h}u;
5666
5731
  ${v.declareVariables(...P)}
@@ -5686,15 +5751,15 @@ fn calculateOutputIndex(index: u32) -> u32 {
5686
5751
  workgroupBarrier();
5687
5752
  }
5688
5753
  if (local_idx == 0) {
5689
- let sum_final = ${Fe("workgroup_shared[0][0]",l)} / f32(hight * ${l});
5690
- let squared_sum_final = ${Fe("workgroup_shared[0][1]",l)} / f32(hight * ${l});
5754
+ let sum_final = ${qe("workgroup_shared[0][0]",l)} / f32(hight * ${l});
5755
+ let squared_sum_final = ${qe("workgroup_shared[0][1]",l)} / f32(hight * ${l});
5691
5756
 
5692
5757
  let inv_std_dev = inverseSqrt(squared_sum_final - sum_final * sum_final + f32(${d}));
5693
5758
  let channel_scale = inv_std_dev * f32(scale[channel]);
5694
5759
  let channel_shift = f32(bias[channel]) - sum_final * channel_scale;
5695
5760
  output[workgroup_index] = vec2f(channel_scale, channel_shift);
5696
5761
  }
5697
- }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${l};${d};${h}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:y,dataType:1}],dispatchGroup:{x:u},programUniforms:x}),getShaderSource:$},{inputs:[t,r,n],outputs:[-1]})[0]},ah=(e,t,r)=>{let n=t[0].dims,o=n,i=2,a=n[0],d=n[1],l=C.sizeFromDimension(n,i),p=me(l),m=C.size(o)/p,u=Kd(e,t[0],t[1],t[2],a,l,d,r.epsilon),h=[a,d,l/p],_=[a,d],y=["type","none"],g=x=>{let $=E("x",t[0].dataType,h.length,p),v=E("scale_shift",1,_.length,2),S=M("output",t[0].dataType,h.length,p),T=[$,v,S];return`
5762
+ }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${l};${d};${h}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:y,dataType:1}],dispatchGroup:{x:u},programUniforms:x}),getShaderSource:$},{inputs:[t,r,n],outputs:[-1]})[0]},ah=(e,t,r)=>{let n=t[0].dims,o=n,i=2,a=n[0],d=n[1],l=k.sizeFromDimension(n,i),p=me(l),m=k.size(o)/p,u=qd(e,t[0],t[1],t[2],a,l,d,r.epsilon),h=[a,d,l/p],_=[a,d],y=["type","none"],g=x=>{let $=E("x",t[0].dataType,h.length,p),v=E("scale_shift",1,_.length,2),S=M("output",t[0].dataType,h.length,p),T=[$,v,S];return`
5698
5763
  ${x.registerUniform("output_size","u32").declareVariables(...T)}
5699
5764
  ${x.mainStart()}
5700
5765
  ${x.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5704,10 +5769,10 @@ fn calculateOutputIndex(index: u32) -> u32 {
5704
5769
  let scale_shift = ${v.getByIndices("vec2<u32>(batch, channel)")};
5705
5770
  let value = ${$.getByOffset("global_idx")} * ${S.type.value}(scale_shift.x) + ${S.type.value}(scale_shift.y);
5706
5771
  ${S.setByOffset("global_idx","value")};
5707
- }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${p}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:[{type:12,data:m},...N(h,_,h)]}),getShaderSource:g},{inputs:[t[0],u]})},sh=(e,t,r)=>{let n=t[0].dims,o=n,i=n[0],a=n[n.length-1],d=C.sizeFromDimension(n,1)/a,l=me(a),p=C.size(o)/l,m=[{type:12,data:d},{type:12,data:Math.floor(a/l)}],u=["type","type"],h=!1,_=[0,n.length-1];for(let $=0;$<n.length-2;$++)h=h||n[$+1]!==1,_.push($+1);h=h&&n[n.length-1]!==1;let y=h?e.compute(Pe(e.inputs[0],_),{inputs:[e.inputs[0]],outputs:[-1]})[0]:e.inputs[0].reshape(Array.from({length:n.length},($,v)=>n[_[v]])),g=Kd(e,y,t[1],t[2],i,d,a,r.epsilon),x=$=>{let v=_e(t[0].dataType),S=l===1?"vec2f":`mat${l}x2f`,T=P=>{let D=P===0?"x":"y",R=l===1?"f32":`vec${l}f`;switch(l){case 1:return`${v}(${R}(scale.${D}))`;case 2:return`vec2<${v}>(${R}(scale[0].${D}, scale[1].${D}))`;case 4:return`vec4<${v}>(${R}(scale[0].${D}, scale[1].${D}, scale[2].${D}, scale[3].${D}))`;default:throw new Error(`Not supported compoents ${l}`)}},A=E("input",t[0].dataType,t[0].dims,l),k=M("output",t[0].dataType,o,l);return`
5772
+ }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${p}`,inputDependencies:y},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:[{type:12,data:m},...N(h,_,h)]}),getShaderSource:g},{inputs:[t[0],u]})},sh=(e,t,r)=>{let n=t[0].dims,o=n,i=n[0],a=n[n.length-1],d=k.sizeFromDimension(n,1)/a,l=me(a),p=k.size(o)/l,m=[{type:12,data:d},{type:12,data:Math.floor(a/l)}],u=["type","type"],h=!1,_=[0,n.length-1];for(let $=0;$<n.length-2;$++)h=h||n[$+1]!==1,_.push($+1);h=h&&n[n.length-1]!==1;let y=h?e.compute(Pe(e.inputs[0],_),{inputs:[e.inputs[0]],outputs:[-1]})[0]:e.inputs[0].reshape(Array.from({length:n.length},($,v)=>n[_[v]])),g=qd(e,y,t[1],t[2],i,d,a,r.epsilon),x=$=>{let v=ye(t[0].dataType),S=l===1?"vec2f":`mat${l}x2f`,T=P=>{let D=P===0?"x":"y",R=l===1?"f32":`vec${l}f`;switch(l){case 1:return`${v}(${R}(scale.${D}))`;case 2:return`vec2<${v}>(${R}(scale[0].${D}, scale[1].${D}))`;case 4:return`vec4<${v}>(${R}(scale[0].${D}, scale[1].${D}, scale[2].${D}, scale[3].${D}))`;default:throw new Error(`Not supported compoents ${l}`)}},A=E("input",t[0].dataType,t[0].dims,l),C=M("output",t[0].dataType,o,l);return`
5708
5773
  @group(0) @binding(0) var<storage, read> input : array<${A.type.storage}>;
5709
5774
  @group(0) @binding(1) var<storage, read> scale_input : array<${S}>;
5710
- @group(0) @binding(2) var<storage, read_write> output : array<${k.type.storage}>;
5775
+ @group(0) @binding(2) var<storage, read_write> output : array<${C.type.storage}>;
5711
5776
  struct Uniforms {H: u32, C : u32};
5712
5777
  @group(0) @binding(3) var<uniform> uniforms: Uniforms;
5713
5778
 
@@ -5718,52 +5783,52 @@ fn calculateOutputIndex(index: u32) -> u32 {
5718
5783
  let scale_offset = current_image_number * uniforms.C + current_channel_number;
5719
5784
  let scale = scale_input[scale_offset];
5720
5785
  output[global_idx] = fma(input[global_idx], ${T(0)}, ${T(1)});
5721
- }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${l}`,inputDependencies:u},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:m}),getShaderSource:x},{inputs:[t[0],g]})},jd=(e,t)=>{t.format==="NHWC"?sh(e,e.inputs,t):ah(e,e.inputs,t)}});var uh,dh,Zd,Qd=U(()=>{"use strict";te();oe();ae();uh=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},dh=(e,t,r)=>{let n=t.simplified,o=e[0].dims,i=e[1],a=!n&&e[2],d=o,l=C.normalizeAxis(t.axis,o.length),p=C.sizeToDimension(o,l),m=C.sizeFromDimension(o,l),u=C.size(i.dims),h=a?C.size(a.dims):0;if(u!==m||a&&h!==m)throw new Error(`Size of X.shape()[axis:] == ${m}.
5786
+ }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${l}`,inputDependencies:u},getRunData:()=>({outputs:[{dims:o,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:m}),getShaderSource:x},{inputs:[t[0],g]})},Kd=(e,t)=>{t.format==="NHWC"?sh(e,e.inputs,t):ah(e,e.inputs,t)}});var uh,dh,Yd,Zd=U(()=>{"use strict";J();ne();ae();uh=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},dh=(e,t,r)=>{let n=t.simplified,o=e[0].dims,i=e[1],a=!n&&e[2],d=o,l=k.normalizeAxis(t.axis,o.length),p=k.sizeToDimension(o,l),m=k.sizeFromDimension(o,l),u=k.size(i.dims),h=a?k.size(a.dims):0;if(u!==m||a&&h!==m)throw new Error(`Size of X.shape()[axis:] == ${m}.
5722
5787
  Size of scale and bias (if provided) must match this.
5723
- Got scale size of ${u} and bias size of ${h}`);let _=[];for(let A=0;A<o.length;++A)A<l?_.push(o[A]):_.push(1);let y=me(m),g=["type","type"],x=[{type:12,data:p},{type:1,data:m},{type:12,data:Math.floor(m/y)},{type:1,data:t.epsilon}];a&&g.push("type");let $=r>1,v=r>2,S=A=>{let k=_e(e[0].dataType),P=[E("x",e[0].dataType,e[0].dims,y),E("scale",i.dataType,i.dims,y)];a&&P.push(E("bias",a.dataType,a.dims,y)),P.push(M("output",e[0].dataType,d,y)),$&&P.push(M("mean_data_output",1,_)),v&&P.push(M("inv_std_output",1,_));let D=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return`
5788
+ Got scale size of ${u} and bias size of ${h}`);let _=[];for(let A=0;A<o.length;++A)A<l?_.push(o[A]):_.push(1);let y=me(m),g=["type","type"],x=[{type:12,data:p},{type:1,data:m},{type:12,data:Math.floor(m/y)},{type:1,data:t.epsilon}];a&&g.push("type");let $=r>1,v=r>2,S=A=>{let C=ye(e[0].dataType),P=[E("x",e[0].dataType,e[0].dims,y),E("scale",i.dataType,i.dims,y)];a&&P.push(E("bias",a.dataType,a.dims,y)),P.push(M("output",e[0].dataType,d,y)),$&&P.push(M("mean_data_output",1,_)),v&&P.push(M("inv_std_output",1,_));let D=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return`
5724
5789
  ${A.registerUniforms(D).declareVariables(...P)}
5725
5790
  ${A.mainStart()}
5726
5791
  ${A.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")}
5727
5792
  let offset = global_idx * uniforms.norm_size_vectorized;
5728
- var mean_vector = ${uo("f32",y)};
5729
- var mean_square_vector = ${uo("f32",y)};
5793
+ var mean_vector = ${so("f32",y)};
5794
+ var mean_square_vector = ${so("f32",y)};
5730
5795
 
5731
5796
  for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) {
5732
- let value = ${Et(k,y,"x[h + offset]")};
5797
+ let value = ${Pt(C,y,"x[h + offset]")};
5733
5798
  mean_vector += value;
5734
5799
  mean_square_vector += value * value;
5735
5800
  }
5736
- let mean = ${Fe("mean_vector",y)} / uniforms.norm_size;
5737
- let inv_std_dev = inverseSqrt(${Fe("mean_square_vector",y)} / uniforms.norm_size ${n?"":"- mean * mean"} + uniforms.epsilon);
5801
+ let mean = ${qe("mean_vector",y)} / uniforms.norm_size;
5802
+ let inv_std_dev = inverseSqrt(${qe("mean_square_vector",y)} / uniforms.norm_size ${n?"":"- mean * mean"} + uniforms.epsilon);
5738
5803
 
5739
5804
  for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) {
5740
- let f32input = ${Et(k,y,"x[j + offset]")};
5741
- let f32scale = ${Et(k,y,"scale[j]")};
5805
+ let f32input = ${Pt(C,y,"x[j + offset]")};
5806
+ let f32scale = ${Pt(C,y,"scale[j]")};
5742
5807
  output[j + offset] = ${P[0].type.value}((f32input ${n?"":"- mean"}) * inv_std_dev * f32scale
5743
- ${a?`+ ${Et(k,y,"bias[j]")}`:""}
5808
+ ${a?`+ ${Pt(C,y,"bias[j]")}`:""}
5744
5809
  );
5745
5810
  }
5746
5811
 
5747
5812
  ${$?"mean_data_output[global_idx] = mean":""};
5748
5813
  ${v?"inv_std_output[global_idx] = inv_std_dev":""};
5749
- }`},T=[{dims:d,dataType:e[0].dataType}];return $&&T.push({dims:_,dataType:1}),v&&T.push({dims:_,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${y};${r};${n}`,inputDependencies:g},getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(p/64)},programUniforms:x}),getShaderSource:S}},Zd=(e,t)=>{uh(e.inputs),e.compute(dh(e.inputs,t,e.outputCount))}});var lh,Xd,Jd=U(()=>{"use strict";oe();Qr();Xr();lh=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},Xd=e=>{lh(e.inputs);let t=tt.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let r=t[t.length-1],n=e.inputs[0].dims[e.inputs[0].dims.length-1];if(r<8&&n<8)e.compute(Zr(e.inputs,{activation:""},t));else{let o=t[t.length-2],i=C.size(e.inputs[0].dims.slice(0,-2)),a=C.size(e.inputs[1].dims.slice(0,-2));if(i!==1&&o===1&&a===1){let d=e.inputs[0].reshape([1,i,n]),l=e.inputs[1].reshape([1,n,r]),p=[1,i,r],m=[d,l];e.compute(er(m,{activation:""},t,p),{inputs:m})}else e.compute(er(e.inputs,{activation:""},t))}}});var ch,ph,mh,el,tl,rl=U(()=>{"use strict";te();oe();Se();ae();ch=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let r=e[0],n=r.dims.length;if(r.dims[n-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let o=Math.floor((t.k+t.blockSize-1)/t.blockSize),i=t.blockSize/8*t.bits,a=e[1];if(!C.areEqual(a.dims,[t.n,o,i]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let l=e[2].dims;if(C.size(l)!==t.n*o)throw new Error("scales input size error.");if(e.length===4){let m=e[3].dims,u=t.bits>4?t.n*o:t.n*Math.floor((o+1)/2);if(C.size(m)!==u)throw new Error("zeroPoints input size error.")}},ph=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=C.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=me(t.k),_=me(m),y=me(a),g=d.concat([o,a]),x=o>1&&a/y%2===0?2:1,$=C.size(g)/y/x,v=64,S=[],T=[l,o,i/h],A=C.convertShape(e[1].dims).slice();A.splice(-1,1,m/_),S.push(...N(T)),S.push(...N(A)),S.push(...N(e[2].dims)),e.length===4&&S.push(...N(C.convertShape(e[3].dims)));let k=[l,o,a/y];S.push(...N(k));let P=D=>{let R=T.length,G=E("a",e[0].dataType,R,h),K=E("b",12,A.length,_),j=E("scales",e[2].dataType,e[2].dims.length),V=[G,K,j],Q=e.length===4?E("zero_points",12,e[3].dims.length):void 0;Q&&V.push(Q);let se=k.length,Y=M("output",e[0].dataType,se,y),ee=_e(e[0].dataType),J=(()=>{switch(h){case 1:return`array<${ee}, 8>`;case 2:return`mat4x2<${ee}>`;case 4:return`mat2x4<${ee}>`;default:throw new Error(`${h}-component is not supported.`)}})(),ne=()=>{let $e=`
5814
+ }`},T=[{dims:d,dataType:e[0].dataType}];return $&&T.push({dims:_,dataType:1}),v&&T.push({dims:_,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${y};${r};${n}`,inputDependencies:g},getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(p/64)},programUniforms:x}),getShaderSource:S}},Yd=(e,t)=>{uh(e.inputs),e.compute(dh(e.inputs,t,e.outputCount))}});var lh,Qd,Xd=U(()=>{"use strict";ne();Zr();Qr();lh=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},Qd=e=>{lh(e.inputs);let t=tt.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let r=t[t.length-1],n=e.inputs[0].dims[e.inputs[0].dims.length-1];if(r<8&&n<8)e.compute(Yr(e.inputs,{activation:""},t));else{let o=t[t.length-2],i=k.size(e.inputs[0].dims.slice(0,-2)),a=k.size(e.inputs[1].dims.slice(0,-2));if(i!==1&&o===1&&a===1){let d=e.inputs[0].reshape([1,i,n]),l=e.inputs[1].reshape([1,n,r]),p=[1,i,r],m=[d,l];e.compute(Jt(m,{activation:""},t,p),{inputs:m})}else e.compute(Jt(e.inputs,{activation:""},t))}}});var ch,ph,mh,Jd,el,tl=U(()=>{"use strict";J();ne();xe();ae();ch=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let r=e[0],n=r.dims.length;if(r.dims[n-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let o=Math.floor((t.k+t.blockSize-1)/t.blockSize),i=t.blockSize/8*t.bits,a=e[1];if(!k.areEqual(a.dims,[t.n,o,i]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let l=e[2].dims;if(k.size(l)!==t.n*o)throw new Error("scales input size error.");if(e.length===4){let m=e[3].dims,u=t.bits>4?t.n*o:t.n*Math.floor((o+1)/2);if(k.size(m)!==u)throw new Error("zeroPoints input size error.")}},ph=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=me(t.k),_=me(m),y=me(a),g=d.concat([o,a]),x=o>1&&a/y%2===0?2:1,$=k.size(g)/y/x,v=64,S=[],T=[l,o,i/h],A=k.convertShape(e[1].dims).slice();A.splice(-1,1,m/_),S.push(...N(T)),S.push(...N(A)),S.push(...N(e[2].dims)),e.length===4&&S.push(...N(k.convertShape(e[3].dims)));let C=[l,o,a/y];S.push(...N(C));let P=D=>{let R=T.length,H=E("a",e[0].dataType,R,h),L=E("b",12,A.length,_),re=E("scales",e[2].dataType,e[2].dims.length),V=[H,L,re],K=e.length===4?E("zero_points",12,e[3].dims.length):void 0;K&&V.push(K);let we=C.length,j=M("output",e[0].dataType,we,y),Q=ye(e[0].dataType),ie=(()=>{switch(h){case 1:return`array<${Q}, 8>`;case 2:return`mat4x2<${Q}>`;case 4:return`mat2x4<${Q}>`;default:throw new Error(`${h}-component is not supported.`)}})(),te=()=>{let ve=`
5750
5815
  // reuse a data
5751
- var input_offset = ${G.indicesToOffset(`${G.type.indices}(batch, row, word_offset)`)};
5752
- var a_data: ${J};
5816
+ var input_offset = ${H.indicesToOffset(`${H.type.indices}(batch, row, word_offset)`)};
5817
+ var a_data: ${ie};
5753
5818
  for (var j: u32 = 0; j < ${8/h}; j++) {
5754
- a_data[j] = ${G.getByOffset("input_offset")};
5819
+ a_data[j] = ${H.getByOffset("input_offset")};
5755
5820
  input_offset++;
5756
5821
  }
5757
- `;for(let le=0;le<y*x;le++)$e+=`
5758
- b_value = ${_===1?`b${le}_data`:`b${le}_data[i]`};
5822
+ `;for(let de=0;de<y*x;de++)ve+=`
5823
+ b_value = ${_===1?`b${de}_data`:`b${de}_data[i]`};
5759
5824
  b_value_lower = unpack4xU8(b_value & b_mask);
5760
5825
  b_value_upper = unpack4xU8((b_value >> 4) & b_mask);
5761
- b_quantized_values = ${J}(${Array.from({length:4},(W,q)=>`${ee}(b_value_lower[${q}]), ${ee}(b_value_upper[${q}])`).join(", ")});
5762
- b_dequantized_values = ${(()=>h===1?`${J}(${Array.from({length:8},(W,q)=>`(b_quantized_values[${q}] - ${Q?`zero_point${le}`:"zero_point"}) * scale${le}`).join(", ")});`:`(b_quantized_values - ${J}(${Array(8).fill(`${Q?`zero_point${le}`:"zero_point"}`).join(",")})) * scale${le};`)()};
5763
- workgroup_shared[local_id.x * ${x} + ${Math.floor(le/y)}]${y>1?`[${le%y}]`:""} += ${Array.from({length:8/h},(W,q)=>`${h===1?`a_data[${q}] * b_dequantized_values[${q}]`:`dot(a_data[${q}], b_dequantized_values[${q}])`}`).join(" + ")};
5764
- `;return $e},be=()=>{let $e=`
5826
+ b_quantized_values = ${ie}(${Array.from({length:4},(W,Y)=>`${Q}(b_value_lower[${Y}]), ${Q}(b_value_upper[${Y}])`).join(", ")});
5827
+ b_dequantized_values = ${(()=>h===1?`${ie}(${Array.from({length:8},(W,Y)=>`(b_quantized_values[${Y}] - ${K?`zero_point${de}`:"zero_point"}) * scale${de}`).join(", ")});`:`(b_quantized_values - ${ie}(${Array(8).fill(`${K?`zero_point${de}`:"zero_point"}`).join(",")})) * scale${de};`)()};
5828
+ workgroup_shared[local_id.x * ${x} + ${Math.floor(de/y)}]${y>1?`[${de%y}]`:""} += ${Array.from({length:8/h},(W,Y)=>`${h===1?`a_data[${Y}] * b_dequantized_values[${Y}]`:`dot(a_data[${Y}], b_dequantized_values[${Y}])`}`).join(" + ")};
5829
+ `;return ve},be=()=>{let ve=`
5765
5830
  var col_index = col * ${y};
5766
- ${Q?`
5831
+ ${K?`
5767
5832
  let zero_point_bytes_per_col = (nBlocksPerCol + 1) / 2;
5768
5833
  var zero_point_byte_count: u32;
5769
5834
  var zero_point_word_index: u32;
@@ -5772,29 +5837,29 @@ fn calculateOutputIndex(index: u32) -> u32 {
5772
5837
  var zero_point_bits_offset: u32;
5773
5838
  var zero_point_word: u32;`:`
5774
5839
  // The default zero point is 8 for unsigned 4-bit quantization.
5775
- let zero_point = ${ee}(8);`}
5776
- `;for(let le=0;le<y*x;le++)$e+=`
5777
- let scale${le} = ${j.getByOffset("col_index * nBlocksPerCol + block")};
5778
- ${Q?`
5840
+ let zero_point = ${Q}(8);`}
5841
+ `;for(let de=0;de<y*x;de++)ve+=`
5842
+ let scale${de} = ${re.getByOffset("col_index * nBlocksPerCol + block")};
5843
+ ${K?`
5779
5844
  zero_point_byte_count = col_index * zero_point_bytes_per_col + (block >> 0x1u);
5780
5845
  zero_point_word_index = zero_point_byte_count >> 0x2u;
5781
5846
  zero_point_byte_offset = zero_point_byte_count & 0x3u;
5782
5847
  zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);
5783
- zero_point_word = ${Q.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5784
- let zero_point${le} = ${ee}((zero_point_word) & 0xFu);`:""}
5785
- col_index += 1;`;return $e},Oe=()=>{let $e=`col_index = col * ${y};`;for(let le=0;le<y*x;le++)$e+=`
5786
- let b${le}_data = ${K.getByIndices(`${K.type.indices}(col_index, block, word)`)};
5787
- col_index += 1;`;return $e+=`
5848
+ zero_point_word = ${K.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5849
+ let zero_point${de} = ${Q}((zero_point_word) & 0xFu);`:""}
5850
+ col_index += 1;`;return ve},Oe=()=>{let ve=`col_index = col * ${y};`;for(let de=0;de<y*x;de++)ve+=`
5851
+ let b${de}_data = ${L.getByIndices(`${L.type.indices}(col_index, block, word)`)};
5852
+ col_index += 1;`;return ve+=`
5788
5853
  var b_value: u32;
5789
5854
  let b_mask: u32 = 0x0F0F0F0Fu;
5790
5855
  var b_value_lower: vec4<u32>;
5791
5856
  var b_value_upper: vec4<u32>;
5792
- var b_quantized_values: ${J};
5793
- var b_dequantized_values: ${J};`,$e};return`
5794
- var<workgroup> workgroup_shared: array<${Y.type.value}, ${x*v}>;
5795
- ${D.declareVariables(...V,Y)}
5857
+ var b_quantized_values: ${ie};
5858
+ var b_dequantized_values: ${ie};`,ve};return`
5859
+ var<workgroup> workgroup_shared: array<${j.type.value}, ${x*v}>;
5860
+ ${D.declareVariables(...V,j)}
5796
5861
  ${D.mainStart([v,1,1])}
5797
- let output_indices = ${Y.offsetToIndices(`(global_idx / ${v}) * ${x}`)};
5862
+ let output_indices = ${j.offsetToIndices(`(global_idx / ${v}) * ${x}`)};
5798
5863
  let col = output_indices[2];
5799
5864
  let row = output_indices[1];
5800
5865
  let batch = output_indices[0];
@@ -5807,7 +5872,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
5807
5872
  for (var word: u32 = 0; word < ${m}; word += ${_}) {
5808
5873
  ${Oe()}
5809
5874
  for (var i: u32 = 0; i < ${_}; i++) {
5810
- ${ne()}
5875
+ ${te()}
5811
5876
  word_offset += ${8/h};
5812
5877
  }
5813
5878
  }
@@ -5815,15 +5880,15 @@ fn calculateOutputIndex(index: u32) -> u32 {
5815
5880
  workgroupBarrier();
5816
5881
 
5817
5882
  if (local_id.x < ${x}) {
5818
- var output_value: ${Y.type.value} = ${Y.type.value}(0);
5883
+ var output_value: ${j.type.value} = ${j.type.value}(0);
5819
5884
  var workgroup_shared_offset: u32 = local_id.x;
5820
5885
  for (var b: u32 = 0u; b < ${v}u; b++) {
5821
5886
  output_value += workgroup_shared[workgroup_shared_offset];
5822
5887
  workgroup_shared_offset += ${x};
5823
5888
  }
5824
- ${Y.setByIndices(`${Y.type.indices}(batch, row, col + local_id.x)`,"output_value")};
5889
+ ${j.setByIndices(`${j.type.indices}(batch, row, col + local_id.x)`,"output_value")};
5825
5890
  }
5826
- }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${h};${_};${y};${x};${v}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:g,dataType:u}],dispatchGroup:{x:$},programUniforms:S}),getShaderSource:P}},mh=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=C.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=me(t.k),_=me(m),y=d.concat([o,a]),g=128,x=a%8===0?8:a%4===0?4:1,$=g/x,v=$*_*8,S=v/h,T=v/t.blockSize,A=C.size(y)/x,k=[],P=[l,o,i/h],D=C.convertShape(e[1].dims).slice();D.splice(-1,1,m/_),k.push(...N(P)),k.push(...N(D)),k.push(...N(e[2].dims)),e.length===4&&k.push(...N(C.convertShape(e[3].dims)));let R=[l,o,a];k.push(...N(R));let G=K=>{let j=P.length,V=E("a",e[0].dataType,j,h),Q=E("b",12,D.length,_),se=E("scales",e[2].dataType,e[2].dims.length),Y=[V,Q,se],ee=e.length===4?E("zero_points",12,e[3].dims.length):void 0;ee&&Y.push(ee);let J=R.length,ne=M("output",e[0].dataType,J),be=_e(e[0].dataType),Oe=()=>{switch(h){case 1:return`
5891
+ }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${h};${_};${y};${x};${v}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:g,dataType:u}],dispatchGroup:{x:$},programUniforms:S}),getShaderSource:P}},mh=(e,t)=>{let r=e[0].dims,n=r.length,o=r[n-2],i=t.k,a=t.n,d=r.slice(0,n-2),l=k.size(d),m=e[1].dims[2]/4,u=e[0].dataType,h=me(t.k),_=me(m),y=d.concat([o,a]),g=128,x=a%8===0?8:a%4===0?4:1,$=g/x,v=$*_*8,S=v/h,T=v/t.blockSize,A=k.size(y)/x,C=[],P=[l,o,i/h],D=k.convertShape(e[1].dims).slice();D.splice(-1,1,m/_),C.push(...N(P)),C.push(...N(D)),C.push(...N(e[2].dims)),e.length===4&&C.push(...N(k.convertShape(e[3].dims)));let R=[l,o,a];C.push(...N(R));let H=L=>{let re=P.length,V=E("a",e[0].dataType,re,h),K=E("b",12,D.length,_),we=E("scales",e[2].dataType,e[2].dims.length),j=[V,K,we],Q=e.length===4?E("zero_points",12,e[3].dims.length):void 0;Q&&j.push(Q);let ie=R.length,te=M("output",e[0].dataType,ie),be=ye(e[0].dataType),Oe=()=>{switch(h){case 1:return`
5827
5892
  let a_data0 = vec4<${be}>(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]);
5828
5893
  let a_data1 = vec4<${be}>(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]);`;case 2:return`
5829
5894
  let a_data0 = vec4<${be}>(sub_a[word_offset], sub_a[word_offset + 1]);
@@ -5831,10 +5896,10 @@ fn calculateOutputIndex(index: u32) -> u32 {
5831
5896
  let a_data0 = sub_a[word_offset];
5832
5897
  let a_data1 = sub_a[word_offset + 1];`;default:throw new Error(`${h}-component is not supported.`)}};return`
5833
5898
  var<workgroup> sub_a: array<${V.type.value}, ${S}>;
5834
- var<workgroup> inter_results: array<array<${ne.type.value}, ${$}>, ${x}>;
5835
- ${K.declareVariables(...Y,ne)}
5836
- ${K.mainStart([$,x,1])}
5837
- let output_indices = ${ne.offsetToIndices(`workgroup_index * ${x}`)};
5899
+ var<workgroup> inter_results: array<array<${te.type.value}, ${$}>, ${x}>;
5900
+ ${L.declareVariables(...j,te)}
5901
+ ${L.mainStart([$,x,1])}
5902
+ let output_indices = ${te.offsetToIndices(`workgroup_index * ${x}`)};
5838
5903
  let col = output_indices[2];
5839
5904
  let row = output_indices[1];
5840
5905
  let batch = output_indices[0];
@@ -5860,52 +5925,52 @@ fn calculateOutputIndex(index: u32) -> u32 {
5860
5925
  // each thread process one block
5861
5926
  let b_row = col + local_id.y;
5862
5927
  let block = tile * ${T} + local_id.x;
5863
- ${ee?`
5928
+ ${Q?`
5864
5929
  let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2;
5865
5930
  let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u);
5866
5931
  let zero_point_word_index = zero_point_byte_count >> 0x2u;
5867
5932
  let zero_point_byte_offset = zero_point_byte_count & 0x3u;
5868
5933
  let zero_point_nibble_offset: u32 = block & 0x1u;
5869
5934
  let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);
5870
- let zero_point_word = ${ee.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5935
+ let zero_point_word = ${Q.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;
5871
5936
  let zero_point = ${be}((zero_point_word) & 0xFu);`:`
5872
5937
  // The default zero point is 8 for unsigned 4-bit quantization.
5873
5938
  let zero_point = ${be}(8);`}
5874
- let scale = ${se.getByOffset("b_row * n_blocks_per_col + block")};
5875
- let b_data = ${Q.getByIndices(`${Q.type.indices}(b_row, block, 0)`)};
5939
+ let scale = ${we.getByOffset("b_row * n_blocks_per_col + block")};
5940
+ let b_data = ${K.getByIndices(`${K.type.indices}(b_row, block, 0)`)};
5876
5941
  var word_offset = local_id.x * ${t.blockSize/h};
5877
5942
  for (var i: u32 = 0; i < ${_}; i++) {
5878
5943
  ${Oe()}
5879
5944
  let b_value = ${_===1?"b_data":"b_data[i]"};
5880
5945
  let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu);
5881
5946
  let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu);
5882
- let b_quantized_values = mat2x4<${be}>(${Array.from({length:4},($e,le)=>`${be}(b_value_lower[${le}]), ${be}(b_value_upper[${le}])`).join(", ")});
5947
+ let b_quantized_values = mat2x4<${be}>(${Array.from({length:4},(ve,de)=>`${be}(b_value_lower[${de}]), ${be}(b_value_upper[${de}])`).join(", ")});
5883
5948
  let b_dequantized_values = (b_quantized_values - mat2x4<${be}>(${Array(8).fill("zero_point").join(",")})) * scale;
5884
- inter_results[local_id.y][local_id.x] += ${Array.from({length:2},($e,le)=>`${`dot(a_data${le}, b_dequantized_values[${le}])`}`).join(" + ")};
5949
+ inter_results[local_id.y][local_id.x] += ${Array.from({length:2},(ve,de)=>`${`dot(a_data${de}, b_dequantized_values[${de}])`}`).join(" + ")};
5885
5950
  word_offset += ${8/h};
5886
5951
  }
5887
5952
  workgroupBarrier();
5888
5953
  }
5889
5954
 
5890
5955
  if (local_idx < ${x}) {
5891
- var output_value: ${ne.type.value} = ${ne.type.value}(0);
5956
+ var output_value: ${te.type.value} = ${te.type.value}(0);
5892
5957
  for (var b = 0u; b < ${$}; b++) {
5893
5958
  output_value += inter_results[local_idx][b];
5894
5959
  }
5895
5960
  if (col + local_idx < uniforms.output_shape[2])
5896
5961
  {
5897
- ${ne.setByIndices(`${ne.type.indices}(batch, row, col + local_idx)`,"output_value")}
5962
+ ${te.setByIndices(`${te.type.indices}(batch, row, col + local_idx)`,"output_value")}
5898
5963
  }
5899
5964
  }
5900
- }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${h};${_};${$};${x}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:y,dataType:u}],dispatchGroup:{x:A},programUniforms:k}),getShaderSource:G}},el=(e,t)=>{ch(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(mh(e.inputs,t)):e.compute(ph(e.inputs,t))},tl=e=>re(e)});var fh,hh,gh,bh,yh,_h,wh,vh,nl,ol=U(()=>{"use strict";te();oe();ae();fh=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},hh=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5901
- k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5965
+ }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${h};${_};${$};${x}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:y,dataType:u}],dispatchGroup:{x:A},programUniforms:C}),getShaderSource:H}},Jd=(e,t)=>{ch(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(mh(e.inputs,t)):e.compute(ph(e.inputs,t))},el=e=>ee(e)});var fh,hh,gh,bh,yh,_h,wh,vh,rl,nl=U(()=>{"use strict";J();ne();ae();fh=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},hh=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5966
+ k = i32(${e.indicesGet("indices",o)}) - ${q("uniforms.pads",o,r)};
5902
5967
  if (k < 0) {
5903
5968
  break;
5904
5969
  }
5905
- if (k >= i32(${F("uniforms.x_shape",o,t)})) {
5970
+ if (k >= i32(${q("uniforms.x_shape",o,t)})) {
5906
5971
  break;
5907
5972
  }
5908
- offset += k * i32(${F("uniforms.x_strides",o,t)});
5973
+ offset += k * i32(${q("uniforms.x_strides",o,t)});
5909
5974
  `;return`
5910
5975
  value = ${e.type.value}(uniforms.constant_value);
5911
5976
  for (var i = 0; i < 1; i++) {
@@ -5915,52 +5980,52 @@ fn calculateOutputIndex(index: u32) -> u32 {
5915
5980
  value = x[offset];
5916
5981
  }
5917
5982
  `},gh=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5918
- k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
5983
+ k = i32(${e.indicesGet("indices",o)}) - ${q("uniforms.pads",o,r)};
5919
5984
  if (k < 0) {
5920
5985
  k = -k;
5921
5986
  }
5922
5987
  {
5923
- let _2n_1 = 2 * (i32(${F("uniforms.x_shape",o,t)}) - 1);
5988
+ let _2n_1 = 2 * (i32(${q("uniforms.x_shape",o,t)}) - 1);
5924
5989
  k = k % _2n_1;
5925
- if(k >= i32(${F("uniforms.x_shape",o,t)})) {
5990
+ if(k >= i32(${q("uniforms.x_shape",o,t)})) {
5926
5991
  k = _2n_1 - k;
5927
5992
  }
5928
5993
  }
5929
- offset += k * i32(${F("uniforms.x_strides",o,t)});
5994
+ offset += k * i32(${q("uniforms.x_strides",o,t)});
5930
5995
  `;return`
5931
5996
  var offset = 0;
5932
5997
  var k = 0;
5933
5998
  ${n}
5934
5999
  value = x[offset];
5935
6000
  `},bh=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5936
- k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
6001
+ k = i32(${e.indicesGet("indices",o)}) - ${q("uniforms.pads",o,r)};
5937
6002
  if (k < 0) {
5938
6003
  k = 0;
5939
6004
  }
5940
- if (k >= i32(${F("uniforms.x_shape",o,t)})) {
5941
- k = i32(${F("uniforms.x_shape",o,t)}) - 1;
6005
+ if (k >= i32(${q("uniforms.x_shape",o,t)})) {
6006
+ k = i32(${q("uniforms.x_shape",o,t)}) - 1;
5942
6007
  }
5943
- offset += k * i32(${F("uniforms.x_strides",o,t)});
6008
+ offset += k * i32(${q("uniforms.x_strides",o,t)});
5944
6009
  `;return`
5945
6010
  var offset = 0;
5946
6011
  var k = 0;
5947
6012
  ${n}
5948
6013
  value = x[offset];
5949
6014
  `},yh=(e,t,r)=>{let n="";for(let o=t-1;o>=0;--o)n+=`
5950
- k = i32(${e.indicesGet("indices",o)}) - ${F("uniforms.pads",o,r)};
6015
+ k = i32(${e.indicesGet("indices",o)}) - ${q("uniforms.pads",o,r)};
5951
6016
  if (k < 0) {
5952
- k += i32(${F("uniforms.x_shape",o,t)}]);
6017
+ k += i32(${q("uniforms.x_shape",o,t)}]);
5953
6018
  }
5954
- if (k >= i32(${F("uniforms.x_shape",o,t)})) {
5955
- k -= i32(${F("uniforms.x_shape",o,t)});
6019
+ if (k >= i32(${q("uniforms.x_shape",o,t)})) {
6020
+ k -= i32(${q("uniforms.x_shape",o,t)});
5956
6021
  }
5957
- offset += k * i32(${F("uniforms.x_strides",o,t)});
6022
+ offset += k * i32(${q("uniforms.x_strides",o,t)});
5958
6023
  `;return`
5959
6024
  var offset = 0;
5960
6025
  var k = 0;
5961
6026
  ${n}
5962
6027
  value = x[offset];
5963
- `},_h=(e,t,r)=>{switch(r.mode){case 0:return hh(e,t,r.pads.length);case 1:return gh(e,t,r.pads.length);case 2:return bh(e,t,r.pads.length);case 3:return yh(e,t,r.pads.length);default:throw new Error("Invalid mode")}},wh=(e,t)=>{let r=C.padShape(e[0].dims.slice(),t.pads),n=e[0].dims,o=C.size(r),i=[{type:12,data:o},{type:6,data:t.pads}],a=e.length>=3&&e[2].data;t.mode===0&&i.push({type:a?e[2].dataType:1,data:t.value}),i.push(...N(e[0].dims,r));let d=["rank"],l=p=>{let m=M("output",e[0].dataType,r.length),u=E("x",e[0].dataType,n.length),h=u.type.value,_=_h(m,n.length,t),y=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&y.push({name:"constant_value",type:a?h:"f32"}),`
6028
+ `},_h=(e,t,r)=>{switch(r.mode){case 0:return hh(e,t,r.pads.length);case 1:return gh(e,t,r.pads.length);case 2:return bh(e,t,r.pads.length);case 3:return yh(e,t,r.pads.length);default:throw new Error("Invalid mode")}},wh=(e,t)=>{let r=k.padShape(e[0].dims.slice(),t.pads),n=e[0].dims,o=k.size(r),i=[{type:12,data:o},{type:6,data:t.pads}],a=e.length>=3&&e[2].data;t.mode===0&&i.push({type:a?e[2].dataType:1,data:t.value}),i.push(...N(e[0].dims,r));let d=["rank"],l=p=>{let m=M("output",e[0].dataType,r.length),u=E("x",e[0].dataType,n.length),h=u.type.value,_=_h(m,n.length,t),y=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&y.push({name:"constant_value",type:a?h:"f32"}),`
5964
6029
  ${p.registerUniforms(y).declareVariables(u,m)}
5965
6030
  ${p.mainStart()}
5966
6031
  ${p.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -5970,7 +6035,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
5970
6035
  var value = ${h}(0);
5971
6036
  ${_}
5972
6037
  output[global_idx] = value;
5973
- }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${a}`,inputDependencies:d},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(C.size(r)/64)},programUniforms:i}),getShaderSource:l}},vh=(e,t)=>{if(e.length>1){let r=e[1].getBigInt64Array(),n=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,o=e[0].dims.length,i=new Int32Array(2*o).fill(0);if(e.length>=4){let d=e[3].getBigInt64Array();for(let l=0;l<d.length;l++)i[Number(d[l])]=Number(r[l]),i[Number(d[l])+o]=Number(r[l+d.length])}else r.forEach((d,l)=>i[Number(l)]=Number(d));let a=[];return i.forEach(d=>a.push(d)),{mode:t.mode,value:n,pads:a}}else return t},nl=(e,t)=>{fh(e.inputs);let r=vh(e.inputs,t);e.compute(wh(e.inputs,r),{inputs:[0]})}});var tn,il,al,sl,ul,$h,xh,dl,ll,cl,pl,ml,fl,hl,gl,bl,yl,_l,wl,vl=U(()=>{"use strict";We();te();oe();ae();tn=e=>{if(ve.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},il=(e,t,r)=>{let n=t.format==="NHWC",o=e.dims.slice();n&&o.splice(1,0,o.pop());let i=Object.hasOwnProperty.call(t,"dilations"),a=t.kernelShape.slice(),d=t.strides.slice(),l=i?t.dilations.slice():[],p=t.pads.slice();At.adjustPoolAttributes(r,o,a,d,l,p);let m=At.computePoolOutputShape(r,o,d,l,a,p,t.autoPad),u=Object.assign({},t);i?Object.assign(u,{kernelShape:a,strides:d,pads:p,dilations:l,cacheKey:t.cacheKey}):Object.assign(u,{kernelShape:a,strides:d,pads:p,cacheKey:t.cacheKey});let h=m.slice();return h.push(h.splice(1,1)[0]),[u,n?h:m]},al=(e,t)=>{let r=t.format==="NHWC",n=C.size(e),o=C.size(t.kernelShape),i=[{type:12,data:n},{type:12,data:o}],a=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let d=t.kernelShape[t.kernelShape.length-1],l=t.strides[t.strides.length-1],p=t.pads[t.pads.length/2-1],m=t.pads[t.pads.length-1],u=!!(p+m);i.push({type:12,data:d},{type:12,data:l},{type:12,data:p},{type:12,data:m}),a.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let h=!1;if(t.kernelShape.length===2){let _=t.kernelShape[t.kernelShape.length-2],y=t.strides[t.strides.length-2],g=t.pads[t.pads.length/2-2],x=t.pads[t.pads.length-2];h=!!(g+x),i.push({type:12,data:_},{type:12,data:y},{type:12,data:g},{type:12,data:x}),a.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[i,a,!0,u,h]}else{if(r)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let d=C.computeStrides(t.kernelShape);i.push({type:12,data:d},{type:12,data:t.pads},{type:12,data:t.strides}),a.push({name:"kernelStrides",type:"u32",length:d.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let l=t.pads.reduce((p,m)=>p+m);return[i,a,!!l,!1,!1]}},sl=(e,t,r,n,o,i,a,d,l,p,m,u)=>{let h=o.format==="NHWC",_=t.type.value,y=M("output",t.type.tensor,n);if(o.kernelShape.length<=2){let g="",x="",$="",v=r-(h?2:1);if(m?g=`
6038
+ }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${a}`,inputDependencies:d},getRunData:()=>({outputs:[{dims:r,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(r)/64)},programUniforms:i}),getShaderSource:l}},vh=(e,t)=>{if(e.length>1){let r=e[1].getBigInt64Array(),n=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,o=e[0].dims.length,i=new Int32Array(2*o).fill(0);if(e.length>=4){let d=e[3].getBigInt64Array();for(let l=0;l<d.length;l++)i[Number(d[l])]=Number(r[l]),i[Number(d[l])+o]=Number(r[l+d.length])}else r.forEach((d,l)=>i[Number(l)]=Number(d));let a=[];return i.forEach(d=>a.push(d)),{mode:t.mode,value:n,pads:a}}else return t},rl=(e,t)=>{fh(e.inputs);let r=vh(e.inputs,t);e.compute(wh(e.inputs,r),{inputs:[0]})}});var en,ol,il,al,sl,$h,xh,ul,dl,ll,cl,pl,ml,fl,hl,gl,bl,yl,_l,wl=U(()=>{"use strict";Le();J();ne();ae();en=e=>{if(_e.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},ol=(e,t,r)=>{let n=t.format==="NHWC",o=e.dims.slice();n&&o.splice(1,0,o.pop());let i=Object.hasOwnProperty.call(t,"dilations"),a=t.kernelShape.slice(),d=t.strides.slice(),l=i?t.dilations.slice():[],p=t.pads.slice();kt.adjustPoolAttributes(r,o,a,d,l,p);let m=kt.computePoolOutputShape(r,o,d,l,a,p,t.autoPad),u=Object.assign({},t);i?Object.assign(u,{kernelShape:a,strides:d,pads:p,dilations:l,cacheKey:t.cacheKey}):Object.assign(u,{kernelShape:a,strides:d,pads:p,cacheKey:t.cacheKey});let h=m.slice();return h.push(h.splice(1,1)[0]),[u,n?h:m]},il=(e,t)=>{let r=t.format==="NHWC",n=k.size(e),o=k.size(t.kernelShape),i=[{type:12,data:n},{type:12,data:o}],a=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let d=t.kernelShape[t.kernelShape.length-1],l=t.strides[t.strides.length-1],p=t.pads[t.pads.length/2-1],m=t.pads[t.pads.length-1],u=!!(p+m);i.push({type:12,data:d},{type:12,data:l},{type:12,data:p},{type:12,data:m}),a.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let h=!1;if(t.kernelShape.length===2){let _=t.kernelShape[t.kernelShape.length-2],y=t.strides[t.strides.length-2],g=t.pads[t.pads.length/2-2],x=t.pads[t.pads.length-2];h=!!(g+x),i.push({type:12,data:_},{type:12,data:y},{type:12,data:g},{type:12,data:x}),a.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[i,a,!0,u,h]}else{if(r)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let d=k.computeStrides(t.kernelShape);i.push({type:12,data:d},{type:12,data:t.pads},{type:12,data:t.strides}),a.push({name:"kernelStrides",type:"u32",length:d.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let l=t.pads.reduce((p,m)=>p+m);return[i,a,!!l,!1,!1]}},al=(e,t,r,n,o,i,a,d,l,p,m,u)=>{let h=o.format==="NHWC",_=t.type.value,y=M("output",t.type.tensor,n);if(o.kernelShape.length<=2){let g="",x="",$="",v=r-(h?2:1);if(m?g=`
5974
6039
  for (var i: u32 = 0u; i < uniforms.kw; i++) {
5975
6040
  xIndices[${v}] = indices[${v}] * uniforms.sw - uniforms.pwStart + i;
5976
6041
  if (xIndices[${v}] < 0 || xIndices[${v}]
@@ -6045,27 +6110,27 @@ fn calculateOutputIndex(index: u32) -> u32 {
6045
6110
  for (var i: u32 = 0u; i < uniforms.kernelSize; i++) {
6046
6111
  var offset = i;
6047
6112
  for (var j = 0u; j < ${g-1}u; j++) {
6048
- offsets[j] = offset / ${F("uniforms.kernelStrides","j",g)};
6049
- offset -= offsets[j] * ${F("uniforms.kernelStrides","j",g)};
6113
+ offsets[j] = offset / ${q("uniforms.kernelStrides","j",g)};
6114
+ offset -= offsets[j] * ${q("uniforms.kernelStrides","j",g)};
6050
6115
  }
6051
6116
  offsets[${g-1}] = offset;
6052
6117
 
6053
6118
  isPad = false;
6054
6119
  for (var j = ${r-g}u; j < ${r}u; j++) {
6055
- xIndices[j] = indices[j] * ${F("uniforms.strides",`j - ${r-g}u`,g)}
6056
- + offsets[j - ${r-g}u] - ${F("uniforms.pads","j - 2u",x)};
6120
+ xIndices[j] = indices[j] * ${q("uniforms.strides",`j - ${r-g}u`,g)}
6121
+ + offsets[j - ${r-g}u] - ${q("uniforms.pads","j - 2u",x)};
6057
6122
  ${$}
6058
6123
  }
6059
6124
  ${a}
6060
6125
 
6061
6126
  output[global_idx] = value;
6062
- }`}},ul=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,$h=e=>`${ul(e)};${e.countIncludePad}`,xh=e=>`${ul(e)};${e.storageOrder};${e.dilations}`,dl=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),ll=(e,t,r,n)=>{let[o,i]=il(t,n,r),a=E("x",t.dataType,t.dims.length),d=a.type.value,l="value += x_val;",p="";o.countIncludePad?p+=`value /= ${d}(uniforms.kernelSize);`:p+=`value /= ${d}(i32(uniforms.kernelSize) - pad);`;let[m,u,h,_,y]=al(i,o);m.push(...N(t.dims,i));let g=["rank"];return{name:e,shaderCache:{hint:`${n.cacheKey};${h};${_};${y}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(C.size(i)/64)},programUniforms:m}),getShaderSource:x=>sl(x,a,t.dims.length,i.length,o,l,p,0,u,h,_,y)}},cl=e=>{let t=e.count_include_pad!==0,r=dl(e);if(r.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let n={countIncludePad:t,...r,cacheKey:""};return{...n,cacheKey:$h(n)}},pl=(e,t)=>{tn(e.inputs),e.compute(ll("AveragePool",e.inputs[0],!1,t))},ml={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},fl=e=>{let t=e.format;return{format:t,...ml,cacheKey:t}},hl=(e,t)=>{tn(e.inputs),e.compute(ll("GlobalAveragePool",e.inputs[0],!0,t))},gl=(e,t,r,n)=>{let[o,i]=il(t,n,r),a=`
6127
+ }`}},sl=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,$h=e=>`${sl(e)};${e.countIncludePad}`,xh=e=>`${sl(e)};${e.storageOrder};${e.dilations}`,ul=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),dl=(e,t,r,n)=>{let[o,i]=ol(t,n,r),a=E("x",t.dataType,t.dims.length),d=a.type.value,l="value += x_val;",p="";o.countIncludePad?p+=`value /= ${d}(uniforms.kernelSize);`:p+=`value /= ${d}(i32(uniforms.kernelSize) - pad);`;let[m,u,h,_,y]=il(i,o);m.push(...N(t.dims,i));let g=["rank"];return{name:e,shaderCache:{hint:`${n.cacheKey};${h};${_};${y}`,inputDependencies:g},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:x=>al(x,a,t.dims.length,i.length,o,l,p,0,u,h,_,y)}},ll=e=>{let t=e.count_include_pad!==0,r=ul(e);if(r.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let n={countIncludePad:t,...r,cacheKey:""};return{...n,cacheKey:$h(n)}},cl=(e,t)=>{en(e.inputs),e.compute(dl("AveragePool",e.inputs[0],!1,t))},pl={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},ml=e=>{let t=e.format;return{format:t,...pl,cacheKey:t}},fl=(e,t)=>{en(e.inputs),e.compute(dl("GlobalAveragePool",e.inputs[0],!0,t))},hl=(e,t,r,n)=>{let[o,i]=ol(t,n,r),a=`
6063
6128
  value = max(x_val, value);
6064
- `,d="",l=E("x",t.dataType,t.dims.length),p=["rank"],[m,u,h,_,y]=al(i,o);return m.push(...N(t.dims,i)),{name:e,shaderCache:{hint:`${n.cacheKey};${h};${_};${y}`,inputDependencies:p},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(C.size(i)/64)},programUniforms:m}),getShaderSource:g=>sl(g,l,t.dims.length,i.length,o,a,d,t.dataType===10?-65504:-1e5,u,h,_,y)}},bl=(e,t)=>{tn(e.inputs),e.compute(gl("MaxPool",e.inputs[0],!1,t))},yl=e=>{let t=e.storage_order,r=e.dilations,n=dl(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(n.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let o={storageOrder:t,dilations:r,...n,cacheKey:""};return{...o,cacheKey:xh(o)}},_l=e=>{let t=e.format;return{format:t,...ml,cacheKey:t}},wl=(e,t)=>{tn(e.inputs),e.compute(gl("GlobalMaxPool",e.inputs[0],!0,t))}});var Th,Ih,$l,xl,Sl=U(()=>{"use strict";te();oe();Se();ae();Th=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((r,n)=>r===e[2].dims[n]).reduce((r,n)=>r&&n,!0))throw new Error("scale and zero-point inputs must have the same shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be set only for block quantization.");if(!e[1].dims.map((o,i)=>i===t.axis||o===e[0].dims[i]).reduce((o,i)=>o&&i,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let r=e[0].dims[t.axis],n=e[1].dims[t.axis];if(t.blockSize<Math.ceil(r/n)||t.blockSize>Math.ceil(r/(n-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},Ih=(e,t)=>{let r=C.normalizeAxis(t.axis,e[0].dims.length),n=e[0].dataType,o=n===3,i=e[0].dims,a=e[1].dataType,d=C.size(i),l=n===3||n===2,p=l?[Math.ceil(C.size(e[0].dims)/4)]:e[0].dims,m=e[1].dims,u=e.length>2?e[2]:void 0,h=u?l?[Math.ceil(C.size(u.dims)/4)]:u.dims:void 0,_=m.length===0||m.length===1&&m[0]===1,y=_===!1&&m.length===1,g=me(d),x=_&&(!l||g===4),$=x?g:1,v=x&&!l?g:1,S=E("input",l?12:n,p.length,v),T=E("scale",a,m.length),A=u?E("zero_point",l?12:n,h.length):void 0,k=M("output",a,i.length,$),P=[S,T];A&&P.push(A);let D=[p,m];u&&D.push(h);let R=[{type:12,data:d/$},{type:12,data:r},{type:12,data:t.blockSize},...N(...D,i)],G=K=>{let j=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return`
6065
- ${K.registerUniforms(j).declareVariables(...P,k)}
6066
- ${K.mainStart()}
6067
- ${K.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
6068
- let output_indices = ${k.offsetToIndices("global_idx")};
6129
+ `,d="",l=E("x",t.dataType,t.dims.length),p=["rank"],[m,u,h,_,y]=il(i,o);return m.push(...N(t.dims,i)),{name:e,shaderCache:{hint:`${n.cacheKey};${h};${_};${y}`,inputDependencies:p},getRunData:()=>({outputs:[{dims:i,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(k.size(i)/64)},programUniforms:m}),getShaderSource:g=>al(g,l,t.dims.length,i.length,o,a,d,t.dataType===10?-65504:-1e5,u,h,_,y)}},gl=(e,t)=>{en(e.inputs),e.compute(hl("MaxPool",e.inputs[0],!1,t))},bl=e=>{let t=e.storage_order,r=e.dilations,n=ul(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(n.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let o={storageOrder:t,dilations:r,...n,cacheKey:""};return{...o,cacheKey:xh(o)}},yl=e=>{let t=e.format;return{format:t,...pl,cacheKey:t}},_l=(e,t)=>{en(e.inputs),e.compute(hl("GlobalMaxPool",e.inputs[0],!0,t))}});var Th,Ih,vl,$l,xl=U(()=>{"use strict";J();ne();xe();ae();Th=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((r,n)=>r===e[2].dims[n]).reduce((r,n)=>r&&n,!0))throw new Error("scale and zero-point inputs must have the same shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be set only for block quantization.");if(!e[1].dims.map((o,i)=>i===t.axis||o===e[0].dims[i]).reduce((o,i)=>o&&i,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let r=e[0].dims[t.axis],n=e[1].dims[t.axis];if(t.blockSize<Math.ceil(r/n)||t.blockSize>Math.ceil(r/(n-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},Ih=(e,t)=>{let r=k.normalizeAxis(t.axis,e[0].dims.length),n=e[0].dataType,o=n===3,i=e[0].dims,a=e[1].dataType,d=k.size(i),l=n===3||n===2,p=l?[Math.ceil(k.size(e[0].dims)/4)]:e[0].dims,m=e[1].dims,u=e.length>2?e[2]:void 0,h=u?l?[Math.ceil(k.size(u.dims)/4)]:u.dims:void 0,_=m.length===0||m.length===1&&m[0]===1,y=_===!1&&m.length===1,g=me(d),x=_&&(!l||g===4),$=x?g:1,v=x&&!l?g:1,S=E("input",l?12:n,p.length,v),T=E("scale",a,m.length),A=u?E("zero_point",l?12:n,h.length):void 0,C=M("output",a,i.length,$),P=[S,T];A&&P.push(A);let D=[p,m];u&&D.push(h);let R=[{type:12,data:d/$},{type:12,data:r},{type:12,data:t.blockSize},...N(...D,i)],H=L=>{let re=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return`
6130
+ ${L.registerUniforms(re).declareVariables(...P,C)}
6131
+ ${L.mainStart()}
6132
+ ${L.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
6133
+ let output_indices = ${C.offsetToIndices("global_idx")};
6069
6134
 
6070
6135
  // Set input x
6071
6136
  ${(()=>l?`
@@ -6075,7 +6140,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6075
6140
 
6076
6141
  // Set scale input
6077
6142
  ${(()=>_?`let scale_value= ${T.getByOffset("0")}`:y?`
6078
- let scale_index = ${k.indicesGet("output_indices","uniforms.axis")};
6143
+ let scale_index = ${C.indicesGet("output_indices","uniforms.axis")};
6079
6144
  let scale_value= ${T.getByOffset("scale_index")};`:`
6080
6145
  var scale_indices: ${T.type.indices} = output_indices;
6081
6146
  let index = ${T.indicesGet("scale_indices","uniforms.axis")} / uniforms.block_size;
@@ -6087,24 +6152,24 @@ fn calculateOutputIndex(index: u32) -> u32 {
6087
6152
  let zero_point_input = ${A.getByOffset("0")};
6088
6153
  let zero_point_vec = ${o?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"};
6089
6154
  let zero_point_value= zero_point_vec[0]`:`let zero_point_value = ${A.getByOffset("0")}`:y?l?`
6090
- let zero_point_index = ${k.indicesGet("output_indices","uniforms.axis")};
6155
+ let zero_point_index = ${C.indicesGet("output_indices","uniforms.axis")};
6091
6156
  let zero_point_input = ${A.getByOffset("zero_point_index / 4")};
6092
6157
  let zero_point_vec = ${o?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"};
6093
6158
  let zero_point_value = zero_point_vec[zero_point_index % 4]`:`
6094
- let zero_point_index = ${k.indicesGet("output_indices","uniforms.axis")};
6159
+ let zero_point_index = ${C.indicesGet("output_indices","uniforms.axis")};
6095
6160
  let zero_point_value = ${A.getByOffset("zero_point_index")};`:l?`
6096
6161
  let zero_point_offset = ${T.indicesToOffset("scale_indices")};
6097
6162
  let zero_point_input = ${A.getByOffset("zero_point_offset / 4")};
6098
6163
  let zero_point_vec = ${o?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"};
6099
6164
  let zero_point_value = zero_point_vec[zero_point_offset % 4];`:`let zero_point_value = ${A.getByIndices("scale_indices")};`:`let zero_point_value = ${l?o?"i32":"u32":S.type.value}(0);`)()};
6100
6165
  // Compute and write output
6101
- ${k.setByOffset("global_idx",`${k.type.value}(x_value - zero_point_value) * scale_value`)};
6102
- }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:A?["rank","rank","rank"]:["rank","rank"]},getShaderSource:G,getRunData:()=>({outputs:[{dims:i,dataType:a}],dispatchGroup:{x:Math.ceil(d/$/64),y:1,z:1},programUniforms:R})}},$l=(e,t)=>{Th(e.inputs,t),e.compute(Ih(e.inputs,t))},xl=e=>re({axis:e.axis,blockSize:e.blockSize})});var Ch,Ah,Tl,Il=U(()=>{"use strict";We();te();ae();Ch=(e,t,r)=>{let n=e===t,o=e<t&&r<0,i=e>t&&r>0;if(n||o||i)throw new Error("Range these inputs' contents are invalid.")},Ah=(e,t,r,n)=>{let o=Math.abs(Math.ceil((t-e)/r)),i=[o],a=o,d=[{type:12,data:a},{type:n,data:e},{type:n,data:r},...N(i)],l=p=>{let m=M("output",n,i.length),u=m.type.value,h=[{name:"outputSize",type:"u32"},{name:"start",type:u},{name:"delta",type:u}];return`
6166
+ ${C.setByOffset("global_idx",`${C.type.value}(x_value - zero_point_value) * scale_value`)};
6167
+ }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:A?["rank","rank","rank"]:["rank","rank"]},getShaderSource:H,getRunData:()=>({outputs:[{dims:i,dataType:a}],dispatchGroup:{x:Math.ceil(d/$/64),y:1,z:1},programUniforms:R})}},vl=(e,t)=>{Th(e.inputs,t),e.compute(Ih(e.inputs,t))},$l=e=>ee({axis:e.axis,blockSize:e.blockSize})});var Ch,Ah,Sl,Tl=U(()=>{"use strict";Le();J();ae();Ch=(e,t,r)=>{let n=e===t,o=e<t&&r<0,i=e>t&&r>0;if(n||o||i)throw new Error("Range these inputs' contents are invalid.")},Ah=(e,t,r,n)=>{let o=Math.abs(Math.ceil((t-e)/r)),i=[o],a=o,d=[{type:12,data:a},{type:n,data:e},{type:n,data:r},...N(i)],l=p=>{let m=M("output",n,i.length),u=m.type.value,h=[{name:"outputSize",type:"u32"},{name:"start",type:u},{name:"delta",type:u}];return`
6103
6168
  ${p.registerUniforms(h).declareVariables(m)}
6104
6169
  ${p.mainStart()}
6105
6170
  ${p.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
6106
6171
  output[global_idx] = uniforms.start + ${u}(global_idx) * uniforms.delta;
6107
- }`};return{name:"Range",shaderCache:{hint:`${n}`},getShaderSource:l,getRunData:()=>({outputs:[{dims:i,dataType:n}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:d})}},Tl=e=>{let t=0,r=0,n=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],r=e.inputs[1].getInt32Array()[0],n=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],r=e.inputs[1].getFloat32Array()[0],n=e.inputs[2].getFloat32Array()[0]),ve.webgpu.validateInputContent&&Ch(t,r,n),e.compute(Ah(t,r,n,e.inputs[0].dataType),{inputs:[]})}});var kh,Eh,Cl,Al,kl=U(()=>{"use strict";te();oe();Se();ae();kh=(e,t,r,n)=>{if(e!=="none"&&n!=="i32"&&n!=="u32"&&n!=="f32")throw new Error(`Input ${n} is not supported with reduction ${e}.`);let o=`{
6172
+ }`};return{name:"Range",shaderCache:{hint:`${n}`},getShaderSource:l,getRunData:()=>({outputs:[{dims:i,dataType:n}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:d})}},Sl=e=>{let t=0,r=0,n=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],r=e.inputs[1].getInt32Array()[0],n=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],r=e.inputs[1].getFloat32Array()[0],n=e.inputs[2].getFloat32Array()[0]),_e.webgpu.validateInputContent&&Ch(t,r,n),e.compute(Ah(t,r,n,e.inputs[0].dataType),{inputs:[]})}});var kh,Eh,Il,Cl,Al=U(()=>{"use strict";J();ne();xe();ae();kh=(e,t,r,n)=>{if(e!=="none"&&n!=="i32"&&n!=="u32"&&n!=="f32")throw new Error(`Input ${n} is not supported with reduction ${e}.`);let o=`{
6108
6173
  var oldValue = 0;
6109
6174
  loop {
6110
6175
  let newValueF32 =`,i=`;
@@ -6117,7 +6182,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6117
6182
  }
6118
6183
  }`;switch(e){case"none":return`${t}=${r};`;case"add":return n==="i32"||n==="u32"?`atomicAdd(&${t}, bitcast<${n}>(${r}));`:`
6119
6184
  ${o}bitcast<${n}>(oldValue) + (${r})${i}`;case"max":return n==="i32"||n==="u32"?`atomicMax(&${t}, bitcast<${n}>(${r}));`:`
6120
- ${o}max(bitcast<f32>(oldValue), (${r}))${i}`;case"min":return n==="i32"||n==="u32"?`atomicMin(&${t}, bitcast<${n}>(${r}));`:`${o}min(bitcast<${n}>(oldValue), (${r}))${i}`;case"mul":return`${o}(bitcast<${n}>(oldValue) * (${r}))${i}`;default:throw new Error(`Reduction ${e} is not supported.`)}},Eh=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r,i=1,a=Math.ceil(C.size(n)/i),d=n[n.length-1],l=C.sizeFromDimension(r,d),p=[{type:12,data:a},{type:12,data:d},{type:12,data:l},...N(e[1].dims,e[2].dims,o)],m=u=>{let h=E("indices",e[1].dataType,e[1].dims.length),_=E("updates",e[2].dataType,e[2].dims.length,i),y=t.reduction!=="none"&&t.reduction!==""?rs("output",e[0].dataType,o.length):M("output",e[0].dataType,o.length,i);return`
6185
+ ${o}max(bitcast<f32>(oldValue), (${r}))${i}`;case"min":return n==="i32"||n==="u32"?`atomicMin(&${t}, bitcast<${n}>(${r}));`:`${o}min(bitcast<${n}>(oldValue), (${r}))${i}`;case"mul":return`${o}(bitcast<${n}>(oldValue) * (${r}))${i}`;default:throw new Error(`Reduction ${e} is not supported.`)}},Eh=(e,t)=>{let r=e[0].dims,n=e[1].dims,o=r,i=1,a=Math.ceil(k.size(n)/i),d=n[n.length-1],l=k.sizeFromDimension(r,d),p=[{type:12,data:a},{type:12,data:d},{type:12,data:l},...N(e[1].dims,e[2].dims,o)],m=u=>{let h=E("indices",e[1].dataType,e[1].dims.length),_=E("updates",e[2].dataType,e[2].dims.length,i),y=t.reduction!=="none"&&t.reduction!==""?ts("output",e[0].dataType,o.length):M("output",e[0].dataType,o.length,i);return`
6121
6186
  ${u.registerUniform("output_size","u32").registerUniform("last_index_dimension","u32").registerUniform("num_updates_elements","u32").declareVariables(h,_,y)}
6122
6187
  ${u.mainStart()}
6123
6188
  ${u.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
@@ -6150,22 +6215,30 @@ fn calculateOutputIndex(index: u32) -> u32 {
6150
6215
  ${kh(t.reduction,"output[data_offset + i]","value",y.type.value)}
6151
6216
  }
6152
6217
 
6153
- }`};return{name:"ScatterND",shaderCache:{hint:`${t.cacheKey}_${t.reduction}`,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:p}),getShaderSource:m}},Cl=e=>re({reduction:e.reduction}),Al=(e,t)=>{e.compute(Eh(e.inputs,t),{inputs:[e.inputs[1],e.inputs[2]],outputs:[]})}});var Ph,zh,Oh,Dh,Bh,Mh,Rh,Uh,Nh,Vh,Wh,El,Lh,Gh,Hh,Fh,qh,Pl,zl,Ol=U(()=>{"use strict";te();oe();Se();ae();Ph=(e,t)=>{if(e.every(r=>r>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and
6154
- one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},zh=(e,t,r)=>{t.every(o=>o>=0&&o<r||(()=>{throw new Error("Resize requires axes input values to be positive and less than rank")}));let n=new Array(r).fill(1);return t.forEach((o,i)=>n[o]=e[i]),n},Oh=(e,t,r,n,o,i)=>{let[a,d,l]=r>10?[1,2,3]:[-1,e.length>1?1:-1,-1],p=e[0].dims.length;if(a>0&&e.length>a&&e[a].dims.length>0)e[a].getFloat32Array().forEach(m=>i.push(m));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(d>0&&e.length>d&&e[d].dims.length===1&&e[d].dims[0]>0){if(e[d].getFloat32Array().forEach(m=>n.push(m)),n.length!==0&&n.length!==p&&r>=18&&n.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and up");Ph(n,t),t.axes.length>0&&zh(n,t.axes,p).forEach((m,u)=>n[u]=m)}if(l>0&&e.length>l&&e[l].dims.length===1&&e[l].dims[0]>0&&(e[l].getBigInt64Array().forEach(m=>o.push(Number(m))),o.length!==0&&o.length!==p&&r>=18&&o.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(n.length!==0&&n.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(o.length!==0&&o.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof n<"u"&&typeof o<"u"&&n.length>0&&o.length>p)throw new Error("Resize requires only of scales or sizes to be specified")},Dh=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,
6155
- lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${t} { `+(()=>{switch(e){case"asymmetric":return`return ${t}(xResized) / ${t}(xScale);`;case"pytorch_half_pixel":return`if (lengthResized > 1) {
6218
+ }`};return{name:"ScatterND",shaderCache:{hint:`${t.cacheKey}_${t.reduction}`,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(a/64)},programUniforms:p}),getShaderSource:m}},Il=e=>ee({reduction:e.reduction}),Cl=(e,t)=>{e.compute(Eh(e.inputs,t),{inputs:[e.inputs[1],e.inputs[2]],outputs:[]})}});var Ph,zh,Oh,kl,Dh,Bh,Mh,Rh,Uh,Nh,Vh,Wh,El,Lh,Gh,Hh,Fh,qh,Pl,zl,Ol=U(()=>{"use strict";J();ne();xe();ae();Ph=(e,t)=>{if(e.every(r=>r>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and
6219
+ one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},zh=(e,t,r)=>{t.every(o=>o>=0&&o<r||(()=>{throw new Error("Resize requires axes input values to be positive and less than rank")}));let n=new Array(r).fill(1);return t.forEach((o,i)=>n[o]=e[i]),n},Oh=(e,t,r,n,o,i)=>{let[a,d,l]=r>10?[1,2,3]:[-1,e.length>1?1:-1,-1],p=e[0].dims.length;if(a>0&&e.length>a&&e[a].dims.length>0)e[a].getFloat32Array().forEach(m=>i.push(m));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(d>0&&e.length>d&&e[d].dims.length===1&&e[d].dims[0]>0){if(e[d].getFloat32Array().forEach(m=>n.push(m)),n.length!==0&&n.length!==p&&r>=18&&n.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and up");Ph(n,t),t.axes.length>0&&zh(n,t.axes,p).forEach((m,u)=>n[u]=m)}if(l>0&&e.length>l&&e[l].dims.length===1&&e[l].dims[0]>0&&(e[l].getBigInt64Array().forEach(m=>o.push(Number(m))),o.length!==0&&o.length!==p&&r>=18&&o.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(n.length!==0&&n.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(o.length!==0&&o.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof n<"u"&&typeof o<"u"&&n.length>0&&o.length>p)throw new Error("Resize requires only of scales or sizes to be specified")},kl=(e,t,r,n)=>`
6220
+ // The whole part and the fractional part are calculated separately due to inaccuracy of floating
6221
+ // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an
6222
+ // offset-by-one error later in floor().
6223
+ let big = (${e}) * (${t});
6224
+ let whole = ${n}(big / (${r}));
6225
+ let fract = ${n}(big % (${r})) / ${n}(${r});
6226
+ return whole + fract;
6227
+ `,Dh=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,
6228
+ lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${t} { `+(()=>{switch(e){case"asymmetric":return`
6229
+ if (xScale < 1.0 || floor(xScale) != xScale) {
6230
+ return ${t}(xResized) / ${t}(xScale);
6231
+ } else {
6232
+ ${kl("xResized","lengthOriginal","lengthResized",t)}
6233
+ }
6234
+ `;case"pytorch_half_pixel":return`if (lengthResized > 1) {
6156
6235
  return (${t}(xResized) + 0.5) / ${t}(xScale) - 0.5;
6157
6236
  } else {
6158
6237
  return 0.0;
6159
6238
  }`;case"tf_half_pixel_for_nn":return`return (${t}(xResized) + 0.5) / ${t}(xScale);`;case"align_corners":return`if (lengthResized == 1) {
6160
6239
  return 0.0;
6161
6240
  } else {
6162
- // The whole part and the fractional part are calculated separately due to inaccuracy of floating
6163
- // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an
6164
- // offset-by-one error later in floor().
6165
- let whole = ${t}(xResized * (lengthOriginal - 1) / (lengthResized - 1));
6166
- let fract =
6167
- ${t}(xResized * (lengthOriginal - 1) % (lengthResized - 1)) / ${t}(lengthResized - 1);
6168
- return whole + fract;
6241
+ ${kl("xResized","lengthOriginal - 1","lengthResized - 1",t)}
6169
6242
  }`;case"tf_crop_and_resize":return`if (lengthResized > 1) {
6170
6243
  return ${t}(roiStart) * ${t}(lengthOriginal - 1) +
6171
6244
  (${t}(xResized) * ${t}(roiEnd - roiStart) * ${t}(lengthOriginal - 1)) /
@@ -6181,14 +6254,14 @@ fn calculateOutputIndex(index: u32) -> u32 {
6181
6254
  var original_indices: array<${e.type.value}, ${r.length}>;
6182
6255
  for (var i:u32 = 0; i < ${r.length}; i++) {
6183
6256
  var output_index = ${e.indicesGet("output_indices","i")};
6184
- var scale = ${F("uniforms.scales","i",n)};
6185
- var roi_low = ${F("uniforms.roi","i",o)};
6186
- var roi_hi = ${F("uniforms.roi",`i + ${t.length}`,o)};
6257
+ var scale = ${q("uniforms.scales","i",n)};
6258
+ var roi_low = ${q("uniforms.roi","i",o)};
6259
+ var roi_hi = ${q("uniforms.roi",`i + ${t.length}`,o)};
6187
6260
  if (scale == 1.0) {
6188
6261
  original_indices[i] = ${e.type.value}(output_index);
6189
6262
  } else {
6190
- var input_shape_i = ${F("uniforms.input_shape","i",t.length)};
6191
- var output_shape_i = ${F("uniforms.output_shape","i",r.length)};
6263
+ var input_shape_i = ${q("uniforms.input_shape","i",t.length)};
6264
+ var output_shape_i = ${q("uniforms.output_shape","i",r.length)};
6192
6265
  original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i,
6193
6266
  input_shape_i, roi_low, roi_hi);
6194
6267
  }
@@ -6200,14 +6273,14 @@ fn calculateOutputIndex(index: u32) -> u32 {
6200
6273
  for (var i:u32 = 0; i < ${n.length}; i++) {
6201
6274
  var output_index = ${t.indicesGet("output_indices","i")};
6202
6275
  var input_index: u32;
6203
- var scale = ${F("uniforms.scales","i",o)};
6276
+ var scale = ${q("uniforms.scales","i",o)};
6204
6277
  if (scale == 1.0) {
6205
6278
  input_index = output_index;
6206
6279
  } else {
6207
- var roi_low = ${F("uniforms.roi","i",i)};
6208
- var roi_hi = ${F("uniforms.roi",`i + ${r.length}`,i)};
6209
- var input_shape_i = ${F("uniforms.input_shape","i",r.length)};
6210
- var output_shape_i = ${F("uniforms.output_shape","i",n.length)};
6280
+ var roi_low = ${q("uniforms.roi","i",i)};
6281
+ var roi_hi = ${q("uniforms.roi",`i + ${r.length}`,i)};
6282
+ var input_shape_i = ${q("uniforms.input_shape","i",r.length)};
6283
+ var output_shape_i = ${q("uniforms.output_shape","i",n.length)};
6211
6284
  var original_idx = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i,
6212
6285
  input_shape_i, roi_low, roi_hi);
6213
6286
  if (!${a} || (original_idx >= 0 && original_idx < ${t.type.value}(input_shape_i))) {
@@ -6222,14 +6295,14 @@ fn calculateOutputIndex(index: u32) -> u32 {
6222
6295
  input_index = u32(original_idx);
6223
6296
  }
6224
6297
  }
6225
- ${e.indicesSet("input_indices","i"," input_index")}
6298
+ ${e.indicesSet("input_indices","i","input_index")}
6226
6299
  }
6227
6300
  return input_indices;
6228
6301
  }`,Wh=(e,t)=>`
6229
6302
  fn checkInputIndices(input_indices: ${e.type.indices}) -> bool {
6230
6303
  for (var i:u32 = 0; i < ${t.length}; i++) {
6231
6304
  var input_index = ${e.indicesGet("input_indices","i")};
6232
- if (input_index < 0 || input_index >= ${F("uniforms.input_shape","i",t.length)}) {
6305
+ if (input_index < 0 || input_index >= ${q("uniforms.input_shape","i",t.length)}) {
6233
6306
  return false;
6234
6307
  }
6235
6308
  }
@@ -6385,7 +6458,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6385
6458
  }
6386
6459
  return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 +
6387
6460
  x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1);
6388
- }`},Fh=(e,t,r,n,o,i)=>{let a=e.dims,d=Mh(i,t.axes,a.length),l=Rh(a,n,o,t.axes),p=n.slice();n.length===0&&(p=a.map((v,S)=>v===0?1:l[S]/v),t.keepAspectRatioPolicy!=="stretch"&&(l=Uh(a,p,t)));let m=M("output",e.dataType,l.length),u=E("input",e.dataType,a.length),h=C.size(l),_=a.length===l.length&&a.every((v,S)=>v===l[S]),y=t.coordinateTransformMode==="tf_crop_and_resize",g=t.extrapolationValue,x=u.type.value,$=v=>`
6461
+ }`},Fh=(e,t,r,n,o,i)=>{let a=e.dims,d=Mh(i,t.axes,a.length),l=Rh(a,n,o,t.axes),p=n.slice();n.length===0&&(p=a.map((v,S)=>v===0?1:l[S]/v),t.keepAspectRatioPolicy!=="stretch"&&(l=Uh(a,p,t)));let m=M("output",e.dataType,l.length),u=E("input",e.dataType,a.length),h=k.size(l),_=a.length===l.length&&a.every((v,S)=>v===l[S]),y=t.coordinateTransformMode==="tf_crop_and_resize",g=t.extrapolationValue,x=u.type.value,$=v=>`
6389
6462
  ${_?"":`
6390
6463
  ${Dh(t.coordinateTransformMode,x)};
6391
6464
  ${(()=>{switch(t.mode){case"nearest":return`
@@ -6412,10 +6485,10 @@ fn calculateOutputIndex(index: u32) -> u32 {
6412
6485
  output[global_idx] = ${t.extrapolationValue};
6413
6486
  }`;case"linear":return`output[global_idx] = ${a.length===2||a.length===4?"bilinearInterpolation":"trilinearInterpolation"}(output_indices);`;case"cubic":return"output[global_idx] = bicubicInterpolation(output_indices);";default:throw Error(`Unsupported resize mode: ${t.mode}`)}})()};
6414
6487
  `}
6415
- }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${r}|${p.length>0?p:""}|${o.length>0?o:""}|${d.length>0?d:""}|${_}|${a}`,inputDependencies:["rank"]},getShaderSource:$,getRunData:()=>({outputs:[{dims:l,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:[{type:12,data:h},{type:1,data:p},{type:1,data:d},...N(a,l)]})}},qh=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},Pl=(e,t)=>{let r=[],n=[],o=[],i=qh(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");Oh(e.inputs,t,i,r,n,o),e.compute(Fh(e.inputs[0],t,i,r,n,o),{inputs:[0]})},zl=e=>{let t=e.antialias,r=e.axes,n=e.coordinateTransformMode,o=e.cubicCoeffA,i=e.excludeOutside!==0,a=e.extrapolationValue,d=e.keepAspectRatioPolicy,l=e.mode,p=e.nearestMode===""?"simple":e.nearestMode;return re({antialias:t,axes:r,coordinateTransformMode:n,cubicCoeffA:o,excludeOutside:i,extrapolationValue:a,keepAspectRatioPolicy:d,mode:l,nearestMode:p})}});var Kh,jh,Dl,Bl=U(()=>{"use strict";te();oe();Se();ae();Kh=(e,t)=>{let[r,n,o,i]=e,{numHeads:a,rotaryEmbeddingDim:d}=t;if(r.dims.length!==3&&r.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${r.dims.length}`);if(!C.areEqual(n.dims,[])&&!C.areEqual(n.dims,[1])&&n.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${n.dims.length}`);if(o.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${o.dims.length}`);if(i.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${i.dims.length}`);if(!C.areEqual(o.dims,i.dims))throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");if(d>0&&a===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let l=r.dims[0],p=r.dims[r.dims.length-2],m=o.dims[0],u=C.sizeFromDimension(r.dims,1)/p,h=d===0?o.dims[1]*2:u/a;if(d>h)throw new Error("rotary_embedding_dim must be less than or equal to head_size");if(n.dims.length===2){if(l!==n.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${n.dims[0]}`);if(p!==n.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${n.dims[1]}`)}if(h/2!==o.dims[1]&&d/2!==o.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${o.dims[1]}`);if(p>m)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},jh=(e,t)=>{let{interleaved:r,numHeads:n,rotaryEmbeddingDim:o,scale:i}=t,a=e[0].dims[0],d=C.sizeFromDimension(e[0].dims,1),l=e[0].dims[e[0].dims.length-2],p=d/l,m=e[2].dims[1],u=o===0?m*2:p/n,h=new Array(a,l,p/u,u-m),_=C.computeStrides(h),y=[{type:1,data:i},{type:12,data:h},{type:12,data:_},...e[0].dims.length===3?new Array({type:12,data:[d,p,u,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[d,u,l*u,1]}):[],...N(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],g=x=>{let $=E("input",e[0].dataType,e[0].dims.length),v=E("position_ids",e[1].dataType,e[1].dims.length),S=E("cos_cache",e[2].dataType,e[2].dims.length),T=E("sin_cache",e[3].dataType,e[3].dims.length),A=M("output",e[0].dataType,e[0].dims.length);return x.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:h.length},{name:"global_strides",type:"u32",length:_.length},{name:"input_output_strides",type:"u32",length:_.length}]),`
6488
+ }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${r}|${p.length>0?t.mode==="cubic"?p:p.length:""}|${o.length>0?o:""}|${d.length>0?d:""}|${_}|${t.mode==="nearest"?a.length:a}`,inputDependencies:["rank"]},getShaderSource:$,getRunData:()=>({outputs:[{dims:l,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(h/64)},programUniforms:[{type:12,data:h},{type:1,data:p},{type:1,data:d},...N(a,l)]})}},qh=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},Pl=(e,t)=>{let r=[],n=[],o=[],i=qh(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");Oh(e.inputs,t,i,r,n,o),e.compute(Fh(e.inputs[0],t,i,r,n,o),{inputs:[0]})},zl=e=>{let t=e.antialias,r=e.axes,n=e.coordinateTransformMode,o=e.cubicCoeffA,i=e.excludeOutside!==0,a=e.extrapolationValue,d=e.keepAspectRatioPolicy,l=e.mode,p=e.nearestMode===""?"simple":e.nearestMode;return ee({antialias:t,axes:r,coordinateTransformMode:n,cubicCoeffA:o,excludeOutside:i,extrapolationValue:a,keepAspectRatioPolicy:d,mode:l,nearestMode:p})}});var Kh,jh,Dl,Bl=U(()=>{"use strict";J();ne();xe();ae();Kh=(e,t)=>{let[r,n,o,i]=e,{numHeads:a,rotaryEmbeddingDim:d}=t;if(r.dims.length!==3&&r.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${r.dims.length}`);if(!k.areEqual(n.dims,[])&&!k.areEqual(n.dims,[1])&&n.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${n.dims.length}`);if(o.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${o.dims.length}`);if(i.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${i.dims.length}`);if(!k.areEqual(o.dims,i.dims))throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");if(d>0&&a===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let l=r.dims[0],p=r.dims[r.dims.length-2],m=o.dims[0],u=k.sizeFromDimension(r.dims,1)/p,h=d===0?o.dims[1]*2:u/a;if(d>h)throw new Error("rotary_embedding_dim must be less than or equal to head_size");if(n.dims.length===2){if(l!==n.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${n.dims[0]}`);if(p!==n.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${n.dims[1]}`)}if(h/2!==o.dims[1]&&d/2!==o.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${o.dims[1]}`);if(p>m)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},jh=(e,t)=>{let{interleaved:r,numHeads:n,rotaryEmbeddingDim:o,scale:i}=t,a=e[0].dims[0],d=k.sizeFromDimension(e[0].dims,1),l=e[0].dims[e[0].dims.length-2],p=d/l,m=e[2].dims[1],u=o===0?m*2:p/n,h=new Array(a,l,p/u,u-m),_=k.computeStrides(h),y=[{type:1,data:i},{type:12,data:h},{type:12,data:_},...e[0].dims.length===3?new Array({type:12,data:[d,p,u,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[d,u,l*u,1]}):[],...N(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],g=x=>{let $=E("input",e[0].dataType,e[0].dims.length),v=E("position_ids",e[1].dataType,e[1].dims.length),S=E("cos_cache",e[2].dataType,e[2].dims.length),T=E("sin_cache",e[3].dataType,e[3].dims.length),A=M("output",e[0].dataType,e[0].dims.length);return x.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:h.length},{name:"global_strides",type:"u32",length:_.length},{name:"input_output_strides",type:"u32",length:_.length}]),`
6416
6489
  ${x.declareVariables($,v,S,T,A)}
6417
6490
 
6418
- ${x.mainStart(kt)}
6491
+ ${x.mainStart(Et)}
6419
6492
  let half_rotary_emb_dim = uniforms.${S.name}_shape[1];
6420
6493
  let bsnh = global_idx / uniforms.global_strides % uniforms.global_shape;
6421
6494
  let size = uniforms.global_shape[0] * uniforms.global_strides[0];
@@ -6438,9 +6511,9 @@ fn calculateOutputIndex(index: u32) -> u32 {
6438
6511
  let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim;
6439
6512
  ${A.setByOffset("k",$.getByOffset("k"))}
6440
6513
  }
6441
- }`};return{name:"RotaryEmbedding",shaderCache:{hint:re({interleaved:r}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:g,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(C.size(h)/kt)},programUniforms:y})}},Dl=(e,t)=>{Kh(e.inputs,t),e.compute(jh(e.inputs,t))}});var Yh,Zh,Ml,Rl=U(()=>{"use strict";te();oe();ae();Yh=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],r=e[1],n=e[2];if(t.dataType!==r.dataType||t.dataType!==n.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(r.dims.length!==3&&r.dims.length!==2)throw new Error("Skip must be 2D or 3D");let o=t.dims[t.dims.length-1],i=t.dims[t.dims.length-2];if(r.dims[r.dims.length-1]!==o)throw new Error("Skip must have the same hidden size as input");if(r.dims[r.dims.length-2]!==i)throw new Error("Skip must have the same sequence length as input");if(n.dims.length!==1)throw new Error("Gamma must be 1D");if(n.dims[n.dims.length-1]!==o)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let a=e[3];if(a.dims.length!==1)throw new Error("Beta must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let a=e[4];if(a.dims.length!==1)throw new Error("Bias must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Bias must have the same hidden size as input")}},Zh=(e,t,r,n)=>{let o=t.simplified,i=e[0].dims,a=C.size(i),d=i,l=a,p=i.slice(-1)[0],m=n?i.slice(0,-1).concat(1):[],u=!o&&e.length>3,h=e.length>4,_=n&&r>1,y=n&&r>2,g=r>3,x=64,$=me(p),v=[{type:12,data:l},{type:12,data:$},{type:12,data:p},{type:1,data:t.epsilon}],S=A=>{let k=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],P=[E("x",e[0].dataType,e[0].dims,$),E("skip",e[1].dataType,e[1].dims,$),E("gamma",e[2].dataType,e[2].dims,$)];u&&P.push(E("beta",e[3].dataType,e[3].dims,$)),h&&P.push(E("bias",e[4].dataType,e[4].dims,$)),P.push(M("output",e[0].dataType,d,$)),_&&P.push(M("mean_output",1,m)),y&&P.push(M("inv_std_output",1,m)),g&&P.push(M("input_skip_bias_sum",e[0].dataType,d,$));let D=_e(e[0].dataType),R=_e(1,$);return`
6514
+ }`};return{name:"RotaryEmbedding",shaderCache:{hint:ee({interleaved:r}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:g,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(k.size(h)/Et)},programUniforms:y})}},Dl=(e,t)=>{Kh(e.inputs,t),e.compute(jh(e.inputs,t))}});var Yh,Zh,Ml,Rl=U(()=>{"use strict";J();ne();ae();Yh=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],r=e[1],n=e[2];if(t.dataType!==r.dataType||t.dataType!==n.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(r.dims.length!==3&&r.dims.length!==2)throw new Error("Skip must be 2D or 3D");let o=t.dims[t.dims.length-1],i=t.dims[t.dims.length-2];if(r.dims[r.dims.length-1]!==o)throw new Error("Skip must have the same hidden size as input");if(r.dims[r.dims.length-2]!==i)throw new Error("Skip must have the same sequence length as input");if(n.dims.length!==1)throw new Error("Gamma must be 1D");if(n.dims[n.dims.length-1]!==o)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let a=e[3];if(a.dims.length!==1)throw new Error("Beta must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let a=e[4];if(a.dims.length!==1)throw new Error("Bias must be 1D");if(a.dims[a.dims.length-1]!==o)throw new Error("Bias must have the same hidden size as input")}},Zh=(e,t,r,n)=>{let o=t.simplified,i=e[0].dims,a=k.size(i),d=i,l=a,p=i.slice(-1)[0],m=n?i.slice(0,-1).concat(1):[],u=!o&&e.length>3,h=e.length>4,_=n&&r>1,y=n&&r>2,g=r>3,x=64,$=me(p),v=[{type:12,data:l},{type:12,data:$},{type:12,data:p},{type:1,data:t.epsilon}],S=A=>{let C=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],P=[E("x",e[0].dataType,e[0].dims,$),E("skip",e[1].dataType,e[1].dims,$),E("gamma",e[2].dataType,e[2].dims,$)];u&&P.push(E("beta",e[3].dataType,e[3].dims,$)),h&&P.push(E("bias",e[4].dataType,e[4].dims,$)),P.push(M("output",e[0].dataType,d,$)),_&&P.push(M("mean_output",1,m)),y&&P.push(M("inv_std_output",1,m)),g&&P.push(M("input_skip_bias_sum",e[0].dataType,d,$));let D=ye(e[0].dataType),R=ye(1,$);return`
6442
6515
 
6443
- ${A.registerUniforms(k).declareVariables(...P)}
6516
+ ${A.registerUniforms(C).declareVariables(...P)}
6444
6517
  var<workgroup> sum_shared : array<${R}, ${x}>;
6445
6518
  var<workgroup> sum_squared_shared : array<${R}, ${x}>;
6446
6519
 
@@ -6462,7 +6535,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6462
6535
  let value = input_value + skip_value + bias_value;
6463
6536
  ${g?"input_skip_bias_sum[offset + i] = value;":""}
6464
6537
  output[offset + i] = value;
6465
- let f32_value = ${Et(D,$,"value")};
6538
+ let f32_value = ${Pt(D,$,"value")};
6466
6539
  sum_shared[ix] += f32_value;
6467
6540
  sum_squared_shared[ix] += f32_value * f32_value;
6468
6541
  }
@@ -6480,8 +6553,8 @@ fn calculateOutputIndex(index: u32) -> u32 {
6480
6553
 
6481
6554
  let sum = sum_shared[0];
6482
6555
  let square_sum = sum_squared_shared[0];
6483
- let mean = ${Fe("sum",$)} / f32(uniforms.hidden_size);
6484
- let inv_std_dev = inverseSqrt(${Fe("square_sum",$)} / f32(uniforms.hidden_size) ${o?"":"- mean * mean"} + uniforms.epsilon);
6556
+ let mean = ${qe("sum",$)} / f32(uniforms.hidden_size);
6557
+ let inv_std_dev = inverseSqrt(${qe("square_sum",$)} / f32(uniforms.hidden_size) ${o?"":"- mean * mean"} + uniforms.epsilon);
6485
6558
  ${_?"mean_output[global_idx] = mean;":""}
6486
6559
  ${y?"inv_std_output[global_idx] = inv_std_dev;":""}
6487
6560
 
@@ -6490,14 +6563,14 @@ fn calculateOutputIndex(index: u32) -> u32 {
6490
6563
  ${D}(inv_std_dev) * gamma[offset1d + i]
6491
6564
  ${u?"+ beta[offset1d + i]":""};
6492
6565
  }
6493
- }`},T=[{dims:d,dataType:e[0].dataType}];return r>1&&T.push({dims:m,dataType:1}),r>2&&T.push({dims:m,dataType:1}),r>3&&T.push({dims:i,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${$};${_};${y};${g}`,inputDependencies:e.map((A,k)=>"type")},getShaderSource:S,getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(l/p)},programUniforms:v})}},Ml=(e,t)=>{Yh(e.inputs);let n=[0];e.outputCount>1&&n.push(-3),e.outputCount>2&&n.push(-3),e.outputCount>3&&n.push(3),e.compute(Zh(e.inputs,t,e.outputCount,!1),{outputs:n})}});var Qh,rn,Xh,Ul,Jh,eg,Nl,Vl,Wl=U(()=>{"use strict";te();oe();Se();ae();Qh=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((r,n)=>{if(e[n+1].dataType!==6&&e[n+1].dataType!==7)throw new Error(`Input ${n} must be an array of int32 or int64`)})},rn=(e,t)=>{let r=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(n=>r.push(Number(n)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(n=>r.push(Number(n)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return r},Xh=(e,t)=>{if(e.length>1){let r=rn(e,1),n=rn(e,2),o=rn(e,3);return o.length===0&&(o=[...Array(e[0].dims.length).keys()]),re({starts:r,ends:n,axes:o})}else return t},Ul=(e,t,r,n,o)=>{let i=e;return e<0&&(i+=r[n[t]]),o[t]<0?Math.max(0,Math.min(i,r[n[t]]-1)):Math.max(0,Math.min(i,r[n[t]]))},Jh=(e,t,r)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} {
6566
+ }`},T=[{dims:d,dataType:e[0].dataType}];return r>1&&T.push({dims:m,dataType:1}),r>2&&T.push({dims:m,dataType:1}),r>3&&T.push({dims:i,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${$};${_};${y};${g}`,inputDependencies:e.map((A,C)=>"type")},getShaderSource:S,getRunData:()=>({outputs:T,dispatchGroup:{x:Math.ceil(l/p)},programUniforms:v})}},Ml=(e,t)=>{Yh(e.inputs);let n=[0];e.outputCount>1&&n.push(-3),e.outputCount>2&&n.push(-3),e.outputCount>3&&n.push(3),e.compute(Zh(e.inputs,t,e.outputCount,!1),{outputs:n})}});var Qh,tn,Xh,Ul,Jh,eg,Nl,Vl,Wl=U(()=>{"use strict";J();ne();xe();ae();Qh=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((r,n)=>{if(e[n+1].dataType!==6&&e[n+1].dataType!==7)throw new Error(`Input ${n} must be an array of int32 or int64`)})},tn=(e,t)=>{let r=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(n=>r.push(Number(n)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(n=>r.push(Number(n)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return r},Xh=(e,t)=>{if(e.length>1){let r=tn(e,1),n=tn(e,2),o=tn(e,3);return o.length===0&&(o=[...Array(e[0].dims.length).keys()]),ee({starts:r,ends:n,axes:o})}else return t},Ul=(e,t,r,n,o)=>{let i=e;return e<0&&(i+=r[n[t]]),o[t]<0?Math.max(0,Math.min(i,r[n[t]]-1)):Math.max(0,Math.min(i,r[n[t]]))},Jh=(e,t,r)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} {
6494
6567
  var input_indices: ${e.type.indices};
6495
6568
  var carry = 0u;
6496
6569
  for (var i = ${r.length}; i >= 0; i--) {
6497
- let input_shape_i = ${F("uniforms.input_shape","i",r.length)};
6498
- let steps_i = ${F("uniforms.steps","i",r.length)};
6499
- let signs_i = ${F("uniforms.signs","i",r.length)};
6500
- let starts_i = ${F("uniforms.starts","i",r.length)};
6570
+ let input_shape_i = ${q("uniforms.input_shape","i",r.length)};
6571
+ let steps_i = ${q("uniforms.steps","i",r.length)};
6572
+ let signs_i = ${q("uniforms.signs","i",r.length)};
6573
+ let starts_i = ${q("uniforms.starts","i",r.length)};
6501
6574
  var output_index = ${t.indicesGet("output_indices","i")};
6502
6575
  var input_index = output_index * steps_i + starts_i + carry;
6503
6576
  carry = input_index / input_shape_i;
@@ -6508,7 +6581,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6508
6581
  ${e.indicesSet("input_indices","i","input_index")};
6509
6582
  }
6510
6583
  return input_indices;
6511
- }`,eg=(e,t)=>{let r=e[0].dims,n=C.size(r),o=t.axes.length>0?C.normalizeAxes(t.axes,r.length):[...Array(r.length).keys()],i=rn(e,4);i.forEach($=>$!==0||(()=>{throw new Error("step cannot be 0")})),i.length===0&&(i=Array(o.length).fill(1));let a=t.starts.map(($,v)=>Ul($,v,r,o,i)),d=t.ends.map(($,v)=>Ul($,v,r,o,i));if(o.length!==a.length||o.length!==d.length)throw new Error("start, ends and axes should have the same number of elements");if(o.length!==r.length)for(let $=0;$<r.length;++$)o.includes($)||(a.splice($,0,0),d.splice($,0,r[$]),i.splice($,0,1));let l=i.map($=>Math.sign($));i.forEach(($,v,S)=>{if($<0){let T=(d[v]-a[v])/$,A=a[v],k=A+T*i[v];a[v]=k,d[v]=A,S[v]=-$}});let p=r.slice(0);o.forEach(($,v)=>{p[$]=Math.ceil((d[$]-a[$])/i[$])});let m={dims:p,dataType:e[0].dataType},u=M("output",e[0].dataType,p.length),h=E("input",e[0].dataType,e[0].dims.length),_=C.size(p),y=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:a.length},{name:"signs",type:"i32",length:l.length},{name:"steps",type:"u32",length:i.length}],g=[{type:12,data:_},{type:12,data:a},{type:6,data:l},{type:12,data:i},...N(e[0].dims,p)],x=$=>`
6584
+ }`,eg=(e,t)=>{let r=e[0].dims,n=k.size(r),o=t.axes.length>0?k.normalizeAxes(t.axes,r.length):[...Array(r.length).keys()],i=tn(e,4);i.forEach($=>$!==0||(()=>{throw new Error("step cannot be 0")})),i.length===0&&(i=Array(o.length).fill(1));let a=t.starts.map(($,v)=>Ul($,v,r,o,i)),d=t.ends.map(($,v)=>Ul($,v,r,o,i));if(o.length!==a.length||o.length!==d.length)throw new Error("start, ends and axes should have the same number of elements");if(o.length!==r.length)for(let $=0;$<r.length;++$)o.includes($)||(a.splice($,0,0),d.splice($,0,r[$]),i.splice($,0,1));let l=i.map($=>Math.sign($));i.forEach(($,v,S)=>{if($<0){let T=(d[v]-a[v])/$,A=a[v],C=A+T*i[v];a[v]=C,d[v]=A,S[v]=-$}});let p=r.slice(0);o.forEach(($,v)=>{p[$]=Math.ceil((d[$]-a[$])/i[$])});let m={dims:p,dataType:e[0].dataType},u=M("output",e[0].dataType,p.length),h=E("input",e[0].dataType,e[0].dims.length),_=k.size(p),y=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:a.length},{name:"signs",type:"i32",length:l.length},{name:"steps",type:"u32",length:i.length}],g=[{type:12,data:_},{type:12,data:a},{type:6,data:l},{type:12,data:i},...N(e[0].dims,p)],x=$=>`
6512
6585
  ${$.registerUniforms(y).declareVariables(h,u)}
6513
6586
  ${Jh(h,u,r)}
6514
6587
  ${$.mainStart()}
@@ -6516,7 +6589,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6516
6589
  let output_indices = ${u.offsetToIndices("global_idx")};
6517
6590
  let input_indices = calculateInputIndices(output_indices);
6518
6591
  ${u.setByOffset("global_idx",h.getByIndices("input_indices"))}
6519
- }`;return{name:"Slice",shaderCache:{hint:`${l.length}_${a.length}_${i.length}`,inputDependencies:["rank"]},getShaderSource:x,getRunData:()=>({outputs:[m],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:g})}},Nl=(e,t)=>{Qh(e.inputs,t);let r=Xh(e.inputs,t);e.compute(eg(e.inputs,r),{inputs:[0]})},Vl=e=>{let t=e.starts,r=e.ends,n=e.axes;return re({starts:t,ends:r,axes:n})}});var tg,rg,Ll,Gl,Hl=U(()=>{"use strict";te();oe();Se();dt();ae();tg=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},rg=(e,t)=>{let r=e.inputs[0],n=r.dims,o=C.size(n),i=n.length,a=C.normalizeAxis(t.axis,i),d=a<n.length-1,l,p=[];d?(p=Array.from({length:i},(P,D)=>D),p[a]=i-1,p[i-1]=a,l=e.compute(Pe(r,p),{inputs:[r],outputs:[-1]})[0]):l=r;let m=l.dims,u=m[i-1],h=o/u,_=me(u),y=u/_,g=64;h===1&&(g=256);let x=(P,D)=>D===4?`max(max(${P}.x, ${P}.y), max(${P}.z, ${P}.w))`:D===2?`max(${P}.x, ${P}.y)`:D===3?`max(max(${P}.x, ${P}.y), ${P}.z)`:P,$=E("x",l.dataType,l.dims,_),v=M("result",l.dataType,l.dims,_),S=$.type.value,T=_e(l.dataType)==="f32"?`var threadMax = ${S}(-3.402823e+38f);`:`var threadMax = ${S}(-65504.0h);`,A=P=>`
6592
+ }`;return{name:"Slice",shaderCache:{hint:`${l.length}_${a.length}_${i.length}`,inputDependencies:["rank"]},getShaderSource:x,getRunData:()=>({outputs:[m],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:g})}},Nl=(e,t)=>{Qh(e.inputs,t);let r=Xh(e.inputs,t);e.compute(eg(e.inputs,r),{inputs:[0]})},Vl=e=>{let t=e.starts,r=e.ends,n=e.axes;return ee({starts:t,ends:r,axes:n})}});var tg,rg,Ll,Gl,Hl=U(()=>{"use strict";J();ne();xe();dt();ae();tg=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},rg=(e,t)=>{let r=e.inputs[0],n=r.dims,o=k.size(n),i=n.length,a=k.normalizeAxis(t.axis,i),d=a<n.length-1,l,p=[];d?(p=Array.from({length:i},(P,D)=>D),p[a]=i-1,p[i-1]=a,l=e.compute(Pe(r,p),{inputs:[r],outputs:[-1]})[0]):l=r;let m=l.dims,u=m[i-1],h=o/u,_=me(u),y=u/_,g=64;h===1&&(g=256);let x=(P,D)=>D===4?`max(max(${P}.x, ${P}.y), max(${P}.z, ${P}.w))`:D===2?`max(${P}.x, ${P}.y)`:D===3?`max(max(${P}.x, ${P}.y), ${P}.z)`:P,$=E("x",l.dataType,l.dims,_),v=M("result",l.dataType,l.dims,_),S=$.type.value,T=ye(l.dataType)==="f32"?`var threadMax = ${S}(-3.402823e+38f);`:`var threadMax = ${S}(-65504.0h);`,A=P=>`
6520
6593
  var<workgroup> rowMaxShared : ${S};
6521
6594
  var<workgroup> rowSumShared : ${S};
6522
6595
  var<workgroup> threadShared : array<${S}, ${g}>;
@@ -6579,7 +6652,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6579
6652
  workgroupBarrier();
6580
6653
  }
6581
6654
  if (lindex == 0) {
6582
- rowSumShared = ${S}(${Fe("threadShared[0]",_)});
6655
+ rowSumShared = ${S}(${qe("threadShared[0]",_)});
6583
6656
  }
6584
6657
  workgroupBarrier();
6585
6658
 
@@ -6588,7 +6661,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6588
6661
  let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared;
6589
6662
  setValue(row, col, row_stride, value);
6590
6663
  }
6591
- }`,k=e.compute({name:"Softmax",shaderCache:{hint:`${_};${g}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:m,dataType:l.dataType}],dispatchGroup:{x:h},programUniforms:[{type:6,data:y}]}),getShaderSource:A},{inputs:[l],outputs:[d?-1:0]})[0];d&&e.compute(Pe(k,p),{inputs:[k]})},Ll=(e,t)=>{tg(e.inputs),rg(e,t)},Gl=e=>re({axis:e.axis})});var Fl,ng,og,ig,ql,Kl=U(()=>{"use strict";te();oe();ae();Fl=e=>Array.from(e.getBigInt64Array(),Number),ng=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Fl(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor")},og=(e,t)=>{let r=[];for(let n=0;n<e.length;++n)r.push(e[n]*t[n]);return r},ig=(e,t)=>{let r=e[0].dims,n=t??Fl(e[1]),o=og(r,n),i=C.size(o),a=e[0].dataType,d=E("input",a,r.length),l=M("output",a,o.length),p=m=>`
6664
+ }`,C=e.compute({name:"Softmax",shaderCache:{hint:`${_};${g}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:m,dataType:l.dataType}],dispatchGroup:{x:h},programUniforms:[{type:6,data:y}]}),getShaderSource:A},{inputs:[l],outputs:[d?-1:0]})[0];d&&e.compute(Pe(C,p),{inputs:[C]})},Ll=(e,t)=>{tg(e.inputs),rg(e,t)},Gl=e=>ee({axis:e.axis})});var Fl,ng,og,ig,ql,Kl=U(()=>{"use strict";J();ne();ae();Fl=e=>Array.from(e.getBigInt64Array(),Number),ng=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Fl(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor")},og=(e,t)=>{let r=[];for(let n=0;n<e.length;++n)r.push(e[n]*t[n]);return r},ig=(e,t)=>{let r=e[0].dims,n=t??Fl(e[1]),o=og(r,n),i=k.size(o),a=e[0].dataType,d=E("input",a,r.length),l=M("output",a,o.length),p=m=>`
6592
6665
  const inputShape = ${d.indices(...r)};
6593
6666
  ${m.registerUniform("output_size","u32").declareVariables(d,l)}
6594
6667
  ${m.mainStart()}
@@ -6602,7 +6675,7 @@ fn calculateOutputIndex(index: u32) -> u32 {
6602
6675
  ${d.indicesSet("input_indices","i","input_dim_value")}
6603
6676
  }
6604
6677
  ${l.setByOffset("global_idx",d.getByIndices("input_indices"))}
6605
- }`;return{name:"Tile",shaderCache:{hint:`${n}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:[{type:12,data:i},...N(e[0].dims,o)]}),getShaderSource:p}},ql=e=>{ng(e.inputs),e.compute(ig(e.inputs),{inputs:[0]})}});var ag,sg,jl,Yl=U(()=>{"use strict";te();oe();ae();ag=(e,t,r,n,o)=>{let i=M("output_data",o,r.length,4),a=E("a_data",t[1].dataType,t[1].dims.length,4),d=E("b_data",t[2].dataType,t[2].dims.length,4),l=E("c_data",t[0].dataType,t[0].dims.length,4),p,m=(u,h,_)=>`select(${h}, ${u}, ${_})`;if(!n)p=i.setByOffset("global_idx",m(a.getByOffset("global_idx"),d.getByOffset("global_idx"),l.getByOffset("global_idx")));else{let u=(h,_,y="")=>{let g=`a_data[index_a${_}][component_a${_}]`,x=`b_data[index_b${_}][component_b${_}]`,$=`bool(c_data[index_c${_}] & (0xffu << (component_c${_} * 8)))`;return`
6678
+ }`;return{name:"Tile",shaderCache:{hint:`${n}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(i/64)},programUniforms:[{type:12,data:i},...N(e[0].dims,o)]}),getShaderSource:p}},ql=e=>{ng(e.inputs),e.compute(ig(e.inputs),{inputs:[0]})}});var ag,sg,jl,Yl=U(()=>{"use strict";J();ne();ae();ag=(e,t,r,n,o)=>{let i=M("output_data",o,r.length,4),a=E("a_data",t[1].dataType,t[1].dims.length,4),d=E("b_data",t[2].dataType,t[2].dims.length,4),l=E("c_data",t[0].dataType,t[0].dims.length,4),p,m=(u,h,_)=>`select(${h}, ${u}, ${_})`;if(!n)p=i.setByOffset("global_idx",m(a.getByOffset("global_idx"),d.getByOffset("global_idx"),l.getByOffset("global_idx")));else{let u=(h,_,y="")=>{let g=`a_data[index_a${_}][component_a${_}]`,x=`b_data[index_b${_}][component_b${_}]`,$=`bool(c_data[index_c${_}] & (0xffu << (component_c${_} * 8)))`;return`
6606
6679
  let output_indices${_} = ${i.offsetToIndices(`global_idx * 4u + ${_}u`)};
6607
6680
  let offset_a${_} = ${a.broadcastedIndicesToOffset(`output_indices${_}`,i)};
6608
6681
  let offset_b${_} = ${d.broadcastedIndicesToOffset(`output_indices${_}`,i)};
@@ -6630,10 +6703,10 @@ fn calculateOutputIndex(index: u32) -> u32 {
6630
6703
  ${e.mainStart()}
6631
6704
  ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
6632
6705
  ${p}
6633
- }`},sg=e=>{let t=e[1].dims,r=e[2].dims,n=e[0].dims,o=e[1].dataType,i=!(C.areEqual(t,r)&&C.areEqual(r,n)),a=t,d=C.size(t);if(i){let p=tt.calcShape(tt.calcShape(t,r,!1),n,!1);if(!p)throw new Error("Can't perform where op on the given tensors");a=p,d=C.size(a)}let l=Math.ceil(d/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:p=>ag(p,e,a,i,o),getRunData:()=>({outputs:[{dims:a,dataType:o}],dispatchGroup:{x:Math.ceil(d/64/4)},programUniforms:[{type:12,data:l},...N(n,t,r,a)]})}},jl=e=>{e.compute(sg(e.inputs))}});var Zl,Ql=U(()=>{"use strict";Ps();Fr();Ds();Ms();vu();zu();Bu();Qu();od();sd();ld();hd();yd();wd();xd();Id();kd();zd();Bd();Ud();qd();Yd();Qd();Jd();rl();So();ol();vl();Sl();Il();kl();Gr();Ol();Bl();Rl();Wl();Hl();Io();Kl();dt();Kr();Yl();Zl=new Map([["Abs",[Rs]],["Acos",[Us]],["Acosh",[Ns]],["Add",[$u]],["ArgMax",[Es,co]],["ArgMin",[ks,co]],["Asin",[Vs]],["Asinh",[Ws]],["Atan",[Ls]],["Atanh",[Gs]],["Attention",[zs]],["AveragePool",[pl,cl]],["BatchNormalization",[Os]],["BiasAdd",[Bs]],["BiasSplitGelu",[wu]],["Cast",[Fs,Hs]],["Ceil",[Ks]],["Clip",[qs]],["Concat",[Ou,Du]],["Conv",[wo,_o]],["ConvTranspose",[nd,td]],["Cos",[js]],["Cosh",[Ys]],["CumSum",[id,ad]],["DepthToSpace",[ud,dd]],["DequantizeLinear",[$l,xl]],["Div",[xu]],["Einsum",[md,fd]],["Elu",[Zs,Xt]],["Equal",[Su]],["Erf",[Qs]],["Exp",[Xs]],["Expand",[bd]],["FastGelu",[_d]],["Floor",[Js]],["FusedConv",[wo,_o]],["Gather",[$d,vd]],["GatherElements",[Pd,Ed]],["GatherBlockQuantized",[Cd,Ad]],["GatherND",[Sd,Td]],["Gelu",[eu]],["Gemm",[Dd,Od]],["GlobalAveragePool",[hl,fl]],["GlobalMaxPool",[wl,_l]],["Greater",[Au]],["GreaterOrEqual",[Eu]],["GridSample",[Md,Rd]],["GroupQueryAttention",[Fd]],["HardSigmoid",[uu,su]],["InstanceNormalization",[jd]],["LayerNormalization",[Zd]],["LeakyRelu",[tu,Xt]],["Less",[ku]],["LessOrEqual",[Pu]],["Log",[bu]],["MatMul",[Xd]],["MatMulNBits",[el,tl]],["MaxPool",[bl,yl]],["Mul",[Tu]],["MultiHeadAttention",[Wd,Vd]],["Neg",[nu]],["Not",[ru]],["Pad",[nl]],["Pow",[Iu]],["QuickGelu",[yu,Xt]],["Range",[Tl]],["Reciprocal",[ou]],["ReduceMin",[xs]],["ReduceMean",[ys]],["ReduceMax",[$s]],["ReduceSum",[Ts]],["ReduceProd",[Ss]],["ReduceL1",[_s]],["ReduceL2",[ws]],["ReduceLogSum",[Cs]],["ReduceLogSumExp",[vs]],["ReduceSumSquare",[Is]],["Relu",[iu]],["Resize",[Pl,zl]],["RotaryEmbedding",[Dl]],["ScatterND",[Al,Cl]],["Sigmoid",[au]],["Sin",[du]],["Sinh",[lu]],["Slice",[Nl,Vl]],["SkipLayerNormalization",[Ml]],["Split",[Ld,Gd]],["Sqrt",[cu]],["Softmax",[Ll,Gl]],["Sub",[Cu]],["Tan",[pu]],["Tanh",[fu]],["ThresholdedRelu",[gu,Xt]],["Tile",[ql]],["Transpose",[is,as]],["Where",[jl]]])});var nn,Xl=U(()=>{"use strict";We();et();ae();nn=class{constructor(t){this.backend=t;this.repo=new Map,this.attributesBound=!1}getArtifact(t){return this.repo.get(t)}setArtifact(t,r){this.repo.set(t,r)}run(t,r,n,o,i){Ue(t.programInfo.name);let a=this.backend.device,d=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let l=[];for(let m of r)l.push({binding:l.length,resource:{buffer:m.buffer}});for(let m of n)l.push({binding:l.length,resource:{buffer:m.buffer}});i&&l.push({binding:l.length,resource:i});let p=a.createBindGroup({layout:t.computePipeline.getBindGroupLayout(0),entries:l,label:t.programInfo.name});if(this.backend.sessionStatus==="capturing"){let m={kernelId:this.backend.currentKernelId,computePipeline:t.computePipeline,bindGroup:p,dispatchGroup:o};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(m)}d.setPipeline(t.computePipeline),d.setBindGroup(0,p),d.dispatchWorkgroups(...o),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),De(t.programInfo.name)}dispose(){}build(t,r){Ue(t.name);let n=this.backend.device,o=[];[{feature:"shader-f16",extension:"f16"},{feature:"subgroups",extension:"subgroups"},{feature:"subgroups-f16",extension:"subgroups_f16"}].forEach(u=>{n.features.has(u.feature)&&o.push(`enable ${u.extension};`)});let a=ns(r,this.backend.device.limits),d=t.getShaderSource(a),l=`${o.join(`
6706
+ }`},sg=e=>{let t=e[1].dims,r=e[2].dims,n=e[0].dims,o=e[1].dataType,i=!(k.areEqual(t,r)&&k.areEqual(r,n)),a=t,d=k.size(t);if(i){let p=tt.calcShape(tt.calcShape(t,r,!1),n,!1);if(!p)throw new Error("Can't perform where op on the given tensors");a=p,d=k.size(a)}let l=Math.ceil(d/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:p=>ag(p,e,a,i,o),getRunData:()=>({outputs:[{dims:a,dataType:o}],dispatchGroup:{x:Math.ceil(d/64/4)},programUniforms:[{type:12,data:l},...N(n,t,r,a)]})}},jl=e=>{e.compute(sg(e.inputs))}});var Zl,Ql=U(()=>{"use strict";Es();Hr();Os();Bs();wu();Pu();Du();Zu();nd();ad();dd();fd();bd();_d();$d();Td();Ad();Pd();Dd();Rd();Fd();jd();Zd();Xd();tl();xo();nl();wl();xl();Tl();Al();Lr();Ol();Bl();Rl();Wl();Hl();To();Kl();dt();qr();Yl();Zl=new Map([["Abs",[Ms]],["Acos",[Rs]],["Acosh",[Us]],["Add",[vu]],["ArgMax",[ks,lo]],["ArgMin",[As,lo]],["Asin",[Ns]],["Asinh",[Vs]],["Atan",[Ws]],["Atanh",[Ls]],["Attention",[Ps]],["AveragePool",[cl,ll]],["BatchNormalization",[zs]],["BiasAdd",[Ds]],["BiasSplitGelu",[_u]],["Cast",[Hs,Gs]],["Ceil",[qs]],["Clip",[Fs]],["Concat",[zu,Ou]],["Conv",[_o,yo]],["ConvTranspose",[rd,ed]],["Cos",[Ks]],["Cosh",[js]],["CumSum",[od,id]],["DepthToSpace",[sd,ud]],["DequantizeLinear",[vl,$l]],["Div",[$u]],["Einsum",[pd,md]],["Elu",[Ys,Qt]],["Equal",[xu]],["Erf",[Zs]],["Exp",[Qs]],["Expand",[gd]],["FastGelu",[yd]],["Floor",[Xs]],["FusedConv",[_o,yo]],["Gather",[vd,wd]],["GatherElements",[Ed,kd]],["GatherBlockQuantized",[Id,Cd]],["GatherND",[xd,Sd]],["Gelu",[Js]],["Gemm",[Od,zd]],["GlobalAveragePool",[fl,ml]],["GlobalMaxPool",[_l,yl]],["Greater",[Cu]],["GreaterOrEqual",[ku]],["GridSample",[Bd,Md]],["GroupQueryAttention",[Hd]],["HardSigmoid",[su,au]],["InstanceNormalization",[Kd]],["LayerNormalization",[Yd]],["LeakyRelu",[eu,Qt]],["Less",[Au]],["LessOrEqual",[Eu]],["Log",[gu]],["MatMul",[Qd]],["MatMulNBits",[Jd,el]],["MaxPool",[gl,bl]],["Mul",[Su]],["MultiHeadAttention",[Vd,Nd]],["Neg",[ru]],["Not",[tu]],["Pad",[rl]],["Pow",[Tu]],["QuickGelu",[bu,Qt]],["Range",[Sl]],["Reciprocal",[nu]],["ReduceMin",[$s]],["ReduceMean",[bs]],["ReduceMax",[vs]],["ReduceSum",[Ss]],["ReduceProd",[xs]],["ReduceL1",[ys]],["ReduceL2",[_s]],["ReduceLogSum",[Is]],["ReduceLogSumExp",[ws]],["ReduceSumSquare",[Ts]],["Relu",[ou]],["Resize",[Pl,zl]],["RotaryEmbedding",[Dl]],["ScatterND",[Cl,Il]],["Sigmoid",[iu]],["Sin",[uu]],["Sinh",[du]],["Slice",[Nl,Vl]],["SkipLayerNormalization",[Ml]],["Split",[Wd,Ld]],["Sqrt",[lu]],["Softmax",[Ll,Gl]],["Sub",[Iu]],["Tan",[cu]],["Tanh",[mu]],["ThresholdedRelu",[hu,Qt]],["Tile",[ql]],["Transpose",[os,is]],["Where",[jl]]])});var rn,Xl=U(()=>{"use strict";Le();et();ae();rn=class{constructor(t){this.backend=t;this.repo=new Map,this.attributesBound=!1}getArtifact(t){return this.repo.get(t)}setArtifact(t,r){this.repo.set(t,r)}run(t,r,n,o,i){Ne(t.programInfo.name);let a=this.backend.device,d=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let l=[];for(let m of r)l.push({binding:l.length,resource:{buffer:m.buffer}});for(let m of n)l.push({binding:l.length,resource:{buffer:m.buffer}});i&&l.push({binding:l.length,resource:i});let p=a.createBindGroup({layout:t.computePipeline.getBindGroupLayout(0),entries:l,label:t.programInfo.name});if(this.backend.sessionStatus==="capturing"){let m={kernelId:this.backend.currentKernelId,computePipeline:t.computePipeline,bindGroup:p,dispatchGroup:o};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(m)}d.setPipeline(t.computePipeline),d.setBindGroup(0,p),d.dispatchWorkgroups(...o),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),Be(t.programInfo.name)}dispose(){}build(t,r){Ne(t.name);let n=this.backend.device,o=[];[{feature:"shader-f16",extension:"f16"},{feature:"subgroups",extension:"subgroups"},{feature:"subgroups-f16",extension:"subgroups_f16"}].forEach(u=>{n.features.has(u.feature)&&o.push(`enable ${u.extension};`)});let a=rs(r,this.backend.device.limits),d=t.getShaderSource(a),l=`${o.join(`
6634
6707
  `)}
6635
6708
  ${a.additionalImplementations}
6636
- ${d}`,p=n.createShaderModule({code:l,label:t.name});ue("verbose",()=>`[WebGPU] ${t.name} shader code: ${l}`);let m=n.createComputePipeline({compute:{module:p,entryPoint:"main"},layout:"auto",label:t.name});return De(t.name),{programInfo:t,computePipeline:m,uniformVariablesInfo:a.variablesInfo}}normalizeDispatchGroupSize(t){let r=typeof t=="number"?t:t.x,n=typeof t=="number"?1:t.y||1,o=typeof t=="number"?1:t.z||1,i=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(r<=i&&n<=i&&o<=i)return[r,n,o];let a=r*n*o,d=Math.ceil(Math.sqrt(a));if(d>i){if(d=Math.ceil(Math.cbrt(a)),d>i)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[d,d,d]}else return[d,d,1]}}});var ug,dg,Co,Ao,on,Jl=U(()=>{"use strict";We();te();et();Jn();Ja();Ql();Xl();ug=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let r=[];for(let n=0;n<e.length;++n){let o=e[n].dataType;switch(t[n]){case"none":{r.push("");break}case"type":{r.push(`${o}`);break}case"rank":{let i=e[n].dims.length;r.push(`${o};${i}`);break}case"dims":{let i=e[n].dims.join(",");r.push(`${o};${i}`);break}default:throw new Error(`unsupported input dependency: ${t[n]}`)}}return r.join("|")},dg=(e,t,r)=>{let n=e.name;return e.shaderCache?.hint&&(n+="["+e.shaderCache.hint+"]"),n+=":"+r+`:${ug(t,e.shaderCache?.inputDependencies??new Array(t.length).fill("dims"))}`,n},Co=class{constructor(t){t&&(this.architecture=t.architecture,this.vendor=t.vendor)}isArchitecture(t){return this.architecture===t}isVendor(t){return this.vendor===t}},Ao=class{constructor(t){this.subgroupsSupported=t.features.has("subgroups"),this.subgroupsF16Supported=t.features.has("subgroups");let r=t.limits;!this.subgroupsSupported||!r.minSubgroupSize||!r.maxSubgroupSize?this.subgroupSizeRange=void 0:this.subgroupSizeRange=[r.minSubgroupSize,r.maxSubgroupSize]}},on=class{constructor(){this.currentSessionId=null;this.currentKernelId=null;this.commandEncoder=null;this.computePassEncoder=null;this.maxDispatchNumber=16;this.pendingDispatchNumber=0;this.pendingKernels=[];this.pendingQueries=new Map;this.sessionStatus="default";this.capturedCommandList=new Map;this.capturedPendingKernels=new Map;this.sessionExternalDataMapping=new Map}get currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");let t=this.kernelCustomData.get(this.currentKernelId);return t||(t={},this.kernelCustomData.set(this.currentKernelId,t)),t}async initialize(t,r){this.env=t;let n=[],o={requiredLimits:{maxComputeWorkgroupStorageSize:r.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:r.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:r.limits.maxStorageBufferBindingSize,maxBufferSize:r.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:r.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:r.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:r.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:r.limits.maxComputeWorkgroupSizeZ},requiredFeatures:n},i=a=>r.features.has(a)&&n.push(a)&&!0;i("chromium-experimental-timestamp-query-inside-passes")||i("timestamp-query"),i("shader-f16"),i("subgroups")&&i("subgroups-f16"),this.device=await r.requestDevice(o),this.deviceInfo=new Ao(this.device),this.adapterInfo=new Co(r.info||await r.requestAdapterInfo()),this.gpuDataManager=Xa(this),this.programManager=new nn(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,Rr(t.logLevel,!!t.debug),this.device.onuncapturederror=a=>{a.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${a.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:r,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let t=this.getCommandEncoder(),r={};this.queryType==="at-passes"&&(r.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=t.beginComputePass(r)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Ue(),this.endComputePass();let t;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),t=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(t,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,t,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&t.mapAsync(GPUMapMode.READ).then(()=>{let r=new BigUint64Array(t.getMappedRange()),n=this.pendingQueries.get(t);for(let o=0;o<r.length/2;o++){let i=n[o],a=i.kernelId,d=this.kernels.get(a),l=d.kernelType,p=d.kernelName,m=i.programName,u=i.inputTensorViews,h=i.outputTensorViews,_=r[o*2],y=r[o*2+1];typeof this.queryTimeBase>"u"&&(this.queryTimeBase=_);let g=Number(_-this.queryTimeBase),x=Number(y-this.queryTimeBase);if(!Number.isSafeInteger(g)||!Number.isSafeInteger(x))throw new RangeError("incorrect timestamp range");if(this.env.webgpu.profiling?.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:u.map($=>({dims:$.dims,dataType:bt($.dataType)})),outputsMetadata:h.map($=>({dims:$.dims,dataType:bt($.dataType)})),kernelId:a,kernelType:l,kernelName:p,programName:m,startTime:g,endTime:x});else{let $="";u.forEach((S,T)=>{$+=`input[${T}]: [${S.dims}] | ${bt(S.dataType)}, `});let v="";h.forEach((S,T)=>{v+=`output[${T}]: [${S.dims}] | ${bt(S.dataType)}, `}),console.log(`[profiling] kernel "${a}|${l}|${p}|${m}" ${$}${v}execution time: ${x-g} ns`)}wr("GPU",`${m}::${_}::${y}`)}t.unmap(),this.pendingQueries.delete(t)}),De()}run(t,r,n,o,i,a){Ue(t.name);let d=[];for(let S=0;S<r.length;++S){let T=r[S].data;if(T===0)continue;let A=this.gpuDataManager.get(T);if(!A)throw new Error(`no GPU data for input: ${T}`);d.push(A)}let{outputs:l,dispatchGroup:p,programUniforms:m}=t.getRunData(r),u=n.length===0?l.map((S,T)=>T):n;if(u.length!==l.length)throw new Error(`Output size ${u.length} must be equal to ${l.length}.`);let h=[],_=[];for(let S=0;S<l.length;++S){if(!Number.isInteger(u[S])||u[S]<-3||u[S]>=a)throw new Error(`Invalid output index: ${u[S]}`);if(u[S]===-3)continue;let T=u[S]===-1,A=u[S]===-2,k=T||A?i(l[S].dataType,l[S].dims):o(u[S],l[S].dataType,l[S].dims);if(h.push(k),k.data===0)continue;let P=this.gpuDataManager.get(k.data);if(!P)throw new Error(`no GPU data for output: ${k.data}`);if(T&&this.temporaryData.push(P),A){let D=this.kernelPersistentData.get(this.currentKernelId);D||(D=[],this.kernelPersistentData.set(this.currentKernelId,D)),D.push(P)}_.push(P)}if(d.length!==r.length||_.length!==h.length){if(_.length===0)return De(t.name),h;throw new Error(`Program ${t.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`)}let y;if(m){let S=0,T=[];m.forEach(D=>{let R=typeof D.data=="number"?[D.data]:D.data;if(R.length===0)return;let G=D.type===10?2:4,K,j;D.type===10?(j=R.length>4?16:R.length>2?8:R.length*G,K=R.length>4?16:G*R.length):(j=R.length<=2?R.length*G:16,K=16),S=Math.ceil(S/j)*j,T.push(S);let V=D.type===10?8:4;S+=R.length>4?Math.ceil(R.length/V)*K:R.length*G});let A=16;S=Math.ceil(S/A)*A;let k=new ArrayBuffer(S);m.forEach((D,R)=>{let G=T[R],K=typeof D.data=="number"?[D.data]:D.data;if(D.type===6)new Int32Array(k,G,K.length).set(K);else if(D.type===12)new Uint32Array(k,G,K.length).set(K);else if(D.type===10)new Uint16Array(k,G,K.length).set(K);else if(D.type===1)new Float32Array(k,G,K.length).set(K);else throw new Error(`Unsupported uniform type: ${bt(D.type)}`)});let P=this.gpuDataManager.create(S,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(P.buffer,0,k,0,S),this.gpuDataManager.release(P.id),y={offset:0,size:S,buffer:P.buffer}}let g=this.programManager.normalizeDispatchGroupSize(p),x=g[1]===1&&g[2]===1,$=dg(t,r,x),v=this.programManager.getArtifact($);if(v||(v=this.programManager.build(t,g),this.programManager.setArtifact($,v),ue("info",()=>`[artifact] key: ${$}, programName: ${t.name}`)),m&&v.uniformVariablesInfo){if(m.length!==v.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${v.uniformVariablesInfo.length}, got ${m.length} in program "${v.programInfo.name}".`);for(let S=0;S<m.length;S++){let T=m[S],A=T.type,k=typeof T.data=="number"?1:T.data.length,[P,D]=v.uniformVariablesInfo[S];if(A!==P||k!==D)throw new Error(`Uniform variable ${S} mismatch: expect type ${P} with size ${D}, got type ${A} with size ${k} in program "${v.programInfo.name}".`)}}if(ue("info",()=>`[ProgramManager] run "${t.name}" (key=${$}) with ${g[0]}x${g[1]}x${g[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let S={kernelId:this.currentKernelId,programName:v.programInfo.name,inputTensorViews:r,outputTensorViews:h};this.pendingKernels.push(S),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(S)}return this.programManager.run(v,d,_,g,y),De(t.name),h}upload(t,r){this.gpuDataManager.upload(t,r)}memcpy(t,r){this.gpuDataManager.memcpy(t,r)}async download(t,r){await this.gpuDataManager.download(t,r)}alloc(t){return this.gpuDataManager.create(t).id}free(t){return this.gpuDataManager.release(t)}createKernel(t,r,n,o){let i=Zl.get(t);if(!i)throw new Error(`kernel not implemented: ${t}`);let a={kernelType:t,kernelName:o,kernelEntry:i[0],attributes:[i[1],n]};this.kernels.set(r,a)}releaseKernel(t){let r=this.kernelPersistentData.get(t);if(r){for(let n of r)this.gpuDataManager.release(n.id);this.kernelPersistentData.delete(t)}this.kernelCustomData.delete(t),this.kernels.delete(t)}computeKernel(t,r,n){let o=this.kernels.get(t);if(!o)throw new Error(`kernel not created: ${t}`);let i=o.kernelType,a=o.kernelName,d=o.kernelEntry,l=o.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${i}] ${a}" is not allowed to be called recursively`);this.currentKernelId=t,l[0]&&(l[1]=l[0](l[1]),l[0]=void 0),ue("info",()=>`[WebGPU] Start to run kernel "[${i}] ${a}"...`);let p=this.env.debug;this.temporaryData=[];try{return p&&this.device.pushErrorScope("validation"),d(r,l[1]),0}catch(m){return n.push(Promise.resolve(`[WebGPU] Kernel "[${i}] ${a}" failed. ${m}`)),1}finally{p&&n.push(this.device.popErrorScope().then(m=>m?`GPU validation error for kernel "[${i}] ${a}": ${m.message}`:null));for(let m of this.temporaryData)this.gpuDataManager.release(m.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(t,r,n,o){let i=this.sessionExternalDataMapping.get(t);i||(i=new Map,this.sessionExternalDataMapping.set(t,i));let a=i.get(r),d=this.gpuDataManager.registerExternalBuffer(n,o,a);return i.set(r,[d,n]),d}unregisterBuffers(t){let r=this.sessionExternalDataMapping.get(t);r&&(r.forEach(n=>this.gpuDataManager.unregisterExternalBuffer(n[0])),this.sessionExternalDataMapping.delete(t))}getBuffer(t){let r=this.gpuDataManager.get(t);if(!r)throw new Error(`no GPU data for buffer: ${t}`);return r.buffer}createDownloader(t,r,n){return async()=>{let o=await no(this,t,r);return Ur(o.buffer,n)}}writeTimestamp(t){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,t)}setQueryType(){this.queryType="none",(this.env.webgpu.profiling?.mode==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){ue("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){ue("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){ue("info","replay"),this.sessionStatus="replaying";let t=this.capturedCommandList.get(this.currentSessionId),r=this.capturedPendingKernels.get(this.currentSessionId),n=t.length;this.pendingKernels=[];for(let o=0;o<n;o++){let i=this.getComputePassEncoder(),a=t[o];this.writeTimestamp(this.pendingDispatchNumber*2),i.setPipeline(a.computePipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(...a.dispatchGroup),this.writeTimestamp(this.pendingDispatchNumber*2+1),this.pendingDispatchNumber++,this.queryType!=="none"&&this.pendingKernels.push(r[o]),(this.pendingDispatchNumber>=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onCreateSession(){this.gpuDataManager.onCreateSession()}onReleaseSession(t){this.unregisterBuffers(t),this.capturedCommandList.has(t)&&this.capturedCommandList.delete(t),this.capturedPendingKernels.has(t)&&this.capturedPendingKernels.delete(t),this.gpuDataManager.onReleaseSession(t)}onRunStart(t){this.currentSessionId=t,this.setQueryType()}}});var lg,ec,cg,tc,an,sn,ko,rc,nc=U(()=>{"use strict";et();lg=1,ec=()=>lg++,cg=new Map([["float32",32],["float16",16],["int32",32],["uint32",32],["int64",64],["uint64",64],["int8",8],["uint8",8],["int4",4],["uint4",4]]),tc=(e,t)=>{let r=cg.get(e);if(!r)throw new Error("Unsupported data type.");return t.length>0?Math.ceil(t.reduce((n,o)=>n*o)*r/8):0},an=class{constructor(t){this.sessionId=t.sessionId,this.mlContext=t.context,this.mlTensor=t.tensor,this.dataType=t.dataType,this.tensorShape=t.shape}get tensor(){return this.mlTensor}get type(){return this.dataType}get shape(){return this.tensorShape}get byteLength(){return tc(this.dataType,this.tensorShape)}destroy(){ue("verbose",()=>"[WebNN] TensorWrapper.destroy"),this.mlTensor.destroy()}write(t){this.mlContext.writeTensor(this.mlTensor,t)}async read(t){return t?this.mlContext.readTensor(this.mlTensor,t):this.mlContext.readTensor(this.mlTensor)}canReuseTensor(t,r,n){return this.mlContext===t&&this.dataType===r&&this.tensorShape.length===n.length&&this.tensorShape.every((o,i)=>o===n[i])}},sn=class{constructor(t,r){this.tensorManager=t;this.wrapper=r}get tensorWrapper(){return this.wrapper}releaseTensor(){this.tensorWrapper&&(this.tensorManager.releaseTensor(this.tensorWrapper),this.wrapper=void 0)}async ensureTensor(t,r,n,o){if(this.wrapper){if(this.wrapper.canReuseTensor(t,r,n))return this.wrapper.tensor;if(o){if(this.wrapper.byteLength!==tc(r,n))throw new Error("Unable to copy data to tensor with different size.");this.activeUpload=new Uint8Array(await this.wrapper.read())}this.tensorManager.releaseTensor(this.wrapper)}let i=typeof MLTensorUsage>"u"?void 0:MLTensorUsage.READ|MLTensorUsage.WRITE;return this.wrapper=await this.tensorManager.getCachedTensor(r,n,i,!0,!0),o&&this.activeUpload&&(this.wrapper.write(this.activeUpload),this.activeUpload=void 0),this.wrapper.tensor}upload(t){if(this.wrapper)if(t.byteLength===this.wrapper.byteLength){this.wrapper.write(t);return}else ue("verbose",()=>"Data size does not match tensor size. Releasing tensor."),this.releaseTensor();this.activeUpload?this.activeUpload.set(t):this.activeUpload=new Uint8Array(t)}async download(t){if(this.activeUpload)if(t){t instanceof ArrayBuffer?new Uint8Array(t).set(this.activeUpload):new Uint8Array(t.buffer,t.byteOffset,t.byteLength).set(this.activeUpload);return}else return this.activeUpload.buffer;if(!this.wrapper)throw new Error("Tensor has not been created.");return t?this.wrapper.read(t):this.wrapper.read()}},ko=class{constructor(t){this.backend=t;this.tensorTrackersById=new Map;this.freeTensors=[];this.externalTensors=new Set}reserveTensorId(){let t=ec();return this.tensorTrackersById.set(t,new sn(this)),t}releaseTensorId(t){let r=this.tensorTrackersById.get(t);r&&(this.tensorTrackersById.delete(t),r.tensorWrapper&&this.releaseTensor(r.tensorWrapper))}async ensureTensor(t,r,n,o){ue("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${t}, dataType: ${r}, shape: ${n}, copyOld: ${o}}`);let i=this.tensorTrackersById.get(t);if(!i)throw new Error("Tensor not found.");return i.ensureTensor(this.backend.currentContext,r,n,o)}upload(t,r){let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");n.upload(r)}async download(t,r){ue("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${t}, dstBuffer: ${r?.byteLength}}`);let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");return n.download(r)}releaseTensorsForSession(t){for(let r of this.freeTensors)r.sessionId===t&&r.destroy();this.freeTensors=this.freeTensors.filter(r=>r.sessionId!==t)}registerTensor(t,r,n,o){let i=ec(),a=new an({sessionId:this.backend.currentSessionId,context:t,tensor:r,dataType:n,shape:o});return this.tensorTrackersById.set(i,new sn(this,a)),this.externalTensors.add(a),i}async getCachedTensor(t,r,n,o,i){let a=this.backend.currentSessionId,d=this.backend.currentContext;for(let[p,m]of this.freeTensors.entries())if(m.canReuseTensor(d,t,r)){ue("verbose",()=>`[WebNN] Reusing tensor {dataType: ${t}, shape: ${r}}`);let u=this.freeTensors.splice(p,1)[0];return u.sessionId=a,u}ue("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${t}, shape: ${r}}`);let l=await d.createTensor({dataType:t,shape:r,dimensions:r,usage:n,writable:o,readable:i});return new an({sessionId:a,context:d,tensor:l,dataType:t,shape:r})}releaseTensor(t){this.externalTensors.has(t)&&this.externalTensors.delete(t),this.freeTensors.push(t)}},rc=(...e)=>new ko(...e)});var oc,pg,un,ic=U(()=>{"use strict";te();gt();Jn();nc();et();oc=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[22,"int4"],[21,"uint4"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),pg=(e,t)=>{if(e===t)return!0;if(e===void 0||t===void 0)return!1;let r=Object.keys(e).sort(),n=Object.keys(t).sort();return r.length===n.length&&r.every((o,i)=>o===n[i]&&e[o]===t[o])},un=class{constructor(t){this.tensorManager=rc(this);this.mlContextBySessionId=new Map;this.sessionIdsByMLContext=new Map;this.mlContextCache=[];Rr(t.logLevel,!!t.debug)}get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(t){this.activeSessionId=t}async createMLContext(t){if(t instanceof GPUDevice){let n=this.mlContextCache.findIndex(o=>o.gpuDevice===t);if(n!==-1)return this.mlContextCache[n].mlContext;{let o=await navigator.ml.createContext(t);return this.mlContextCache.push({gpuDevice:t,mlContext:o}),o}}else if(t===void 0){let n=this.mlContextCache.findIndex(o=>o.options===void 0&&o.gpuDevice===void 0);if(n!==-1)return this.mlContextCache[n].mlContext;{let o=await navigator.ml.createContext();return this.mlContextCache.push({mlContext:o}),o}}let r=this.mlContextCache.findIndex(n=>pg(n.options,t));if(r!==-1)return this.mlContextCache[r].mlContext;{let n=await navigator.ml.createContext(t);return this.mlContextCache.push({options:t,mlContext:n}),n}}get currentContext(){let t=this.getMLContext(this.currentSessionId);if(!t)throw new Error(`No MLContext found for session ${this.currentSessionId}`);return t}registerMLContext(t,r){this.mlContextBySessionId.set(t,r);let n=this.sessionIdsByMLContext.get(r);n||(n=new Set,this.sessionIdsByMLContext.set(r,n)),n.add(t)}onReleaseSession(t){let r=this.mlContextBySessionId.get(t);if(!r)return;this.tensorManager.releaseTensorsForSession(t),this.mlContextBySessionId.delete(t);let n=this.sessionIdsByMLContext.get(r);if(n.delete(t),n.size===0){this.sessionIdsByMLContext.delete(r);let o=this.mlContextCache.findIndex(i=>i.mlContext===r);o!==-1&&this.mlContextCache.splice(o,1)}}getMLContext(t){return this.mlContextBySessionId.get(t)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(t){ue("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${t}}`),this.tensorManager.releaseTensorId(t)}async ensureTensor(t,r,n,o){let i=oc.get(r);if(!i)throw new Error(`Unsupported ONNX data type: ${r}`);return this.tensorManager.ensureTensor(t,i,n,o)}uploadTensor(t,r){if(!Ie().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");ue("verbose",()=>`[WebNN] uploadTensor {tensorId: ${t}, data: ${r.byteLength}}`),this.tensorManager.upload(t,r)}async downloadTensor(t,r){return this.tensorManager.download(t,r)}createMLTensorDownloader(t,r){return async()=>{let n=await this.tensorManager.download(t);return Ur(n,r)}}registerMLTensor(t,r,n){let o=oc.get(r);if(!o)throw new Error(`Unsupported ONNX data type: ${r}`);let i=this.tensorManager.registerTensor(this.currentContext,t,o,n);return ue("verbose",()=>`[WebNN] registerMLTensor {tensor: ${t}, dataType: ${o}, dimensions: ${n}} -> {tensorId: ${i}}`),i}registerMLConstant(t,r,n,o,i,a){if(!a)throw new Error("External mounted files are not available.");let d=t;t.startsWith("./")&&(d=t.substring(2));let l=a.get(d);if(!l)throw new Error(`File with name ${d} not found in preloaded files.`);if(r+n>l.byteLength)throw new Error("Out of bounds: data offset and length exceed the external file data size.");let p=l.slice(r,r+n).buffer,m;switch(i.dataType){case"float32":m=new Float32Array(p);break;case"float16":m=new Uint16Array(p);break;case"int32":m=new Int32Array(p);break;case"uint32":m=new Uint32Array(p);break;case"int64":m=new BigInt64Array(p);break;case"uint64":m=new BigUint64Array(p);break;case"int8":m=new Int8Array(p);break;case"int4":case"uint4":case"uint8":m=new Uint8Array(p);break;default:throw new Error(`Unsupported data type: ${i.dataType} in creating WebNN Constant from external data.`)}return ue("verbose",()=>`[WebNN] registerMLConstant {dataType: ${i.dataType}, shape: ${i.shape}}}`),o.constant(i,m)}flush(){}}});var ac={};Ft(ac,{init:()=>mg});var rr,Eo,mg,sc=U(()=>{"use strict";te();Jl();et();oe();ic();rr=class e{constructor(t,r,n,o){this.module=t;this.dataType=r;this.data=n;this.dims=o}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=C.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let t=C.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new Error("Invalid data type");let t=C.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=C.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(C.size(t)!==C.size(this.dims))throw new Error("Invalid new shape");return new e(this.module,this.dataType,this.data,t)}},Eo=class{constructor(t,r,n){this.module=t;this.backend=r;this.customDataOffset=0;this.customDataSize=0;this.adapterInfo=r.adapterInfo,this.deviceInfo=r.deviceInfo;let o=t.PTR_SIZE,i=n/t.PTR_SIZE,a=o===4?"i32":"i64";this.opKernelContext=Number(t.getValue(o*i++,a));let d=Number(t.getValue(o*i++,a));this.outputCount=Number(t.getValue(o*i++,a)),this.customDataOffset=Number(t.getValue(o*i++,"*")),this.customDataSize=Number(t.getValue(o*i++,a));let l=[];for(let p=0;p<d;p++){let m=Number(t.getValue(o*i++,a)),u=Number(t.getValue(o*i++,"*")),h=Number(t.getValue(o*i++,a)),_=[];for(let y=0;y<h;y++)_.push(Number(t.getValue(o*i++,a)));l.push(new rr(t,m,u,_))}this.inputs=l}get kernelCustomData(){return this.backend.currentKernelCustomData}get customDataBuffer(){return this.module.HEAPU8.subarray(this.customDataOffset,this.customDataOffset+this.customDataSize)}compute(t,r){let n=r?.inputs?.map(d=>typeof d=="number"?this.inputs[d]:d)??this.inputs,o=r?.outputs??[],i=(d,l,p)=>new rr(this.module,l,this.output(d,p),p),a=(d,l)=>{let p=Ct(d,l);if(!p)throw new Error(`Unsupported data type: ${d}`);let m=p>0?this.backend.gpuDataManager.create(p).id:0;return new rr(this.module,d,m,l)};return this.backend.run(t,n,o,i,a,this.outputCount)}output(t,r){let n=this.module.stackSave();try{let o=this.module.PTR_SIZE,i=o===4?"i32":"i64",a=this.module.stackAlloc((1+r.length)*o);this.module.setValue(a,r.length,i);for(let d=0;d<r.length;d++)this.module.setValue(a+o*(d+1),r[d],i);return this.module._JsepOutput(this.opKernelContext,t,a)}catch(o){throw new Error(`Failed to generate kernel's output[${t}] with dims [${r}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${o}`)}finally{this.module.stackRestore(n)}}},mg=async(e,t,r,n)=>{let o=t.jsepInit;if(!o)throw new Error("Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let i=new on;await i.initialize(r,n),o("webgpu",[i,a=>i.alloc(Number(a)),a=>i.free(a),(a,d,l,p=!1)=>{if(p)ue("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${Number(a)}, dst=${Number(d)}, size=${Number(l)}`),i.memcpy(Number(a),Number(d));else{ue("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(a)}, gpuDataId=${Number(d)}, size=${Number(l)}`);let m=t.HEAPU8.subarray(Number(a>>>0),Number(a>>>0)+Number(l));i.upload(Number(d),m)}},async(a,d,l)=>{ue("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${a}, dataOffset=${d}, size=${l}`),await i.download(Number(a),()=>t.HEAPU8.subarray(Number(d)>>>0,Number(d+l)>>>0))},(a,d,l)=>i.createKernel(a,Number(d),l,t.UTF8ToString(t._JsepGetNodeName(Number(d)))),a=>i.releaseKernel(a),(a,d,l,p)=>{ue("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${l}, kernel=${a}, contextDataOffset=${d}`);let m=new Eo(t,i,Number(d));return i.computeKernel(Number(a),m,p)},()=>i.captureBegin(),()=>i.captureEnd(),()=>i.replay()])}else{let i=new un(r);o("webnn",[i,()=>i.reserveTensorId(),a=>i.releaseTensorId(a),async(a,d,l,p)=>i.ensureTensor(a,d,l,p),(a,d)=>{i.uploadTensor(a,d)},async(a,d)=>i.downloadTensor(a,d)])}}});var fg,Tr,Ir,Pt,hg,Kt,Cr,Ar,uc,kr,Er,Pr,qn=U(()=>{"use strict";Ha();qa();te();gt();Or();Xn();fg=(e,t)=>{Ie()._OrtInit(e,t)!==0&&pe("Can't initialize onnxruntime.")},Tr=async e=>{fg(e.wasm.numThreads,Zt(e.logLevel))},Ir=async(e,t)=>{{let r=(sc(),br(ac)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.")}else{let o=e.webgpu.powerPreference;if(o!==void 0&&o!=="low-power"&&o!=="high-performance")throw new Error(`Invalid powerPreference setting: "${o}"`);let i=e.webgpu.forceFallbackAdapter;if(i!==void 0&&typeof i!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${i}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:o,forceFallbackAdapter:i}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await r("webgpu",Ie(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await r("webnn",Ie(),e)}}},Pt=new Map,hg=e=>{let t=Ie(),r=t.stackSave();try{let n=t.PTR_SIZE,o=t.stackAlloc(2*n);t._OrtGetInputOutputCount(e,o,o+n)!==0&&pe("Can't get session input/output count.");let a=n===4?"i32":"i64";return[Number(t.getValue(o,a)),Number(t.getValue(o+n,a))]}finally{t.stackRestore(r)}},Kt=e=>{let t=Ie(),r=t._malloc(e.byteLength);if(r===0)throw new Error(`Can't create a session. failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},Cr=async(e,t)=>{let r,n,o=Ie();Array.isArray(e)?[r,n]=e:e.buffer===o.HEAPU8.buffer?[r,n]=[e.byteOffset,e.byteLength]:[r,n]=Kt(e);let i=0,a=0,d=0,l=[],p=[],m=[];try{if([a,l]=Fa(t),t?.externalData&&o.mountExternalData){let v=[];for(let S of t.externalData){let T=typeof S=="string"?S:S.path;v.push(Qt(typeof S=="string"?S:S.data).then(A=>{o.mountExternalData(T,A)}))}await Promise.all(v)}for(let v of t?.executionProviders??[])if((typeof v=="string"?v:v.name)==="webnn"){if(o.shouldTransferToMLTensor=!1,typeof v!="string"){let T=v,A=T?.context,k=T?.gpuDevice,P=T?.deviceType,D=T?.powerPreference;A?o.currentContext=A:k?o.currentContext=await o.jsepCreateMLContext(k):o.currentContext=await o.jsepCreateMLContext({deviceType:P,powerPreference:D})}else o.currentContext=await o.jsepCreateMLContext();break}i=await o._OrtCreateSession(r,n,a),i===0&&pe("Can't create a session."),o.jsepOnCreateSession?.(),o.currentContext&&(o.jsepRegisterMLContext(i,o.currentContext),o.currentContext=void 0,o.shouldTransferToMLTensor=!0);let[u,h]=hg(i),_=!!t?.enableGraphCapture,y=[],g=[],x=[];for(let v=0;v<u;v++){let S=o._OrtGetInputName(i,v);S===0&&pe("Can't get an input name."),p.push(S),y.push(o.UTF8ToString(S))}for(let v=0;v<h;v++){let S=o._OrtGetOutputName(i,v);S===0&&pe("Can't get an output name."),m.push(S);let T=o.UTF8ToString(S);g.push(T);{if(_&&t?.preferredOutputLocation===void 0){x.push("gpu-buffer");continue}let A=typeof t?.preferredOutputLocation=="string"?t.preferredOutputLocation:t?.preferredOutputLocation?.[T]??"cpu";if(A!=="cpu"&&A!=="cpu-pinned"&&A!=="gpu-buffer"&&A!=="ml-tensor")throw new Error(`Not supported preferred output location: ${A}.`);if(_&&A!=="gpu-buffer")throw new Error(`Not supported preferred output location: ${A}. Only 'gpu-buffer' location is supported when enableGraphCapture is true.`);x.push(A)}}let $=null;return x.some(v=>v==="gpu-buffer"||v==="ml-tensor")&&(d=o._OrtCreateBinding(i),d===0&&pe("Can't create IO binding."),$={handle:d,outputPreferredLocations:x,outputPreferredLocationsEncoded:x.map(v=>Qn(v))}),Pt.set(i,[i,p,m,$,_,!1]),[i,y,g]}catch(u){throw p.forEach(h=>o._OrtFree(h)),m.forEach(h=>o._OrtFree(h)),d!==0&&o._OrtReleaseBinding(d)!==0&&pe("Can't release IO binding."),i!==0&&o._OrtReleaseSession(i)!==0&&pe("Can't release session."),u}finally{o._free(r),a!==0&&o._OrtReleaseSessionOptions(a)!==0&&pe("Can't release session options."),l.forEach(u=>o._free(u)),o.unmountExternalData?.()}},Ar=e=>{let t=Ie(),r=Pt.get(e);if(!r)throw new Error(`cannot release session. invalid session id: ${e}`);let[n,o,i,a,d]=r;a&&(d&&t._OrtClearBoundOutputs(a.handle)!==0&&pe("Can't clear bound outputs."),t._OrtReleaseBinding(a.handle)!==0&&pe("Can't release IO binding.")),t.jsepOnReleaseSession?.(e),o.forEach(l=>t._OrtFree(l)),i.forEach(l=>t._OrtFree(l)),t._OrtReleaseSession(n)!==0&&pe("Can't release session."),Pt.delete(e)},uc=(e,t,r,n,o,i=!1)=>{if(!e){t.push(0);return}let a=Ie(),d=a.PTR_SIZE,l=e[0],p=e[1],m=e[3],u,h;if(l==="string"&&(m==="gpu-buffer"||m==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(i&&m!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${o} when enableGraphCapture is true.`);if(m==="gpu-buffer"){let g=e[2].gpuBuffer;h=Ct(Yt(l),p);let x=a.jsepRegisterBuffer;if(!x)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');u=x(n,o,g,h)}else if(m==="ml-tensor"){let g=e[2].mlTensor;h=Ct(Yt(l),p);let x=a.jsepRegisterMLTensor;if(!x)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');u=x(g,Yt(l),p)}else{let g=e[2];if(Array.isArray(g)){h=d*g.length,u=a._malloc(h),r.push(u);for(let x=0;x<g.length;x++){if(typeof g[x]!="string")throw new TypeError(`tensor data at index ${x} is not a string`);a.setValue(u+x*d,ke(g[x],r),"*")}}else h=g.byteLength,u=a._malloc(h),r.push(u),a.HEAPU8.set(new Uint8Array(g.buffer,g.byteOffset,h),u)}let _=a.stackSave(),y=a.stackAlloc(4*p.length);try{p.forEach((x,$)=>a.setValue(y+$*d,x,d===4?"i32":"i64"));let g=a._OrtCreateTensor(Yt(l),u,h,y,p.length,Qn(m));g===0&&pe(`Can't create tensor for input/output. session=${n}, index=${o}.`),t.push(g)}finally{a.stackRestore(_)}},kr=async(e,t,r,n,o,i)=>{let a=Ie(),d=a.PTR_SIZE,l=Pt.get(e);if(!l)throw new Error(`cannot run inference. invalid session id: ${e}`);let p=l[0],m=l[1],u=l[2],h=l[3],_=l[4],y=l[5],g=t.length,x=n.length,$=0,v=[],S=[],T=[],A=[],k=a.stackSave(),P=a.stackAlloc(g*d),D=a.stackAlloc(g*d),R=a.stackAlloc(x*d),G=a.stackAlloc(x*d);try{a.jsepOnRunStart?.(p),[$,v]=Ga(i);for(let V=0;V<g;V++)uc(r[V],S,A,e,t[V],_);for(let V=0;V<x;V++)uc(o[V],T,A,e,g+n[V],_);for(let V=0;V<g;V++)a.setValue(P+V*d,S[V],"*"),a.setValue(D+V*d,m[t[V]],"*");for(let V=0;V<x;V++)a.setValue(R+V*d,T[V],"*"),a.setValue(G+V*d,u[n[V]],"*");if(h&&!y){let{handle:V,outputPreferredLocations:Q,outputPreferredLocationsEncoded:se}=h;if(m.length!==g)throw new Error(`input count from feeds (${g}) is expected to be always equal to model's input count (${m.length}).`);for(let Y=0;Y<g;Y++){let ee=t[Y];await a._OrtBindInput(V,m[ee],S[Y])!==0&&pe(`Can't bind input[${Y}] for session=${e}.`)}for(let Y=0;Y<x;Y++){let ee=n[Y];o[Y]?.[3]?a._OrtBindOutput(V,u[ee],T[Y],0)!==0&&pe(`Can't bind pre-allocated output[${Y}] for session=${e}.`):a._OrtBindOutput(V,u[ee],0,se[ee])!==0&&pe(`Can't bind output[${Y}] to ${Q[Y]} for session=${e}.`)}Pt.set(e,[p,m,u,h,_,!0])}let K;h?K=await a._OrtRunWithBinding(p,h.handle,x,R,$):K=await a._OrtRun(p,D,P,g,G,x,R,$),K!==0&&pe("failed to call OrtRun().");let j=[];for(let V=0;V<x;V++){let Q=Number(a.getValue(R+V*d,"*"));if(Q===T[V]){j.push(o[V]);continue}let se=a.stackSave(),Y=a.stackAlloc(4*d),ee=!1,J,ne=0;try{a._OrtGetTensorData(Q,Y,Y+d,Y+2*d,Y+3*d)!==0&&pe(`Can't access output tensor data on index ${V}.`);let Oe=d===4?"i32":"i64",$e=Number(a.getValue(Y,Oe));ne=a.getValue(Y+d,"*");let le=a.getValue(Y+d*2,"*"),W=Number(a.getValue(Y+d*3,Oe)),q=[];for(let we=0;we<W;we++)q.push(Number(a.getValue(le+we*d,Oe)));a._OrtFree(le)!==0&&pe("Can't free memory for tensor dims.");let he=q.reduce((we,ye)=>we*ye,1);J=bt($e);let Ge=h?.outputPreferredLocations[n[V]];if(J==="string"){if(Ge==="gpu-buffer"||Ge==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let we=[];for(let ye=0;ye<he;ye++){let Ye=a.getValue(ne+ye*d,"*"),Lt=a.getValue(ne+(ye+1)*d,"*"),fn=ye===he-1?void 0:Lt-Ye;we.push(a.UTF8ToString(Ye,fn))}j.push([J,q,we,"cpu"])}else if(Ge==="gpu-buffer"&&he>0){let we=a.jsepGetBuffer;if(!we)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let ye=we(ne),Ye=Ct($e,he);if(Ye===void 0||!Br(J))throw new Error(`Unsupported data type: ${J}`);ee=!0,j.push([J,q,{gpuBuffer:ye,download:a.jsepCreateDownloader(ye,Ye,J),dispose:()=>{a._OrtReleaseTensor(Q)!==0&&pe("Can't release tensor.")}},"gpu-buffer"])}else if(Ge==="ml-tensor"&&he>0){let we=a.jsepEnsureTensor;if(!we)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(Ct($e,he)===void 0||!Mr(J))throw new Error(`Unsupported data type: ${J}`);let Ye=await we(ne,$e,q,!1);ee=!0,j.push([J,q,{mlTensor:Ye,download:a.jsepCreateMLTensorDownloader(ne,J),dispose:()=>{a.jsepReleaseTensorId(ne),a._OrtReleaseTensor(Q)}},"ml-tensor"])}else{let we=Dr(J),ye=new we(he);new Uint8Array(ye.buffer,ye.byteOffset,ye.byteLength).set(a.HEAPU8.subarray(ne,ne+ye.byteLength)),j.push([J,q,ye,"cpu"])}}finally{a.stackRestore(se),J==="string"&&ne&&a._free(ne),ee||a._OrtReleaseTensor(Q)}}return h&&!_&&(a._OrtClearBoundOutputs(h.handle)!==0&&pe("Can't clear bound outputs."),Pt.set(e,[p,m,u,h,_,!1])),j}finally{a.stackRestore(k),S.forEach(K=>a._OrtReleaseTensor(K)),T.forEach(K=>a._OrtReleaseTensor(K)),A.forEach(K=>a._free(K)),$!==0&&a._OrtReleaseRunOptions($),v.forEach(K=>a._free(K))}},Er=e=>{let t=Ie(),r=Pt.get(e);if(!r)throw new Error("invalid session id");let n=r[0],o=t._OrtEndProfiling(n);o===0&&pe("Can't get an profile file name."),t._OrtFree(o)},Pr=e=>{let t=[];for(let r of e){let n=r[2];!Array.isArray(n)&&"buffer"in n&&t.push(n.buffer)}return t}});var zt,Le,nr,ln,cn,dn,Po,zo,Vt,Wt,bg,dc,lc,cc,pc,mc,fc,hc,Oo=U(()=>{"use strict";We();qn();gt();xr();zt=()=>!!ve.wasm.proxy&&typeof document<"u",nr=!1,ln=!1,cn=!1,zo=new Map,Vt=(e,t)=>{let r=zo.get(e);r?r.push(t):zo.set(e,[t])},Wt=()=>{if(nr||!ln||cn||!Le)throw new Error("worker not ready")},bg=e=>{switch(e.data.type){case"init-wasm":nr=!1,e.data.err?(cn=!0,Po[1](e.data.err)):(ln=!0,Po[0]()),dn&&(URL.revokeObjectURL(dn),dn=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=zo.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}default:}},dc=async()=>{if(!ln){if(nr)throw new Error("multiple calls to 'initWasm()' detected.");if(cn)throw new Error("previous call to 'initWasm()' failed.");if(nr=!0,zt())return new Promise((e,t)=>{Le?.terminate(),Va().then(([r,n])=>{try{Le=n,Le.onerror=i=>t(i),Le.onmessage=bg,Po=[e,t];let o={type:"init-wasm",in:ve};!o.in.wasm.wasmPaths&&(r||import.meta.url?.startsWith("file:"))&&(o.in.wasm.wasmPaths={wasm:new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href}),Le.postMessage(o),dn=r}catch(o){t(o)}},t)});try{await Sr(ve.wasm),await Tr(ve),ln=!0}catch(e){throw cn=!0,e}finally{nr=!1}}},lc=async e=>{if(zt())return Wt(),new Promise((t,r)=>{Vt("init-ep",[t,r]);let n={type:"init-ep",in:{epName:e,env:ve}};Le.postMessage(n)});await Ir(ve,e)},cc=async e=>zt()?(Wt(),new Promise((t,r)=>{Vt("copy-from",[t,r]);let n={type:"copy-from",in:{buffer:e}};Le.postMessage(n,[e.buffer])})):Kt(e),pc=async(e,t)=>{if(zt()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return Wt(),new Promise((r,n)=>{Vt("create",[r,n]);let o={type:"create",in:{model:e,options:{...t}}},i=[];e instanceof Uint8Array&&i.push(e.buffer),Le.postMessage(o,i)})}else return Cr(e,t)},mc=async e=>{if(zt())return Wt(),new Promise((t,r)=>{Vt("release",[t,r]);let n={type:"release",in:e};Le.postMessage(n)});Ar(e)},fc=async(e,t,r,n,o,i)=>{if(zt()){if(r.some(a=>a[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(o.some(a=>a))throw new Error("pre-allocated output tensor is not supported for proxy.");return Wt(),new Promise((a,d)=>{Vt("run",[a,d]);let l=r,p={type:"run",in:{sessionId:e,inputIndices:t,inputs:l,outputIndices:n,options:i}};Le.postMessage(p,Pr(l))})}else return kr(e,t,r,n,o,i)},hc=async e=>{if(zt())return Wt(),new Promise((t,r)=>{Vt("end-profiling",[t,r]);let n={type:"end-profiling",in:e};Le.postMessage(n)});Er(e)}});var gc,yg,pn,bc=U(()=>{"use strict";We();Oo();te();$r();Xn();gc=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},yg=e=>{switch(e[3]){case"cpu":return new He(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Br(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:n,dispose:o}=e[2];return He.fromGpuBuffer(r,{dataType:t,dims:e[1],download:n,dispose:o})}case"ml-tensor":{let t=e[0];if(!Mr(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:n,dispose:o}=e[2];return He.fromMLTensor(r,{dataType:t,dims:e[1],download:n,dispose:o})}default:throw new Error(`invalid data location: ${e[3]}`)}},pn=class{async fetchModelAndCopyToWasmMemory(t){return cc(await Qt(t))}async loadModel(t,r){Ue();let n;typeof t=="string"? false?0:n=await this.fetchModelAndCopyToWasmMemory(t):n=t,[this.sessionId,this.inputNames,this.outputNames]=await pc(n,r),De()}async dispose(){return mc(this.sessionId)}async run(t,r,n){Ue();let o=[],i=[];Object.entries(t).forEach(h=>{let _=h[0],y=h[1],g=this.inputNames.indexOf(_);if(g===-1)throw new Error(`invalid input '${_}'`);o.push(y),i.push(g)});let a=[],d=[];Object.entries(r).forEach(h=>{let _=h[0],y=h[1],g=this.outputNames.indexOf(_);if(g===-1)throw new Error(`invalid output '${_}'`);a.push(y),d.push(g)});let l=o.map((h,_)=>gc(h,()=>`input "${this.inputNames[i[_]]}"`)),p=a.map((h,_)=>h?gc(h,()=>`output "${this.outputNames[d[_]]}"`):null),m=await fc(this.sessionId,i,l,d,p,n),u={};for(let h=0;h<m.length;h++)u[this.outputNames[d[h]]]=a[h]??yg(m[h]);return De(),u}startProfiling(){}endProfiling(){hc(this.sessionId)}}});var _c={};Ft(_c,{OnnxruntimeWebAssemblyBackend:()=>mn,initializeFlags:()=>yc,wasmBackend:()=>_g});var yc,mn,_g,wc=U(()=>{"use strict";We();Oo();bc();yc=()=>{if((typeof ve.wasm.initTimeout!="number"||ve.wasm.initTimeout<0)&&(ve.wasm.initTimeout=0),ve.wasm.simd===!1&&console.warn('Deprecated property "env.wasm.simd" is set to false. non-SIMD build is no longer provided, and this setting will be ignored.'),typeof ve.wasm.proxy!="boolean"&&(ve.wasm.proxy=!1),typeof ve.wasm.trace!="boolean"&&(ve.wasm.trace=!1),typeof ve.wasm.numThreads!="number"||!Number.isInteger(ve.wasm.numThreads)||ve.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)ve.wasm.numThreads=1;else{let e=typeof navigator>"u"?Nn("node:os").cpus().length:navigator.hardwareConcurrency;ve.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},mn=class{async init(t){yc(),await dc(),await lc(t)}async createInferenceSessionHandler(t,r){let n=new pn;return await n.loadModel(t,r),Promise.resolve(n)}},_g=new mn});We();We();We();var Ca="1.21.0-dev.20250114-228dd16893";var $1=Fn;{let e=(wc(),br(_c)).wasmBackend;St("webgpu",e,5),St("webnn",e,5),St("cpu",e,10),St("wasm",e,10)}Object.defineProperty(ve.versions,"web",{value:Ca,enumerable:!0});
6709
+ ${d}`,p=n.createShaderModule({code:l,label:t.name});se("verbose",()=>`[WebGPU] ${t.name} shader code: ${l}`);let m=n.createComputePipeline({compute:{module:p,entryPoint:"main"},layout:"auto",label:t.name});return Be(t.name),{programInfo:t,computePipeline:m,uniformVariablesInfo:a.variablesInfo}}normalizeDispatchGroupSize(t){let r=typeof t=="number"?t:t.x,n=typeof t=="number"?1:t.y||1,o=typeof t=="number"?1:t.z||1,i=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(r<=i&&n<=i&&o<=i)return[r,n,o];let a=r*n*o,d=Math.ceil(Math.sqrt(a));if(d>i){if(d=Math.ceil(Math.cbrt(a)),d>i)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[d,d,d]}else return[d,d,1]}}});var ug,dg,Io,Co,nn,Jl=U(()=>{"use strict";Le();J();et();Xn();Xa();Ql();Xl();ug=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let r=[];for(let n=0;n<e.length;++n){let o=e[n].dataType;switch(t[n]){case"none":{r.push("");break}case"type":{r.push(`${o}`);break}case"rank":{let i=e[n].dims.length;r.push(`${o};${i}`);break}case"dims":{let i=e[n].dims.join(",");r.push(`${o};${i}`);break}default:throw new Error(`unsupported input dependency: ${t[n]}`)}}return r.join("|")},dg=(e,t,r)=>{let n=e.name;return e.shaderCache?.hint&&(n+="["+e.shaderCache.hint+"]"),n+=":"+r+`:${ug(t,e.shaderCache?.inputDependencies??new Array(t.length).fill("dims"))}`,n},Io=class{constructor(t){t&&(this.architecture=t.architecture,this.vendor=t.vendor)}isArchitecture(t){return this.architecture===t}isVendor(t){return this.vendor===t}},Co=class{constructor(t){this.subgroupsSupported=t.features.has("subgroups"),this.subgroupsF16Supported=t.features.has("subgroups");let r=t.limits;!this.subgroupsSupported||!r.minSubgroupSize||!r.maxSubgroupSize?this.subgroupSizeRange=void 0:this.subgroupSizeRange=[r.minSubgroupSize,r.maxSubgroupSize]}},nn=class{constructor(){this.currentSessionId=null;this.currentKernelId=null;this.commandEncoder=null;this.computePassEncoder=null;this.maxDispatchNumber=16;this.pendingDispatchNumber=0;this.pendingKernels=[];this.pendingQueries=new Map;this.sessionStatus="default";this.capturedCommandList=new Map;this.capturedPendingKernels=new Map;this.sessionExternalDataMapping=new Map}get currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");let t=this.kernelCustomData.get(this.currentKernelId);return t||(t={},this.kernelCustomData.set(this.currentKernelId,t)),t}async initialize(t,r){this.env=t;let n=[],o={requiredLimits:{maxComputeWorkgroupStorageSize:r.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:r.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:r.limits.maxStorageBufferBindingSize,maxBufferSize:r.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:r.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:r.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:r.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:r.limits.maxComputeWorkgroupSizeZ},requiredFeatures:n},i=a=>r.features.has(a)&&n.push(a)&&!0;i("chromium-experimental-timestamp-query-inside-passes")||i("timestamp-query"),i("shader-f16"),i("subgroups")&&i("subgroups-f16"),this.device=await r.requestDevice(o),this.deviceInfo=new Co(this.device),this.adapterInfo=new Io(r.info||await r.requestAdapterInfo()),this.gpuDataManager=Qa(this),this.programManager=new rn(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,Mr(t.logLevel,!!t.debug),this.device.onuncapturederror=a=>{a.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${a.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:r,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let t=this.getCommandEncoder(),r={};this.queryType==="at-passes"&&(r.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=t.beginComputePass(r)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Ne(),this.endComputePass();let t;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),t=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(t,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,t,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&t.mapAsync(GPUMapMode.READ).then(()=>{let r=new BigUint64Array(t.getMappedRange()),n=this.pendingQueries.get(t);for(let o=0;o<r.length/2;o++){let i=n[o],a=i.kernelId,d=this.kernels.get(a),l=d.kernelType,p=d.kernelName,m=i.programName,u=i.inputTensorViews,h=i.outputTensorViews,_=r[o*2],y=r[o*2+1];typeof this.queryTimeBase>"u"&&(this.queryTimeBase=_);let g=Number(_-this.queryTimeBase),x=Number(y-this.queryTimeBase);if(!Number.isSafeInteger(g)||!Number.isSafeInteger(x))throw new RangeError("incorrect timestamp range");if(this.env.webgpu.profiling?.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:u.map($=>({dims:$.dims,dataType:bt($.dataType)})),outputsMetadata:h.map($=>({dims:$.dims,dataType:bt($.dataType)})),kernelId:a,kernelType:l,kernelName:p,programName:m,startTime:g,endTime:x});else{let $="";u.forEach((S,T)=>{$+=`input[${T}]: [${S.dims}] | ${bt(S.dataType)}, `});let v="";h.forEach((S,T)=>{v+=`output[${T}]: [${S.dims}] | ${bt(S.dataType)}, `}),console.log(`[profiling] kernel "${a}|${l}|${p}|${m}" ${$}${v}execution time: ${x-g} ns`)}_r("GPU",`${m}::${_}::${y}`)}t.unmap(),this.pendingQueries.delete(t)}),Be()}run(t,r,n,o,i,a){Ne(t.name);let d=[];for(let S=0;S<r.length;++S){let T=r[S].data;if(T===0)continue;let A=this.gpuDataManager.get(T);if(!A)throw new Error(`no GPU data for input: ${T}`);d.push(A)}let{outputs:l,dispatchGroup:p,programUniforms:m}=t.getRunData(r),u=n.length===0?l.map((S,T)=>T):n;if(u.length!==l.length)throw new Error(`Output size ${u.length} must be equal to ${l.length}.`);let h=[],_=[];for(let S=0;S<l.length;++S){if(!Number.isInteger(u[S])||u[S]<-3||u[S]>=a)throw new Error(`Invalid output index: ${u[S]}`);if(u[S]===-3)continue;let T=u[S]===-1,A=u[S]===-2,C=T||A?i(l[S].dataType,l[S].dims):o(u[S],l[S].dataType,l[S].dims);if(h.push(C),C.data===0)continue;let P=this.gpuDataManager.get(C.data);if(!P)throw new Error(`no GPU data for output: ${C.data}`);if(T&&this.temporaryData.push(P),A){let D=this.kernelPersistentData.get(this.currentKernelId);D||(D=[],this.kernelPersistentData.set(this.currentKernelId,D)),D.push(P)}_.push(P)}if(d.length!==r.length||_.length!==h.length){if(_.length===0)return Be(t.name),h;throw new Error(`Program ${t.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`)}let y;if(m){let S=0,T=[];m.forEach(D=>{let R=typeof D.data=="number"?[D.data]:D.data;if(R.length===0)return;let H=D.type===10?2:4,L,re;D.type===10?(re=R.length>4?16:R.length>2?8:R.length*H,L=R.length>4?16:H*R.length):(re=R.length<=2?R.length*H:16,L=16),S=Math.ceil(S/re)*re,T.push(S);let V=D.type===10?8:4;S+=R.length>4?Math.ceil(R.length/V)*L:R.length*H});let A=16;S=Math.ceil(S/A)*A;let C=new ArrayBuffer(S);m.forEach((D,R)=>{let H=T[R],L=typeof D.data=="number"?[D.data]:D.data;if(D.type===6)new Int32Array(C,H,L.length).set(L);else if(D.type===12)new Uint32Array(C,H,L.length).set(L);else if(D.type===10)new Uint16Array(C,H,L.length).set(L);else if(D.type===1)new Float32Array(C,H,L.length).set(L);else throw new Error(`Unsupported uniform type: ${bt(D.type)}`)});let P=this.gpuDataManager.create(S,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(P.buffer,0,C,0,S),this.gpuDataManager.release(P.id),y={offset:0,size:S,buffer:P.buffer}}let g=this.programManager.normalizeDispatchGroupSize(p),x=g[1]===1&&g[2]===1,$=dg(t,r,x),v=this.programManager.getArtifact($);if(v||(v=this.programManager.build(t,g),this.programManager.setArtifact($,v),se("info",()=>`[artifact] key: ${$}, programName: ${t.name}`)),m&&v.uniformVariablesInfo){if(m.length!==v.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${v.uniformVariablesInfo.length}, got ${m.length} in program "${v.programInfo.name}".`);for(let S=0;S<m.length;S++){let T=m[S],A=T.type,C=typeof T.data=="number"?1:T.data.length,[P,D]=v.uniformVariablesInfo[S];if(A!==P||C!==D)throw new Error(`Uniform variable ${S} mismatch: expect type ${P} with size ${D}, got type ${A} with size ${C} in program "${v.programInfo.name}".`)}}if(se("info",()=>`[ProgramManager] run "${t.name}" (key=${$}) with ${g[0]}x${g[1]}x${g[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let S={kernelId:this.currentKernelId,programName:v.programInfo.name,inputTensorViews:r,outputTensorViews:h};this.pendingKernels.push(S),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(S)}return this.programManager.run(v,d,_,g,y),Be(t.name),h}upload(t,r){this.gpuDataManager.upload(t,r)}memcpy(t,r){this.gpuDataManager.memcpy(t,r)}async download(t,r){await this.gpuDataManager.download(t,r)}alloc(t){return this.gpuDataManager.create(t).id}free(t){return this.gpuDataManager.release(t)}createKernel(t,r,n,o){let i=Zl.get(t);if(!i)throw new Error(`kernel not implemented: ${t}`);let a={kernelType:t,kernelName:o,kernelEntry:i[0],attributes:[i[1],n]};this.kernels.set(r,a)}releaseKernel(t){let r=this.kernelPersistentData.get(t);if(r){for(let n of r)this.gpuDataManager.release(n.id);this.kernelPersistentData.delete(t)}this.kernelCustomData.delete(t),this.kernels.delete(t)}computeKernel(t,r,n){let o=this.kernels.get(t);if(!o)throw new Error(`kernel not created: ${t}`);let i=o.kernelType,a=o.kernelName,d=o.kernelEntry,l=o.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${i}] ${a}" is not allowed to be called recursively`);this.currentKernelId=t,l[0]&&(l[1]=l[0](l[1]),l[0]=void 0),se("info",()=>`[WebGPU] Start to run kernel "[${i}] ${a}"...`);let p=this.env.debug;this.temporaryData=[];try{return p&&this.device.pushErrorScope("validation"),d(r,l[1]),0}catch(m){return n.push(Promise.resolve(`[WebGPU] Kernel "[${i}] ${a}" failed. ${m}`)),1}finally{p&&n.push(this.device.popErrorScope().then(m=>m?`GPU validation error for kernel "[${i}] ${a}": ${m.message}`:null));for(let m of this.temporaryData)this.gpuDataManager.release(m.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(t,r,n,o){let i=this.sessionExternalDataMapping.get(t);i||(i=new Map,this.sessionExternalDataMapping.set(t,i));let a=i.get(r),d=this.gpuDataManager.registerExternalBuffer(n,o,a);return i.set(r,[d,n]),d}unregisterBuffers(t){let r=this.sessionExternalDataMapping.get(t);r&&(r.forEach(n=>this.gpuDataManager.unregisterExternalBuffer(n[0])),this.sessionExternalDataMapping.delete(t))}getBuffer(t){let r=this.gpuDataManager.get(t);if(!r)throw new Error(`no GPU data for buffer: ${t}`);return r.buffer}createDownloader(t,r,n){return async()=>{let o=await ro(this,t,r);return Rr(o.buffer,n)}}writeTimestamp(t){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,t)}setQueryType(){this.queryType="none",(this.env.webgpu.profiling?.mode==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){se("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){se("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){se("info","replay"),this.sessionStatus="replaying";let t=this.capturedCommandList.get(this.currentSessionId),r=this.capturedPendingKernels.get(this.currentSessionId),n=t.length;this.pendingKernels=[];for(let o=0;o<n;o++){let i=this.getComputePassEncoder(),a=t[o];this.writeTimestamp(this.pendingDispatchNumber*2),i.setPipeline(a.computePipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(...a.dispatchGroup),this.writeTimestamp(this.pendingDispatchNumber*2+1),this.pendingDispatchNumber++,this.queryType!=="none"&&this.pendingKernels.push(r[o]),(this.pendingDispatchNumber>=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onCreateSession(){this.gpuDataManager.onCreateSession()}onReleaseSession(t){this.unregisterBuffers(t),this.capturedCommandList.has(t)&&this.capturedCommandList.delete(t),this.capturedPendingKernels.has(t)&&this.capturedPendingKernels.delete(t),this.gpuDataManager.onReleaseSession(t)}onRunStart(t){this.currentSessionId=t,this.setQueryType()}}});var lg,ec,cg,tc,on,an,Ao,rc,nc=U(()=>{"use strict";et();lg=1,ec=()=>lg++,cg=new Map([["float32",32],["float16",16],["int32",32],["uint32",32],["int64",64],["uint64",64],["int8",8],["uint8",8],["int4",4],["uint4",4]]),tc=(e,t)=>{let r=cg.get(e);if(!r)throw new Error("Unsupported data type.");return t.length>0?Math.ceil(t.reduce((n,o)=>n*o)*r/8):0},on=class{constructor(t){this.sessionId=t.sessionId,this.mlContext=t.context,this.mlTensor=t.tensor,this.dataType=t.dataType,this.tensorShape=t.shape}get tensor(){return this.mlTensor}get type(){return this.dataType}get shape(){return this.tensorShape}get byteLength(){return tc(this.dataType,this.tensorShape)}destroy(){se("verbose",()=>"[WebNN] TensorWrapper.destroy"),this.mlTensor.destroy()}write(t){this.mlContext.writeTensor(this.mlTensor,t)}async read(t){return t?this.mlContext.readTensor(this.mlTensor,t):this.mlContext.readTensor(this.mlTensor)}canReuseTensor(t,r,n){return this.mlContext===t&&this.dataType===r&&this.tensorShape.length===n.length&&this.tensorShape.every((o,i)=>o===n[i])}},an=class{constructor(t,r){this.tensorManager=t;this.wrapper=r}get tensorWrapper(){return this.wrapper}releaseTensor(){this.tensorWrapper&&(this.tensorManager.releaseTensor(this.tensorWrapper),this.wrapper=void 0)}async ensureTensor(t,r,n,o){if(this.wrapper){if(this.wrapper.canReuseTensor(t,r,n))return this.wrapper.tensor;if(o){if(this.wrapper.byteLength!==tc(r,n))throw new Error("Unable to copy data to tensor with different size.");this.activeUpload=new Uint8Array(await this.wrapper.read())}this.tensorManager.releaseTensor(this.wrapper)}let i=typeof MLTensorUsage>"u"?void 0:MLTensorUsage.READ|MLTensorUsage.WRITE;return this.wrapper=await this.tensorManager.getCachedTensor(r,n,i,!0,!0),o&&this.activeUpload&&(this.wrapper.write(this.activeUpload),this.activeUpload=void 0),this.wrapper.tensor}upload(t){if(this.wrapper)if(t.byteLength===this.wrapper.byteLength){this.wrapper.write(t);return}else se("verbose",()=>"Data size does not match tensor size. Releasing tensor."),this.releaseTensor();this.activeUpload?this.activeUpload.set(t):this.activeUpload=new Uint8Array(t)}async download(t){if(this.activeUpload)if(t){t instanceof ArrayBuffer?new Uint8Array(t).set(this.activeUpload):new Uint8Array(t.buffer,t.byteOffset,t.byteLength).set(this.activeUpload);return}else return this.activeUpload.buffer;if(!this.wrapper)throw new Error("Tensor has not been created.");return t?this.wrapper.read(t):this.wrapper.read()}},Ao=class{constructor(t){this.backend=t;this.tensorTrackersById=new Map;this.freeTensors=[];this.externalTensors=new Set}reserveTensorId(){let t=ec();return this.tensorTrackersById.set(t,new an(this)),t}releaseTensorId(t){let r=this.tensorTrackersById.get(t);r&&(this.tensorTrackersById.delete(t),r.tensorWrapper&&this.releaseTensor(r.tensorWrapper))}async ensureTensor(t,r,n,o){se("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${t}, dataType: ${r}, shape: ${n}, copyOld: ${o}}`);let i=this.tensorTrackersById.get(t);if(!i)throw new Error("Tensor not found.");return i.ensureTensor(this.backend.currentContext,r,n,o)}upload(t,r){let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");n.upload(r)}async download(t,r){se("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${t}, dstBuffer: ${r?.byteLength}}`);let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");return n.download(r)}releaseTensorsForSession(t){for(let r of this.freeTensors)r.sessionId===t&&r.destroy();this.freeTensors=this.freeTensors.filter(r=>r.sessionId!==t)}registerTensor(t,r,n,o){let i=ec(),a=new on({sessionId:this.backend.currentSessionId,context:t,tensor:r,dataType:n,shape:o});return this.tensorTrackersById.set(i,new an(this,a)),this.externalTensors.add(a),i}async getCachedTensor(t,r,n,o,i){let a=this.backend.currentSessionId,d=this.backend.currentContext;for(let[p,m]of this.freeTensors.entries())if(m.canReuseTensor(d,t,r)){se("verbose",()=>`[WebNN] Reusing tensor {dataType: ${t}, shape: ${r}}`);let u=this.freeTensors.splice(p,1)[0];return u.sessionId=a,u}se("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${t}, shape: ${r}}`);let l=await d.createTensor({dataType:t,shape:r,dimensions:r,usage:n,writable:o,readable:i});return new on({sessionId:a,context:d,tensor:l,dataType:t,shape:r})}releaseTensor(t){this.externalTensors.has(t)&&this.externalTensors.delete(t),this.freeTensors.push(t)}},rc=(...e)=>new Ao(...e)});var oc,pg,sn,ic=U(()=>{"use strict";J();gt();Xn();nc();et();oc=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[22,"int4"],[21,"uint4"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),pg=(e,t)=>{if(e===t)return!0;if(e===void 0||t===void 0)return!1;let r=Object.keys(e).sort(),n=Object.keys(t).sort();return r.length===n.length&&r.every((o,i)=>o===n[i]&&e[o]===t[o])},sn=class{constructor(t){this.tensorManager=rc(this);this.mlContextBySessionId=new Map;this.sessionIdsByMLContext=new Map;this.mlContextCache=[];Mr(t.logLevel,!!t.debug)}get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(t){this.activeSessionId=t}async createMLContext(t){if(t instanceof GPUDevice){let n=this.mlContextCache.findIndex(o=>o.gpuDevice===t);if(n!==-1)return this.mlContextCache[n].mlContext;{let o=await navigator.ml.createContext(t);return this.mlContextCache.push({gpuDevice:t,mlContext:o}),o}}else if(t===void 0){let n=this.mlContextCache.findIndex(o=>o.options===void 0&&o.gpuDevice===void 0);if(n!==-1)return this.mlContextCache[n].mlContext;{let o=await navigator.ml.createContext();return this.mlContextCache.push({mlContext:o}),o}}let r=this.mlContextCache.findIndex(n=>pg(n.options,t));if(r!==-1)return this.mlContextCache[r].mlContext;{let n=await navigator.ml.createContext(t);return this.mlContextCache.push({options:t,mlContext:n}),n}}get currentContext(){let t=this.getMLContext(this.currentSessionId);if(!t)throw new Error(`No MLContext found for session ${this.currentSessionId}`);return t}registerMLContext(t,r){this.mlContextBySessionId.set(t,r);let n=this.sessionIdsByMLContext.get(r);n||(n=new Set,this.sessionIdsByMLContext.set(r,n)),n.add(t)}onReleaseSession(t){let r=this.mlContextBySessionId.get(t);if(!r)return;this.tensorManager.releaseTensorsForSession(t),this.mlContextBySessionId.delete(t);let n=this.sessionIdsByMLContext.get(r);if(n.delete(t),n.size===0){this.sessionIdsByMLContext.delete(r);let o=this.mlContextCache.findIndex(i=>i.mlContext===r);o!==-1&&this.mlContextCache.splice(o,1)}}getMLContext(t){return this.mlContextBySessionId.get(t)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(t){se("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${t}}`),this.tensorManager.releaseTensorId(t)}async ensureTensor(t,r,n,o){let i=oc.get(r);if(!i)throw new Error(`Unsupported ONNX data type: ${r}`);return this.tensorManager.ensureTensor(t,i,n,o)}uploadTensor(t,r){if(!Te().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");se("verbose",()=>`[WebNN] uploadTensor {tensorId: ${t}, data: ${r.byteLength}}`),this.tensorManager.upload(t,r)}async downloadTensor(t,r){return this.tensorManager.download(t,r)}createMLTensorDownloader(t,r){return async()=>{let n=await this.tensorManager.download(t);return Rr(n,r)}}registerMLTensor(t,r,n){let o=oc.get(r);if(!o)throw new Error(`Unsupported ONNX data type: ${r}`);let i=this.tensorManager.registerTensor(this.currentContext,t,o,n);return se("verbose",()=>`[WebNN] registerMLTensor {tensor: ${t}, dataType: ${o}, dimensions: ${n}} -> {tensorId: ${i}}`),i}registerMLConstant(t,r,n,o,i,a){if(!a)throw new Error("External mounted files are not available.");let d=t;t.startsWith("./")&&(d=t.substring(2));let l=a.get(d);if(!l)throw new Error(`File with name ${d} not found in preloaded files.`);if(r+n>l.byteLength)throw new Error("Out of bounds: data offset and length exceed the external file data size.");let p=l.slice(r,r+n).buffer,m;switch(i.dataType){case"float32":m=new Float32Array(p);break;case"float16":m=new Uint16Array(p);break;case"int32":m=new Int32Array(p);break;case"uint32":m=new Uint32Array(p);break;case"int64":m=new BigInt64Array(p);break;case"uint64":m=new BigUint64Array(p);break;case"int8":m=new Int8Array(p);break;case"int4":case"uint4":case"uint8":m=new Uint8Array(p);break;default:throw new Error(`Unsupported data type: ${i.dataType} in creating WebNN Constant from external data.`)}return se("verbose",()=>`[WebNN] registerMLConstant {dataType: ${i.dataType}, shape: ${i.shape}}}`),o.constant(i,m)}flush(){}}});var ac={};Ht(ac,{init:()=>mg});var tr,ko,mg,sc=U(()=>{"use strict";J();Jl();et();ne();ic();tr=class e{constructor(t,r,n,o){this.module=t;this.dataType=r;this.data=n;this.dims=o}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=k.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(k.size(t)!==k.size(this.dims))throw new Error("Invalid new shape");return new e(this.module,this.dataType,this.data,t)}},ko=class{constructor(t,r,n){this.module=t;this.backend=r;this.customDataOffset=0;this.customDataSize=0;this.adapterInfo=r.adapterInfo,this.deviceInfo=r.deviceInfo;let o=t.PTR_SIZE,i=n/t.PTR_SIZE,a=o===4?"i32":"i64";this.opKernelContext=Number(t.getValue(o*i++,a));let d=Number(t.getValue(o*i++,a));this.outputCount=Number(t.getValue(o*i++,a)),this.customDataOffset=Number(t.getValue(o*i++,"*")),this.customDataSize=Number(t.getValue(o*i++,a));let l=[];for(let p=0;p<d;p++){let m=Number(t.getValue(o*i++,a)),u=Number(t.getValue(o*i++,"*")),h=Number(t.getValue(o*i++,a)),_=[];for(let y=0;y<h;y++)_.push(Number(t.getValue(o*i++,a)));l.push(new tr(t,m,u,_))}this.inputs=l}get kernelCustomData(){return this.backend.currentKernelCustomData}get customDataBuffer(){return this.module.HEAPU8.subarray(this.customDataOffset,this.customDataOffset+this.customDataSize)}compute(t,r){let n=r?.inputs?.map(d=>typeof d=="number"?this.inputs[d]:d)??this.inputs,o=r?.outputs??[],i=(d,l,p)=>new tr(this.module,l,this.output(d,p),p),a=(d,l)=>{let p=At(d,l);if(!p)throw new Error(`Unsupported data type: ${d}`);let m=p>0?this.backend.gpuDataManager.create(p).id:0;return new tr(this.module,d,m,l)};return this.backend.run(t,n,o,i,a,this.outputCount)}output(t,r){let n=this.module.stackSave();try{let o=this.module.PTR_SIZE,i=o===4?"i32":"i64",a=this.module.stackAlloc((1+r.length)*o);this.module.setValue(a,r.length,i);for(let d=0;d<r.length;d++)this.module.setValue(a+o*(d+1),r[d],i);return this.module._JsepOutput(this.opKernelContext,t,a)}catch(o){throw new Error(`Failed to generate kernel's output[${t}] with dims [${r}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${o}`)}finally{this.module.stackRestore(n)}}},mg=async(e,t,r,n)=>{let o=t.jsepInit;if(!o)throw new Error("Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let i=new nn;await i.initialize(r,n),o("webgpu",[i,a=>i.alloc(Number(a)),a=>i.free(a),(a,d,l,p=!1)=>{if(p)se("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${Number(a)}, dst=${Number(d)}, size=${Number(l)}`),i.memcpy(Number(a),Number(d));else{se("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(a)}, gpuDataId=${Number(d)}, size=${Number(l)}`);let m=t.HEAPU8.subarray(Number(a>>>0),Number(a>>>0)+Number(l));i.upload(Number(d),m)}},async(a,d,l)=>{se("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${a}, dataOffset=${d}, size=${l}`),await i.download(Number(a),()=>t.HEAPU8.subarray(Number(d)>>>0,Number(d+l)>>>0))},(a,d,l)=>i.createKernel(a,Number(d),l,t.UTF8ToString(t._JsepGetNodeName(Number(d)))),a=>i.releaseKernel(a),(a,d,l,p)=>{se("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${l}, kernel=${a}, contextDataOffset=${d}`);let m=new ko(t,i,Number(d));return i.computeKernel(Number(a),m,p)},()=>i.captureBegin(),()=>i.captureEnd(),()=>i.replay()])}else{let i=new sn(r);o("webnn",[i,()=>i.reserveTensorId(),a=>i.releaseTensorId(a),async(a,d,l,p)=>i.ensureTensor(a,d,l,p),(a,d)=>{i.uploadTensor(a,d)},async(a,d)=>i.downloadTensor(a,d)])}}});var fg,Sr,Tr,zt,hg,qt,Ir,Cr,uc,Ar,kr,Er,Fn=U(()=>{"use strict";Ga();Fa();J();gt();zr();Qn();fg=(e,t)=>{Te()._OrtInit(e,t)!==0&&ce("Can't initialize onnxruntime.")},Sr=async e=>{fg(e.wasm.numThreads,Yt(e.logLevel))},Tr=async(e,t)=>{{let r=(sc(),gr(ac)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.")}else{let o=e.webgpu.powerPreference;if(o!==void 0&&o!=="low-power"&&o!=="high-performance")throw new Error(`Invalid powerPreference setting: "${o}"`);let i=e.webgpu.forceFallbackAdapter;if(i!==void 0&&typeof i!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${i}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:o,forceFallbackAdapter:i}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await r("webgpu",Te(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await r("webnn",Te(),e)}}},zt=new Map,hg=e=>{let t=Te(),r=t.stackSave();try{let n=t.PTR_SIZE,o=t.stackAlloc(2*n);t._OrtGetInputOutputCount(e,o,o+n)!==0&&ce("Can't get session input/output count.");let a=n===4?"i32":"i64";return[Number(t.getValue(o,a)),Number(t.getValue(o+n,a))]}finally{t.stackRestore(r)}},qt=e=>{let t=Te(),r=t._malloc(e.byteLength);if(r===0)throw new Error(`Can't create a session. failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,r),[r,e.byteLength]},Ir=async(e,t)=>{let r,n,o=Te();Array.isArray(e)?[r,n]=e:e.buffer===o.HEAPU8.buffer?[r,n]=[e.byteOffset,e.byteLength]:[r,n]=qt(e);let i=0,a=0,d=0,l=[],p=[],m=[];try{if([a,l]=Ha(t),t?.externalData&&o.mountExternalData){let v=[];for(let S of t.externalData){let T=typeof S=="string"?S:S.path;v.push(Zt(typeof S=="string"?S:S.data).then(A=>{o.mountExternalData(T,A)}))}await Promise.all(v)}for(let v of t?.executionProviders??[])if((typeof v=="string"?v:v.name)==="webnn"){if(o.shouldTransferToMLTensor=!1,typeof v!="string"){let T=v,A=T?.context,C=T?.gpuDevice,P=T?.deviceType,D=T?.powerPreference;A?o.currentContext=A:C?o.currentContext=await o.jsepCreateMLContext(C):o.currentContext=await o.jsepCreateMLContext({deviceType:P,powerPreference:D})}else o.currentContext=await o.jsepCreateMLContext();break}i=await o._OrtCreateSession(r,n,a),i===0&&ce("Can't create a session."),o.jsepOnCreateSession?.(),o.currentContext&&(o.jsepRegisterMLContext(i,o.currentContext),o.currentContext=void 0,o.shouldTransferToMLTensor=!0);let[u,h]=hg(i),_=!!t?.enableGraphCapture,y=[],g=[],x=[];for(let v=0;v<u;v++){let S=o._OrtGetInputName(i,v);S===0&&ce("Can't get an input name."),p.push(S),y.push(o.UTF8ToString(S))}for(let v=0;v<h;v++){let S=o._OrtGetOutputName(i,v);S===0&&ce("Can't get an output name."),m.push(S);let T=o.UTF8ToString(S);g.push(T);{if(_&&t?.preferredOutputLocation===void 0){x.push("gpu-buffer");continue}let A=typeof t?.preferredOutputLocation=="string"?t.preferredOutputLocation:t?.preferredOutputLocation?.[T]??"cpu";if(A!=="cpu"&&A!=="cpu-pinned"&&A!=="gpu-buffer"&&A!=="ml-tensor")throw new Error(`Not supported preferred output location: ${A}.`);if(_&&A!=="gpu-buffer")throw new Error(`Not supported preferred output location: ${A}. Only 'gpu-buffer' location is supported when enableGraphCapture is true.`);x.push(A)}}let $=null;return x.some(v=>v==="gpu-buffer"||v==="ml-tensor")&&(d=o._OrtCreateBinding(i),d===0&&ce("Can't create IO binding."),$={handle:d,outputPreferredLocations:x,outputPreferredLocationsEncoded:x.map(v=>Zn(v))}),zt.set(i,[i,p,m,$,_,!1]),[i,y,g]}catch(u){throw p.forEach(h=>o._OrtFree(h)),m.forEach(h=>o._OrtFree(h)),d!==0&&o._OrtReleaseBinding(d)!==0&&ce("Can't release IO binding."),i!==0&&o._OrtReleaseSession(i)!==0&&ce("Can't release session."),u}finally{o._free(r),a!==0&&o._OrtReleaseSessionOptions(a)!==0&&ce("Can't release session options."),l.forEach(u=>o._free(u)),o.unmountExternalData?.()}},Cr=e=>{let t=Te(),r=zt.get(e);if(!r)throw new Error(`cannot release session. invalid session id: ${e}`);let[n,o,i,a,d]=r;a&&(d&&t._OrtClearBoundOutputs(a.handle)!==0&&ce("Can't clear bound outputs."),t._OrtReleaseBinding(a.handle)!==0&&ce("Can't release IO binding.")),t.jsepOnReleaseSession?.(e),o.forEach(l=>t._OrtFree(l)),i.forEach(l=>t._OrtFree(l)),t._OrtReleaseSession(n)!==0&&ce("Can't release session."),zt.delete(e)},uc=(e,t,r,n,o,i=!1)=>{if(!e){t.push(0);return}let a=Te(),d=a.PTR_SIZE,l=e[0],p=e[1],m=e[3],u,h;if(l==="string"&&(m==="gpu-buffer"||m==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(i&&m!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${o} when enableGraphCapture is true.`);if(m==="gpu-buffer"){let g=e[2].gpuBuffer;h=At(jt(l),p);let x=a.jsepRegisterBuffer;if(!x)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');u=x(n,o,g,h)}else if(m==="ml-tensor"){let g=e[2].mlTensor;h=At(jt(l),p);let x=a.jsepRegisterMLTensor;if(!x)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');u=x(g,jt(l),p)}else{let g=e[2];if(Array.isArray(g)){h=d*g.length,u=a._malloc(h),r.push(u);for(let x=0;x<g.length;x++){if(typeof g[x]!="string")throw new TypeError(`tensor data at index ${x} is not a string`);a.setValue(u+x*d,ke(g[x],r),"*")}}else h=g.byteLength,u=a._malloc(h),r.push(u),a.HEAPU8.set(new Uint8Array(g.buffer,g.byteOffset,h),u)}let _=a.stackSave(),y=a.stackAlloc(4*p.length);try{p.forEach((x,$)=>a.setValue(y+$*d,x,d===4?"i32":"i64"));let g=a._OrtCreateTensor(jt(l),u,h,y,p.length,Zn(m));g===0&&ce(`Can't create tensor for input/output. session=${n}, index=${o}.`),t.push(g)}finally{a.stackRestore(_)}},Ar=async(e,t,r,n,o,i)=>{let a=Te(),d=a.PTR_SIZE,l=zt.get(e);if(!l)throw new Error(`cannot run inference. invalid session id: ${e}`);let p=l[0],m=l[1],u=l[2],h=l[3],_=l[4],y=l[5],g=t.length,x=n.length,$=0,v=[],S=[],T=[],A=[],C=a.stackSave(),P=a.stackAlloc(g*d),D=a.stackAlloc(g*d),R=a.stackAlloc(x*d),H=a.stackAlloc(x*d);try{a.jsepOnRunStart?.(p),[$,v]=La(i);for(let V=0;V<g;V++)uc(r[V],S,A,e,t[V],_);for(let V=0;V<x;V++)uc(o[V],T,A,e,g+n[V],_);for(let V=0;V<g;V++)a.setValue(P+V*d,S[V],"*"),a.setValue(D+V*d,m[t[V]],"*");for(let V=0;V<x;V++)a.setValue(R+V*d,T[V],"*"),a.setValue(H+V*d,u[n[V]],"*");if(h&&!y){let{handle:V,outputPreferredLocations:K,outputPreferredLocationsEncoded:we}=h;if(m.length!==g)throw new Error(`input count from feeds (${g}) is expected to be always equal to model's input count (${m.length}).`);for(let j=0;j<g;j++){let Q=t[j];await a._OrtBindInput(V,m[Q],S[j])!==0&&ce(`Can't bind input[${j}] for session=${e}.`)}for(let j=0;j<x;j++){let Q=n[j];o[j]?.[3]?a._OrtBindOutput(V,u[Q],T[j],0)!==0&&ce(`Can't bind pre-allocated output[${j}] for session=${e}.`):a._OrtBindOutput(V,u[Q],0,we[Q])!==0&&ce(`Can't bind output[${j}] to ${K[j]} for session=${e}.`)}zt.set(e,[p,m,u,h,_,!0])}let L;h?L=await a._OrtRunWithBinding(p,h.handle,x,R,$):L=await a._OrtRun(p,D,P,g,H,x,R,$),L!==0&&ce("failed to call OrtRun().");let re=[];for(let V=0;V<x;V++){let K=Number(a.getValue(R+V*d,"*"));if(K===T[V]){re.push(o[V]);continue}let we=a.stackSave(),j=a.stackAlloc(4*d),Q=!1,ie,te=0;try{a._OrtGetTensorData(K,j,j+d,j+2*d,j+3*d)!==0&&ce(`Can't access output tensor data on index ${V}.`);let Oe=d===4?"i32":"i64",ve=Number(a.getValue(j,Oe));te=a.getValue(j+d,"*");let de=a.getValue(j+d*2,"*"),W=Number(a.getValue(j+d*3,Oe)),Y=[];for(let pe=0;pe<W;pe++)Y.push(Number(a.getValue(de+pe*d,Oe)));a._OrtFree(de)!==0&&ce("Can't free memory for tensor dims.");let he=Y.reduce((pe,Ie)=>pe*Ie,1);ie=bt(ve);let De=h?.outputPreferredLocations[n[V]];if(ie==="string"){if(De==="gpu-buffer"||De==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let pe=[];for(let Ie=0;Ie<he;Ie++){let He=a.getValue(te+Ie*d,"*"),mn=a.getValue(te+(Ie+1)*d,"*"),wt=Ie===he-1?void 0:mn-He;pe.push(a.UTF8ToString(He,wt))}re.push([ie,Y,pe,"cpu"])}else if(De==="gpu-buffer"&&he>0){let pe=a.jsepGetBuffer;if(!pe)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let Ie=pe(te),He=At(ve,he);if(He===void 0||!Dr(ie))throw new Error(`Unsupported data type: ${ie}`);Q=!0,re.push([ie,Y,{gpuBuffer:Ie,download:a.jsepCreateDownloader(Ie,He,ie),dispose:()=>{a._OrtReleaseTensor(K)!==0&&ce("Can't release tensor.")}},"gpu-buffer"])}else if(De==="ml-tensor"&&he>0){let pe=a.jsepEnsureTensor;if(!pe)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(At(ve,he)===void 0||!Br(ie))throw new Error(`Unsupported data type: ${ie}`);let He=await pe(te,ve,Y,!1);Q=!0,re.push([ie,Y,{mlTensor:He,download:a.jsepCreateMLTensorDownloader(te,ie),dispose:()=>{a.jsepReleaseTensorId(te),a._OrtReleaseTensor(K)}},"ml-tensor"])}else{let pe=Or(ie),Ie=new pe(he);new Uint8Array(Ie.buffer,Ie.byteOffset,Ie.byteLength).set(a.HEAPU8.subarray(te,te+Ie.byteLength)),re.push([ie,Y,Ie,"cpu"])}}finally{a.stackRestore(we),ie==="string"&&te&&a._free(te),Q||a._OrtReleaseTensor(K)}}return h&&!_&&(a._OrtClearBoundOutputs(h.handle)!==0&&ce("Can't clear bound outputs."),zt.set(e,[p,m,u,h,_,!1])),re}finally{a.stackRestore(C),S.forEach(L=>a._OrtReleaseTensor(L)),T.forEach(L=>a._OrtReleaseTensor(L)),A.forEach(L=>a._free(L)),$!==0&&a._OrtReleaseRunOptions($),v.forEach(L=>a._free(L))}},kr=e=>{let t=Te(),r=zt.get(e);if(!r)throw new Error("invalid session id");let n=r[0],o=t._OrtEndProfiling(n);o===0&&ce("Can't get an profile file name."),t._OrtFree(o)},Er=e=>{let t=[];for(let r of e){let n=r[2];!Array.isArray(n)&&"buffer"in n&&t.push(n.buffer)}return t}});var Ot,Ge,rr,dn,ln,un,Eo,Po,Wt,Lt,bg,dc,lc,cc,pc,mc,fc,hc,zo=U(()=>{"use strict";Le();Fn();gt();$r();Ot=()=>!!_e.wasm.proxy&&typeof document<"u",rr=!1,dn=!1,ln=!1,Po=new Map,Wt=(e,t)=>{let r=Po.get(e);r?r.push(t):Po.set(e,[t])},Lt=()=>{if(rr||!dn||ln||!Ge)throw new Error("worker not ready")},bg=e=>{switch(e.data.type){case"init-wasm":rr=!1,e.data.err?(ln=!0,Eo[1](e.data.err)):(dn=!0,Eo[0]()),un&&(URL.revokeObjectURL(un),un=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=Po.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}default:}},dc=async()=>{if(!dn){if(rr)throw new Error("multiple calls to 'initWasm()' detected.");if(ln)throw new Error("previous call to 'initWasm()' failed.");if(rr=!0,Ot())return new Promise((e,t)=>{Ge?.terminate(),Na().then(([r,n])=>{try{Ge=n,Ge.onerror=i=>t(i),Ge.onmessage=bg,Eo=[e,t];let o={type:"init-wasm",in:_e};!o.in.wasm.wasmPaths&&(r||import.meta.url?.startsWith("file:"))&&(o.in.wasm.wasmPaths={wasm:new URL(/* asset import */ __webpack_require__(/*! ort-wasm-simd-threaded.jsep.wasm */ "./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm"), __webpack_require__.b).href}),Ge.postMessage(o),un=r}catch(o){t(o)}},t)});try{await xr(_e.wasm),await Sr(_e),dn=!0}catch(e){throw ln=!0,e}finally{rr=!1}}},lc=async e=>{if(Ot())return Lt(),new Promise((t,r)=>{Wt("init-ep",[t,r]);let n={type:"init-ep",in:{epName:e,env:_e}};Ge.postMessage(n)});await Tr(_e,e)},cc=async e=>Ot()?(Lt(),new Promise((t,r)=>{Wt("copy-from",[t,r]);let n={type:"copy-from",in:{buffer:e}};Ge.postMessage(n,[e.buffer])})):qt(e),pc=async(e,t)=>{if(Ot()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return Lt(),new Promise((r,n)=>{Wt("create",[r,n]);let o={type:"create",in:{model:e,options:{...t}}},i=[];e instanceof Uint8Array&&i.push(e.buffer),Ge.postMessage(o,i)})}else return Ir(e,t)},mc=async e=>{if(Ot())return Lt(),new Promise((t,r)=>{Wt("release",[t,r]);let n={type:"release",in:e};Ge.postMessage(n)});Cr(e)},fc=async(e,t,r,n,o,i)=>{if(Ot()){if(r.some(a=>a[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(o.some(a=>a))throw new Error("pre-allocated output tensor is not supported for proxy.");return Lt(),new Promise((a,d)=>{Wt("run",[a,d]);let l=r,p={type:"run",in:{sessionId:e,inputIndices:t,inputs:l,outputIndices:n,options:i}};Ge.postMessage(p,Er(l))})}else return Ar(e,t,r,n,o,i)},hc=async e=>{if(Ot())return Lt(),new Promise((t,r)=>{Wt("end-profiling",[t,r]);let n={type:"end-profiling",in:e};Ge.postMessage(n)});kr(e)}});var gc,yg,cn,bc=U(()=>{"use strict";Le();zo();J();vr();Qn();gc=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},yg=e=>{switch(e[3]){case"cpu":return new Fe(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Dr(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:r,download:n,dispose:o}=e[2];return Fe.fromGpuBuffer(r,{dataType:t,dims:e[1],download:n,dispose:o})}case"ml-tensor":{let t=e[0];if(!Br(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:r,download:n,dispose:o}=e[2];return Fe.fromMLTensor(r,{dataType:t,dims:e[1],download:n,dispose:o})}default:throw new Error(`invalid data location: ${e[3]}`)}},cn=class{async fetchModelAndCopyToWasmMemory(t){return cc(await Zt(t))}async loadModel(t,r){Ne();let n;typeof t=="string"? false?0:n=await this.fetchModelAndCopyToWasmMemory(t):n=t,[this.sessionId,this.inputNames,this.outputNames]=await pc(n,r),Be()}async dispose(){return mc(this.sessionId)}async run(t,r,n){Ne();let o=[],i=[];Object.entries(t).forEach(h=>{let _=h[0],y=h[1],g=this.inputNames.indexOf(_);if(g===-1)throw new Error(`invalid input '${_}'`);o.push(y),i.push(g)});let a=[],d=[];Object.entries(r).forEach(h=>{let _=h[0],y=h[1],g=this.outputNames.indexOf(_);if(g===-1)throw new Error(`invalid output '${_}'`);a.push(y),d.push(g)});let l=o.map((h,_)=>gc(h,()=>`input "${this.inputNames[i[_]]}"`)),p=a.map((h,_)=>h?gc(h,()=>`output "${this.outputNames[d[_]]}"`):null),m=await fc(this.sessionId,i,l,d,p,n),u={};for(let h=0;h<m.length;h++)u[this.outputNames[d[h]]]=a[h]??yg(m[h]);return Be(),u}startProfiling(){}endProfiling(){hc(this.sessionId)}}});var _c={};Ht(_c,{OnnxruntimeWebAssemblyBackend:()=>pn,initializeFlags:()=>yc,wasmBackend:()=>_g});var yc,pn,_g,wc=U(()=>{"use strict";Le();zo();bc();yc=()=>{if((typeof _e.wasm.initTimeout!="number"||_e.wasm.initTimeout<0)&&(_e.wasm.initTimeout=0),_e.wasm.simd===!1&&console.warn('Deprecated property "env.wasm.simd" is set to false. non-SIMD build is no longer provided, and this setting will be ignored.'),typeof _e.wasm.proxy!="boolean"&&(_e.wasm.proxy=!1),typeof _e.wasm.trace!="boolean"&&(_e.wasm.trace=!1),typeof _e.wasm.numThreads!="number"||!Number.isInteger(_e.wasm.numThreads)||_e.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)_e.wasm.numThreads=1;else{let e=typeof navigator>"u"?Un("node:os").cpus().length:navigator.hardwareConcurrency;_e.wasm.numThreads=Math.min(4,Math.ceil((e||1)/2))}},pn=class{async init(t){yc(),await dc(),await lc(t)}async createInferenceSessionHandler(t,r){let n=new cn;return await n.loadModel(t,r),Promise.resolve(n)}},_g=new pn});Le();Le();Le();var Ia="1.21.0-dev.20250206-d981b153d3";var $1=Hn;{let e=(wc(),gr(_c)).wasmBackend;Tt("webgpu",e,5),Tt("webnn",e,5),Tt("cpu",e,10),Tt("wasm",e,10)}Object.defineProperty(_e.versions,"web",{value:Ia,enumerable:!0});
6637
6710
  /**
6638
6711
  * @license
6639
6712
  * Copyright 2021 Google LLC. All Rights Reserved.
@@ -8769,7 +8842,7 @@ __webpack_require__.r(__webpack_exports__);
8769
8842
 
8770
8843
 
8771
8844
 
8772
- const VERSION = '3.3.2';
8845
+ const VERSION = '3.3.3';
8773
8846
 
8774
8847
  // Check if various APIs are available (depends on environment)
8775
8848
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";