@jax-js/jax 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -314,6 +314,16 @@ function runWithCache(cache, key, thunk) {
314
314
  return value;
315
315
  }
316
316
  }
317
/**
 * In-flight computations of `runWithCacheAsync`, grouped by cache object.
 * Kept separate from the cache itself so that synchronous readers of the same
 * cache never observe a Promise where they expect a finished value.
 */
const runWithCacheAsyncInflight = /* @__PURE__ */ new WeakMap();
/**
 * Async version of `runWithCache`.
 *
 * Looks up `JSON.stringify(key)` in `cache`; on a miss, awaits `thunk()` and
 * stores its result under that key. Concurrent misses for the same key share
 * one `thunk()` call instead of each starting their own. A rejected thunk is
 * not cached, so a later call retries.
 *
 * @param cache Map-like store providing `has`, `get` and `set`.
 * @param key Any JSON-serializable cache key.
 * @param thunk Function producing the value (or a Promise of it) on a miss.
 * @returns The cached or freshly computed value.
 */
async function runWithCacheAsync(cache, key, thunk) {
  const keyStr = JSON.stringify(key);
  if (cache.has(keyStr)) return cache.get(keyStr);
  let inflight = runWithCacheAsyncInflight.get(cache);
  if (!inflight) {
    inflight = /* @__PURE__ */ new Map();
    runWithCacheAsyncInflight.set(cache, inflight);
  }
  const shared = inflight.get(keyStr);
  if (shared) return shared;
  // The async wrapper turns a synchronous throw from `thunk` into a rejection,
  // so the bookkeeping below always runs after the entry has been registered.
  const pending = (async () => {
    const value = await thunk();
    cache.set(keyStr, value);
    return value;
  })();
  inflight.set(keyStr, pending);
  const forget = () => {
    if (inflight.get(keyStr) === pending) inflight.delete(keyStr);
  };
  pending.then(forget, forget);
  return pending;
}
317
327
 
318
328
  //#endregion
319
329
  //#region src/alu.ts
@@ -416,8 +426,23 @@ var AluExp = class AluExp {
416
426
  this.src = src;
417
427
  this.arg = arg;
418
428
  if (AluGroup.RequiredFloat.has(op) && !isFloatDtype(dtype)) throw new TypeError(`Unsupported dtype for ${op}: ${dtype}`);
419
- if (op === AluOp.Bitcast && (dtype === DType.Bool || src[0].dtype === DType.Bool || byteWidth(dtype) !== byteWidth(src[0].dtype))) throw new TypeError(`Bitcast from ${src[0].dtype} -> ${dtype}`);
420
- if (op === AluOp.Threefry2x32 && (dtype !== DType.Uint32 || src.some((x) => x.dtype !== DType.Uint32))) throw new TypeError("Threefry2x32 requires uint32 types");
429
+ switch (op) {
430
+ case AluOp.Bitcast:
431
+ if (dtype === DType.Bool || src[0].dtype === DType.Bool || byteWidth(dtype) !== byteWidth(src[0].dtype)) throw new TypeError(`Bitcast from ${src[0].dtype} -> ${dtype}`);
432
+ break;
433
+ case AluOp.Threefry2x32:
434
+ if (dtype !== DType.Uint32 || src.some((x) => x.dtype !== DType.Uint32)) throw new TypeError("Threefry2x32 requires uint32 types");
435
+ break;
436
+ case AluOp.BitCombine:
437
+ if (src[0].dtype !== src[1].dtype || isFloatDtype(src[0].dtype)) throw new TypeError(`BitCombine[${arg}] requires matching integral dtype, got ${src[0].dtype} and ${src[1].dtype}`);
438
+ break;
439
+ case AluOp.BitShift:
440
+ if (src[0].dtype === DType.Bool || src[1].dtype === DType.Bool || isFloatDtype(src[0].dtype) || isFloatDtype(src[1].dtype)) throw new TypeError(`BitShift[${arg}] requires two integral, non-bool dtypes, got ${src[0].dtype} and ${src[1].dtype}`);
441
+ break;
442
+ case AluOp.BitInvert:
443
+ if (isFloatDtype(src[0].dtype)) throw new TypeError(`BitInvert requires an integral dtype, got ${src[0].dtype}`);
444
+ break;
445
+ }
421
446
  }
422
447
  static add(a, b) {
423
448
  return new AluExp(AluOp.Add, a.dtype, [a, b]);
@@ -494,6 +519,12 @@ var AluExp = class AluExp {
494
519
  c1
495
520
  ], mode);
496
521
  }
522
+ static bitCombine(a, b, mode) {
523
+ return new AluExp(AluOp.BitCombine, a.dtype, [a, b], mode);
524
+ }
525
+ static bitShift(a, b, mode) {
526
+ return new AluExp(AluOp.BitShift, a.dtype, [a, b], mode);
527
+ }
497
528
  static cmplt(a, b) {
498
529
  return new AluExp(AluOp.Cmplt, DType.Bool, [a, b]);
499
530
  }
@@ -966,6 +997,16 @@ var AluExp = class AluExp {
966
997
  case AluOp.Mod: return x % y;
967
998
  case AluOp.Min: return Math.min(x, y);
968
999
  case AluOp.Max: return Math.max(x, y);
1000
+ case AluOp.BitCombine: {
1001
+ let r;
1002
+ if (this.arg === "and") r = x & y;
1003
+ else if (this.arg === "or") r = x | y;
1004
+ else r = x ^ y;
1005
+ return this.dtype === DType.Int32 ? r | 0 : r >>> 0;
1006
+ }
1007
+ case AluOp.BitShift:
1008
+ if (this.arg === "shl") return this.dtype === DType.Int32 ? x << y | 0 : x << y >>> 0;
1009
+ return x >>> y;
969
1010
  case AluOp.Cmplt: return Number(x < y);
970
1011
  case AluOp.Cmpne: return Number(x != y);
971
1012
  default: throw new Error(`Missing implemementation for ${this.op}`);
@@ -1087,6 +1128,18 @@ var AluExp = class AluExp {
1087
1128
  }
1088
1129
  if (BIN_SYM[node.op]) return `(${parts[0]} ${BIN_SYM[node.op]} ${parts[1]})`;
1089
1130
  if (CMP_SYM[node.op]) return `(${parts[0]} ${CMP_SYM[node.op]} ${parts[1]})`;
1131
+ if (node.op === AluOp.BitCombine) {
1132
+ const sym = {
1133
+ and: "&",
1134
+ or: "|",
1135
+ xor: "^"
1136
+ }[node.arg];
1137
+ return `(${parts[0]} ${sym} ${parts[1]})`;
1138
+ }
1139
+ if (node.op === AluOp.BitShift) {
1140
+ const sym = node.arg === "shl" ? "<<" : ">>";
1141
+ return `(${parts[0]} ${sym} ${parts[1]})`;
1142
+ }
1090
1143
  if (UNARY_SYM[node.op]) return `${UNARY_SYM[node.op]}${parts[0]}`;
1091
1144
  if (node.op === AluOp.Cast) return `Cast<${node.dtype}>(${strip1(parts[0])})`;
1092
1145
  if (node.op === AluOp.Bitcast) return `Bitcast<${node.dtype}>(${strip1(parts[0])})`;
@@ -1179,6 +1232,9 @@ let AluOp = /* @__PURE__ */ function(AluOp$1) {
1179
1232
  AluOp$1["Reciprocal"] = "Reciprocal";
1180
1233
  AluOp$1["Cast"] = "Cast";
1181
1234
  AluOp$1["Bitcast"] = "Bitcast";
1235
+ AluOp$1["BitCombine"] = "BitCombine";
1236
+ AluOp$1["BitInvert"] = "BitInvert";
1237
+ AluOp$1["BitShift"] = "BitShift";
1182
1238
  AluOp$1["Cmplt"] = "Cmplt";
1183
1239
  AluOp$1["Cmpne"] = "Cmpne";
1184
1240
  AluOp$1["Where"] = "Where";
@@ -1198,7 +1254,9 @@ const AluGroup = {
1198
1254
  AluOp.Idiv,
1199
1255
  AluOp.Mod,
1200
1256
  AluOp.Min,
1201
- AluOp.Max
1257
+ AluOp.Max,
1258
+ AluOp.BitCombine,
1259
+ AluOp.BitShift
1202
1260
  ]),
1203
1261
  Unary: new Set([
1204
1262
  AluOp.Sin,
@@ -1373,11 +1431,13 @@ var Reduction = class {
1373
1431
  function accessorGlobal(dtype, gid, st, indices) {
1374
1432
  const [index, valid] = st.toAluExp(indices);
1375
1433
  const [, len] = st.views[0].dataRange();
1434
+ if (valid.resolve()) return AluExp.globalIndex(dtype, gid, len, index);
1376
1435
  return AluExp.where(valid, AluExp.globalIndex(dtype, gid, len, index), AluExp.const(dtype, 0));
1377
1436
  }
1378
1437
  /** Expression for accessing `indices` in an array recipe with variable "idx". */
1379
1438
  function accessorAluExp(exp, st, indices) {
1380
1439
  const [index, valid] = st.toAluExp(indices);
1440
+ if (valid.resolve()) return exp.substitute({ idx: index });
1381
1441
  return AluExp.where(valid, exp.substitute({ idx: index }), AluExp.const(exp.dtype, 0));
1382
1442
  }
1383
1443
  function threefry2x32(k0, k1, c0, c1) {
@@ -3159,6 +3219,147 @@ function wasm_threefry2x32(cg) {
3159
3219
  });
3160
3220
  }
3161
3221
 
3222
+ //#endregion
3223
+ //#region src/backend/wasm/parallel.ts
3224
/**
 * Report whether both globals needed for threaded execution exist:
 * `SharedArrayBuffer` and `Worker`.
 */
function hasSharedArrayBuffer() {
  const requiredGlobals = [typeof SharedArrayBuffer, typeof Worker];
  return requiredGlobals.every((kind) => kind !== "undefined");
}
3228
+ const MIN_ELEMS_PER_THREAD = 256;
3229
+ const WORKER_SOURCE = `
3230
+ let memory = null;
3231
+ let cachedModule = null;
3232
+ let cachedFunc = null;
3233
+
3234
+ self.onmessage = (e) => {
3235
+ const msg = e.data;
3236
+ if (msg.type === "init") {
3237
+ memory = msg.memory;
3238
+ postMessage({ type: "ready" });
3239
+ return;
3240
+ }
3241
+ try {
3242
+ const { module, ptrs, begin, end } = msg;
3243
+ if (module !== cachedModule) {
3244
+ cachedModule = module;
3245
+ const instance = new WebAssembly.Instance(module, { env: { memory } });
3246
+ cachedFunc = instance.exports.kernel;
3247
+ }
3248
+ cachedFunc(...ptrs, begin, end);
3249
+ postMessage({ type: "done", ok: true });
3250
+ } catch (err) {
3251
+ postMessage({ type: "done", ok: false, error: String(err) });
3252
+ }
3253
+ };
3254
+ `;
3255
/**
 * Pool of Web Workers for parallel WASM kernel dispatch.
 *
 * Workers are created lazily on the first `dispatch`. Dispatches run strictly
 * one after another; each is identified by a BigInt epoch. `epoch` is the
 * number of dispatches that have finished (successfully or not), and
 * `waitForEpoch` resolves once that counter reaches a given value.
 */
var WasmWorkerPool = class {
  #memory;
  #numWorkers;
  #workers = [];
  #ready = Promise.resolve();
  /** Serializes dispatches so concurrent read() calls don't clobber onmessage. */
  #queue = Promise.resolve();
  /** Number of dispatches that have finished. */
  #epoch = 0n;
  /** Number of dispatches that have been requested. */
  #epochEnd = 0n;
  /** Resolvers waiting for a given epoch, keyed by that epoch. */
  #hooks = /* @__PURE__ */ new Map();
  /**
   * @param memory Memory object forwarded to every worker in its "init" message.
   * @param numWorkers Number of workers to create; must be positive.
   */
  constructor(memory, numWorkers) {
    if (numWorkers <= 0) throw new Error("numWorkers must be positive");
    this.#memory = memory;
    this.#numWorkers = numWorkers;
  }
  /** Epoch of the most recently finished dispatch (0n before any finishes). */
  get epoch() {
    return this.#epoch;
  }
  /** Resolve once the dispatch with epoch `target` has finished. */
  waitForEpoch(target) {
    if (target <= this.#epoch) return Promise.resolve();
    return new Promise((resolve) => {
      if (target <= this.#epoch) return resolve();
      const hooks = this.#hooks.get(target);
      if (hooks) hooks.push(resolve);
      else this.#hooks.set(target, [resolve]);
    });
  }
  /** Create the workers on first use and send each one the shared memory. */
  #ensureInit() {
    if (this.#workers.length > 0) return;
    const blob = new Blob([WORKER_SOURCE], { type: "application/javascript" });
    const url = URL.createObjectURL(blob);
    const readyPromises = [];
    for (let i = 0; i < this.#numWorkers; i++) {
      const worker = new Worker(url, { type: "module" });
      this.#workers.push(worker);
      readyPromises.push(new Promise((resolve, reject) => {
        worker.onmessage = () => resolve();
        worker.onerror = (e) => reject(new Error(e.message || "Worker failed to load"));
      }));
      worker.postMessage({
        type: "init",
        memory: this.#memory
      });
    }
    // Release the blob URL whether or not initialization succeeded; it used to
    // be revoked only on success and leaked when a worker failed to load.
    this.#ready = Promise.all(readyPromises).finally(() => {
      URL.revokeObjectURL(url);
    });
    this.#queue = this.#ready;
  }
  /**
   * Dispatch a kernel across multiple workers.
   *
   * Returns an epoch that can be used to wait for the ongoing work to complete,
   * which is guaranteed to be monotonically increasing.
   */
  dispatch(module$1, ptrs, size) {
    this.#ensureInit();
    this.#epochEnd++;
    const result = this.#queue.then(() => this.#dispatchNow(module$1, ptrs, size));
    this.#queue = result.then(() => {}, (err) => {
      // A failed dispatch must not stall later ones, but nobody else observes
      // this promise, so report the failure instead of dropping it silently.
      console.error("WasmWorkerPool: kernel dispatch failed", err);
    }).then(() => {
      this.#epoch++;
      const hooks = this.#hooks.get(this.#epoch);
      if (hooks) {
        for (const hook of hooks) hook();
        this.#hooks.delete(this.#epoch);
      }
    });
    return this.#epochEnd;
  }
  /** Split [0, size) into 16-aligned chunks and run one chunk per worker. */
  async #dispatchNow(module$1, ptrs, size) {
    if (size === 0) return;
    const n = Math.min(this.#workers.length, Math.ceil(size / MIN_ELEMS_PER_THREAD));
    const chunkSize = Math.ceil(size / n / 16) * 16;
    const promises = [];
    for (let i = 0; i < n; i++) {
      const begin = i * chunkSize;
      const end = Math.min(begin + chunkSize, size);
      if (begin >= size) break;
      const worker = this.#workers[i];
      promises.push(new Promise((resolve, reject) => {
        worker.onmessage = (e) => {
          if (e.data.ok) resolve();
          else reject(/* @__PURE__ */ new Error(`Worker error: ${e.data.error}`));
        };
        // Without this an `error` event on the worker would leave the promise
        // pending forever and block every later dispatch.
        worker.onerror = (e) => reject(new Error(`Worker error: ${e.message || "worker crashed"}`));
        worker.postMessage({
          module: module$1,
          ptrs,
          begin,
          end
        });
      }));
    }
    await Promise.all(promises);
  }
};
3352
/**
 * Try to create a worker pool. Returns null if workers are unavailable.
 * The pool size is `navigator.hardwareConcurrency` when that is available and
 * non-zero, otherwise 4, and never less than 1.
 */
function createWorkerPool(memory) {
  if (!hasSharedArrayBuffer()) return null;
  try {
    const reportedCores = typeof navigator !== "undefined" && navigator.hardwareConcurrency;
    const numWorkers = Math.max(1, reportedCores || 4);
    return new WasmWorkerPool(memory, numWorkers);
  } catch {
    return null;
  }
}
3362
+
3162
3363
  //#endregion
3163
3364
  //#region src/backend/wasm/wasmblr.ts
3164
3365
  /**
@@ -3496,7 +3697,7 @@ var CodeGenerator = class {
3496
3697
  concat(importSectionBytes, encodeString(this.memory.aString));
3497
3698
  concat(importSectionBytes, encodeString(this.memory.bString));
3498
3699
  importSectionBytes.push(2);
3499
- if (this.memory.min && this.memory.max) {
3700
+ if (this.memory.max) {
3500
3701
  if (this.memory.isShared) importSectionBytes.push(3);
3501
3702
  else importSectionBytes.push(1);
3502
3703
  concat(importSectionBytes, encodeUnsigned(this.memory.min));
@@ -3903,6 +4104,8 @@ var I32x4 = class extends V128 {
3903
4104
  min_u = VECTOR_OP("min_u", 183, ["v128", "v128"], "v128");
3904
4105
  max_s = VECTOR_OP("max_s", 184, ["v128", "v128"], "v128");
3905
4106
  max_u = VECTOR_OP("max_u", 185, ["v128", "v128"], "v128");
4107
+ trunc_sat_f32x4_s = VECTOR_OP("trunc_sat_f32x4_s", 248, ["v128"], "v128");
4108
+ trunc_sat_f32x4_u = VECTOR_OP("trunc_sat_f32x4_u", 249, ["v128"], "v128");
3906
4109
  };
3907
4110
  var F32x4 = class extends V128 {
3908
4111
  splat = VECTOR_OP("splat", 19, ["f32"], "v128");
@@ -3929,10 +4132,333 @@ var F32x4 = class extends V128 {
3929
4132
  max = VECTOR_OP("max", 233, ["v128", "v128"], "v128");
3930
4133
  pmin = VECTOR_OP("pmin", 234, ["v128", "v128"], "v128");
3931
4134
  pmax = VECTOR_OP("pmax", 235, ["v128", "v128"], "v128");
4135
+ convert_i32x4_s = VECTOR_OP("convert_i32x4_s", 250, ["v128"], "v128");
4136
+ convert_i32x4_u = VECTOR_OP("convert_i32x4_u", 251, ["v128"], "v128");
3932
4137
  };
3933
4138
 
3934
4139
  //#endregion
3935
4140
  //#region src/backend/wasm.ts
4141
/**
 * SIMD version of translateExp: emits v128 (f32x4 or i32x4) instructions instead of scalar.
 * gidx always steps by 4. strideMap classifies each GlobalIndex as broadcast/contiguous/gather.
 *
 * Nodes referenced more than once are stored in a v128 local after their first
 * emission and reloaded from it afterwards.
 */
function translateExpSimd(cg, funcs, exp, ctx, strideMap) {
  const isIntLike = (dt) => dt === DType.Int32 || dt === DType.Uint32 || dt === DType.Bool;

  // Pass 1: count how often each node is referenced.
  const useCount = /* @__PURE__ */ new Map();
  const visited = /* @__PURE__ */ new Set();
  const tally = (node) => {
    useCount.set(node, (useCount.get(node) ?? 0) + 1);
    if (visited.has(node)) return;
    visited.add(node);
    for (const child of node.src) tally(child);
  };

  // Converts a lane mask (all ones / all zeros) into 1 / 0 per lane.
  const emitMaskToBool = () => {
    cg.i32.const(1);
    cg.i32x4.splat();
    cg.v128.and();
  };

  // Pass 2: emit code, node -> local holding its already-computed value.
  const spilled = /* @__PURE__ */ new Map();
  const emit = (node) => {
    if (spilled.has(node)) return cg.local.get(spilled.get(node));
    const { op, src, arg, dtype } = node;
    const isInt = isIntLike(dtype);
    const isSigned = dtype === DType.Int32;
    const lanes = isInt ? cg.i32x4 : cg.f32x4;
    const scalar = isInt ? cg.i32 : cg.f32;

    const emitOperands = (count) => {
      for (let i = 0; i < count; i++) emit(src[i]);
    };
    // Stack: element index -> byte address inside the buffer held in local `gid`.
    const emitByteAddress = (gid) => {
      cg.i32.const(byteWidth(dtype));
      cg.i32.mul();
      cg.local.get(gid);
      cg.i32.add();
    };
    // Stack: index -> index if it is below `len` (unsigned), else 0.
    const emitZeroIfOutOfRange = (scratch, len) => {
      cg.local.tee(scratch);
      cg.i32.const(0);
      cg.local.get(scratch);
      cg.i32.const(len);
      cg.i32.lt_u();
      cg.select();
    };

    switch (op) {
      case AluOp.Add:
        emitOperands(2);
        lanes.add();
        break;
      case AluOp.Sub:
        emitOperands(2);
        lanes.sub();
        break;
      case AluOp.Mul:
        emitOperands(2);
        lanes.mul();
        break;
      case AluOp.Min:
        emitOperands(2);
        if (!isInt) cg.f32x4.min();
        else if (isSigned) cg.i32x4.min_s();
        else cg.i32x4.min_u();
        break;
      case AluOp.Max:
        emitOperands(2);
        if (!isInt) cg.f32x4.max();
        else if (isSigned) cg.i32x4.max_s();
        else cg.i32x4.max_u();
        break;
      case AluOp.Sqrt:
        emit(src[0]);
        cg.f32x4.sqrt();
        break;
      case AluOp.Floor:
        emit(src[0]);
        cg.f32x4.floor();
        break;
      case AluOp.Ceil:
        emit(src[0]);
        cg.f32x4.ceil();
        break;
      case AluOp.Const:
        scalar.const(arg);
        lanes.splat();
        break;
      case AluOp.Cast: {
        emit(src[0]);
        const fromDtype = src[0].dtype;
        const fromInt = isIntLike(fromDtype);
        if (isInt && !fromInt) {
          if (isSigned) cg.i32x4.trunc_sat_f32x4_s();
          else cg.i32x4.trunc_sat_f32x4_u();
        } else if (!isInt && fromInt) {
          if (fromDtype === DType.Int32 || fromDtype === DType.Bool) cg.f32x4.convert_i32x4_s();
          else cg.f32x4.convert_i32x4_u();
        }
        // int <-> int casts need no instruction.
        break;
      }
      case AluOp.Cmplt: {
        emitOperands(2);
        const operandDtype = src[0].dtype;
        if (operandDtype === DType.Float32) cg.f32x4.lt();
        else if (operandDtype === DType.Int32) cg.i32x4.lt_s();
        else if (operandDtype === DType.Uint32) cg.i32x4.lt_u();
        else throw new UnsupportedOpError(op, dtype, "wasm");
        emitMaskToBool();
        break;
      }
      case AluOp.Cmpne:
        emitOperands(2);
        if (src[0].dtype === DType.Float32) cg.f32x4.ne();
        else cg.i32x4.ne();
        emitMaskToBool();
        break;
      case AluOp.Where:
        // bitselect takes (ifTrue, ifFalse, mask); the mask is `cond != 0`.
        emit(src[1]);
        emit(src[2]);
        emit(src[0]);
        cg.i32.const(0);
        cg.i32x4.splat();
        cg.i32x4.ne();
        cg.v128.bitselect();
        break;
      case AluOp.Variable:
      case AluOp.Special:
        throw new Error(`translateExpSimd: unexpected ${op}(${arg})`);
      case AluOp.GlobalIndex: {
        const [gid, len] = arg;
        const indexExp = src[0];
        const stride = strideMap.get(node) ?? GATHER;
        if (stride.kind === "contiguous") {
          // One vector load starting at the lane-0 index, clamped so that the
          // four lanes stay inside the buffer.
          translateExp(cg, funcs, indexExp, ctx);
          const lastStart = Math.max(len - SIMD_LANES, 0);
          const start = cg.local.declare(cg.i32);
          cg.local.set(start);
          cg.local.get(start);
          cg.i32.const(lastStart);
          cg.local.get(start);
          cg.i32.const(lastStart);
          cg.i32.lt_u();
          cg.select();
          emitByteAddress(gid);
          lanes.load(4);
        } else if (stride.kind === "broadcast") {
          // One scalar load replicated into all lanes.
          translateExp(cg, funcs, indexExp, ctx);
          const scratch = cg.local.declare(cg.i32);
          emitZeroIfOutOfRange(scratch, len);
          emitByteAddress(gid);
          scalar.load(2);
          lanes.splat();
        } else {
          // Gather: evaluate the scalar index once per lane with gidx
          // temporarily advanced, then restore gidx.
          const gidxLocal = ctx["gidx"];
          const savedGidx = cg.local.declare(cg.i32);
          cg.local.get(gidxLocal);
          cg.local.set(savedGidx);
          scalar.const(0);
          lanes.splat();
          const vec = cg.local.declare(lanes);
          cg.local.set(vec);
          const laneIndex = cg.local.declare(cg.i32);
          const laneValue = cg.local.declare(scalar);
          for (let lane = 0; lane < SIMD_LANES; lane++) {
            cg.local.get(savedGidx);
            if (lane > 0) {
              cg.i32.const(lane);
              cg.i32.add();
            }
            cg.local.set(gidxLocal);
            translateExp(cg, funcs, indexExp, ctx);
            emitZeroIfOutOfRange(laneIndex, len);
            emitByteAddress(gid);
            scalar.load(2);
            cg.local.set(laneValue);
            cg.local.get(vec);
            cg.local.get(laneValue);
            lanes.replace_lane(lane);
            cg.local.set(vec);
          }
          cg.local.get(savedGidx);
          cg.local.set(gidxLocal);
          cg.local.get(vec);
        }
        break;
      }
      default:
        throw new Error(`translateExpSimd: unsupported op ${op}`);
    }

    // Keep shared subexpressions in a local so later uses can reload them.
    if ((useCount.get(node) ?? 0) > 1) {
      const slot = cg.local.declare(lanes);
      cg.local.tee(slot);
      spilled.set(node, slot);
    }
  };

  tally(exp);
  emit(exp);
}
4337
+ /** Number of SIMD lanes (f32x4 / i32x4 = 4 lanes). */
4338
+ const SIMD_LANES = 4;
4339
/** True when `exp` or any node beneath it is the Special variable "gidx". */
function referencesGidx(exp) {
  const isGidx = exp.op === AluOp.Special && exp.arg[0] === "gidx";
  if (isGidx) return true;
  for (const child of exp.src) {
    if (referencesGidx(child)) return true;
  }
  return false;
}
4343
/**
 * When tileSize > N but doesn't divide evenly, the last group before the
 * inner reset is shorter than N — a SIMD group could straddle it.
 * An infinite (non-finite) tileSize never carries that risk.
 */
function hasFragmentRisk(tileSize, N) {
  if (!isFinite(tileSize)) return false;
  if (!(tileSize > N)) return false;
  return tileSize % N !== 0;
}
4348
+ const GATHER = { kind: "gather" };
4349
/**
 * Classify how a GlobalIndex's index expression behaves as gidx increments.
 *
 * Returns `{ kind, tileSize }` where `kind` is:
 *  - "broadcast": the value is constant over runs of `tileSize` gidx values,
 *  - "contiguous": the value increases by 1 with gidx within runs of `tileSize`,
 *  - "gather": anything else (the shared `GATHER` object).
 *
 * A contiguous result produced by adding a gidx-independent term additionally
 * carries `shifted: true`. Such a term moves where a later `/ N` or `% N`
 * wraps (e.g. `(gidx + 2) % 8` wraps between gidx 5 and 6), so run boundaries
 * are no longer known to line up with gidx; Idiv/Mod of a shifted operand is
 * therefore classified as gather instead of assuming aligned runs.
 */
function analyzeStride(exp) {
  if (!referencesGidx(exp)) return {
    kind: "broadcast",
    tileSize: Infinity
  };
  if (exp.op === AluOp.Special && exp.arg[0] === "gidx") return {
    kind: "contiguous",
    tileSize: Infinity
  };
  const isIdiv = exp.op === AluOp.Idiv;
  if ((isIdiv || exp.op === AluOp.Mod) && exp.src[1].op === AluOp.Const) {
    const N = exp.src[1].arg;
    const inner = analyzeStride(exp.src[0]);
    // A lane-invariant operand stays lane-invariant under / and %.
    if (inner.kind === "broadcast") return inner;
    if (inner.kind !== "contiguous") return GATHER;
    // The phase of the run boundaries is unknown after an added offset.
    if (inner.shifted) return GATHER;
    if (hasFragmentRisk(inner.tileSize, N)) return GATHER;
    return {
      kind: isIdiv ? "broadcast" : "contiguous",
      tileSize: Math.min(inner.tileSize, N)
    };
  }
  if (exp.op === AluOp.Mul) {
    for (let i = 0; i < 2; i++) if (exp.src[i].op === AluOp.Const) {
      const inner = analyzeStride(exp.src[1 - i]);
      if (inner.kind === "broadcast") return inner;
      return GATHER;
    }
  }
  if (exp.op === AluOp.Add) {
    const lhsHasGidx = referencesGidx(exp.src[0]);
    const rhsHasGidx = referencesGidx(exp.src[1]);
    if (lhsHasGidx !== rhsHasGidx) {
      const inner = analyzeStride(lhsHasGidx ? exp.src[0] : exp.src[1]);
      // Adding a lane-invariant term keeps the kind, but a contiguous run is
      // now offset; remember that for any enclosing Idiv/Mod.
      if (inner.kind === "contiguous") return {
        ...inner,
        shifted: true
      };
      return inner;
    }
  }
  return GATHER;
}
4398
+ /** Ops that have direct SIMD (f32x4) instruction variants. */
4399
+ const simdF32Ops = new Set([
4400
+ AluOp.Add,
4401
+ AluOp.Sub,
4402
+ AluOp.Mul,
4403
+ AluOp.Floor,
4404
+ AluOp.Ceil,
4405
+ AluOp.Min,
4406
+ AluOp.Max,
4407
+ AluOp.Sqrt,
4408
+ AluOp.Cast,
4409
+ AluOp.Where,
4410
+ AluOp.Const,
4411
+ AluOp.GlobalIndex
4412
+ ]);
4413
+ /** Ops that have direct SIMD (i32x4) instruction variants. */
4414
+ const simdI32Ops = new Set([
4415
+ AluOp.Add,
4416
+ AluOp.Sub,
4417
+ AluOp.Mul,
4418
+ AluOp.Min,
4419
+ AluOp.Max,
4420
+ AluOp.Cast,
4421
+ AluOp.Where,
4422
+ AluOp.Const,
4423
+ AluOp.GlobalIndex
4424
+ ]);
4425
+ /** Ops that produce Bool (i32x4 bitmask) in SIMD. */
4426
+ const simdBoolOps = new Set([
4427
+ AluOp.Cmplt,
4428
+ AluOp.Cmpne,
4429
+ AluOp.Const,
4430
+ AluOp.GlobalIndex
4431
+ ]);
4432
/**
 * Check if a kernel is eligible for SIMD codegen.
 *
 * A kernel qualifies when:
 * - size >= 4 (need at least 4 elements for a SIMD group)
 * - For reductions: the reduction op has a SIMD variant for its dtype
 * - All nodes have a supported dtype (f32, i32, u32, bool) with SIMD variants
 *
 * The subtree below a GlobalIndex node is not inspected.
 */
function isSimdEligible(tunedExp, kernel) {
  if (kernel.size < SIMD_LANES) return false;
  if (kernel.reduction) {
    const reductionOps = simdSupportedOpsForDtype(kernel.reduction.dtype);
    if (!reductionOps?.has(kernel.reduction.op)) return false;
  }
  const seen = /* @__PURE__ */ new Set();
  const isSupported = (node) => {
    // Shared nodes are judged on their first visit only.
    if (seen.has(node)) return true;
    seen.add(node);
    const ops = simdSupportedOpsForDtype(node.dtype);
    if (!ops?.has(node.op)) return false;
    if (node.op === AluOp.GlobalIndex) return true;
    return node.src.every((child) => isSupported(child));
  };
  return isSupported(tunedExp);
}
4456
/** Set of ops with SIMD variants for `dtype`, or null when the dtype has none. */
function simdSupportedOpsForDtype(dtype) {
  switch (dtype) {
    case DType.Float32:
      return simdF32Ops;
    case DType.Int32:
    case DType.Uint32:
      return simdI32Ops;
    case DType.Bool:
      return simdBoolOps;
    default:
      return null;
  }
}
3936
4462
  const moduleCache = /* @__PURE__ */ new Map();
3937
4463
  /** Backend that compiles into WebAssembly bytecode for immediate execution. */
3938
4464
  var WasmBackend = class {
@@ -3942,11 +4468,18 @@ var WasmBackend = class {
3942
4468
  #nextSlot;
3943
4469
  #allocator;
3944
4470
  #buffers;
4471
+ #workerPool;
4472
+ #pendingWork = /* @__PURE__ */ new Map();
3945
4473
  constructor() {
3946
- this.#memory = new WebAssembly.Memory({ initial: 0 });
4474
+ this.#memory = hasSharedArrayBuffer() ? new WebAssembly.Memory({
4475
+ initial: 0,
4476
+ maximum: 65536,
4477
+ shared: true
4478
+ }) : new WebAssembly.Memory({ initial: 0 });
3947
4479
  this.#allocator = new WasmAllocator(this.#memory);
3948
4480
  this.#nextSlot = 1;
3949
4481
  this.#buffers = /* @__PURE__ */ new Map();
4482
+ this.#workerPool = createWorkerPool(this.#memory);
3950
4483
  }
3951
4484
  malloc(size, initialData) {
3952
4485
  const ptr = this.#allocator.malloc(size);
@@ -3977,40 +4510,65 @@ var WasmBackend = class {
3977
4510
  }
3978
4511
  }
3979
4512
  async read(slot, start, count) {
3980
- return this.readSync(slot, start, count);
4513
+ const epoch = this.#pendingWork.get(slot);
4514
+ if (epoch) await this.#workerPool.waitForEpoch(epoch);
4515
+ return this.#readData(slot, start, count);
3981
4516
  }
3982
4517
  readSync(slot, start, count) {
4518
+ const epoch = this.#pendingWork.get(slot);
4519
+ if (epoch && this.#workerPool.epoch < epoch) throw new Error("cannot read synchronously from a slot with async work");
4520
+ return this.#readData(slot, start, count);
4521
+ }
4522
+ #readData(slot, start, count) {
3983
4523
  const buffer = this.#getBuffer(slot);
3984
4524
  if (start === void 0) start = 0;
3985
4525
  if (count === void 0) count = buffer.byteLength - start;
3986
- return buffer.slice(start, start + count);
4526
+ if (buffer.buffer instanceof SharedArrayBuffer) return new Uint8Array(buffer.slice(start, start + count));
4527
+ else return buffer.slice(start, start + count);
3987
4528
  }
3988
4529
  async prepareKernel(kernel) {
3989
- return this.prepareKernelSync(kernel);
4530
+ const kernelHash = FpHash.hash(kernel);
4531
+ const module$1 = await runWithCacheAsync(moduleCache, kernelHash.toString(), () => WebAssembly.compile(codegenWasm(kernel)));
4532
+ return new Executable(kernel, {
4533
+ module: module$1,
4534
+ parallel: this.#workerPool !== null
4535
+ });
3990
4536
  }
3991
4537
  prepareKernelSync(kernel) {
3992
4538
  const kernelHash = FpHash.hash(kernel);
3993
- const module$1 = runWithCache(moduleCache, kernelHash.toString(), () => {
3994
- const bytes = codegenWasm(kernel);
3995
- return new WebAssembly.Module(bytes);
4539
+ const module$1 = runWithCache(moduleCache, kernelHash.toString(), () => new WebAssembly.Module(codegenWasm(kernel)));
4540
+ return new Executable(kernel, {
4541
+ module: module$1,
4542
+ parallel: false
3996
4543
  });
3997
- return new Executable(kernel, { module: module$1 });
3998
4544
  }
3999
4545
  /** Async entry point for routines; delegates to `prepareRoutineSync`. */
  async prepareRoutine(routine) {
4000
4546
  return this.prepareRoutineSync(routine);
4001
4547
  }
4002
4548
  prepareRoutineSync(routine) {
4003
- return new Executable(routine, void 0);
4549
+ return new Executable(routine, {
4550
+ module: void 0,
4551
+ parallel: false
4552
+ });
4004
4553
  }
4005
4554
  dispatch(exe, inputs, outputs) {
4006
4555
  const tracing = isTracing();
4007
4556
  const start = tracing ? performance.now() : 0;
4008
4557
  if (exe.source instanceof Routine) runCpuRoutine(exe.source, inputs.map((slot) => this.#getBuffer(slot)), outputs.map((slot) => this.#getBuffer(slot)));
4009
4558
  else {
4010
- const instance = new WebAssembly.Instance(exe.data.module, { env: { memory: this.#memory } });
4011
- const func = instance.exports.kernel;
4012
4559
  const ptrs = [...inputs, ...outputs].map((slot) => this.#buffers.get(slot).ptr);
4013
- func(...ptrs);
4560
+ if (exe.data.parallel && this.#workerPool) {
4561
+ const epoch = this.#workerPool.dispatch(exe.data.module, ptrs, exe.source.size);
4562
+ for (const slot of outputs) this.#pendingWork.set(slot, epoch);
4563
+ } else {
4564
+ if (inputs.some((slot) => {
4565
+ const epoch = this.#pendingWork.get(slot);
4566
+ return epoch && this.#workerPool.epoch < epoch;
4567
+ })) throw new Error("cannot dispatch synchronously with pending async work");
4568
+ const instance = new WebAssembly.Instance(exe.data.module, { env: { memory: this.#memory } });
4569
+ const func = instance.exports.kernel;
4570
+ func(...ptrs, 0, exe.source.size);
4571
+ }
4014
4572
  }
4015
4573
  if (tracing) {
4016
4574
  const info = traceSourceInfo(exe.source);
@@ -4023,12 +4581,36 @@ var WasmBackend = class {
4023
4581
  return new Uint8Array(this.#memory.buffer, buffer.ptr, buffer.size);
4024
4582
  }
4025
4583
  };
4584
/**
 * Emit a runtime guard: enter the if-block only when [begin, end) is SIMD-aligned.
 * That is, when both `end - begin` and `begin` are multiples of SIMD_LANES.
 * The caller is responsible for closing the opened `if`.
 */
function emitAlignmentGuard(cg, paramBegin, paramEnd) {
  const laneMask = SIMD_LANES - 1;
  // Emits `pushValue`'s i32 followed by a test that its low bits are zero.
  const emitIsLaneMultiple = (pushValue) => {
    pushValue();
    cg.i32.const(laneMask);
    cg.i32.and();
    cg.i32.eqz();
  };
  emitIsLaneMultiple(() => {
    cg.local.get(paramEnd);
    cg.local.get(paramBegin);
    cg.i32.sub();
  });
  emitIsLaneMultiple(() => {
    cg.local.get(paramBegin);
  });
  cg.i32.and();
  cg.if(cg.void);
}
4026
4600
  function codegenWasm(kernel) {
4027
4601
  const tune = tuneNullopt(kernel);
4028
4602
  const re = kernel.reduction;
4029
4603
  if (DEBUG >= 3) console.info(`kernel.exp: ${kernel.exp}\ntune.exp: ${tune.exp}`);
4604
+ const useSimd = isSimdEligible(tune.exp, kernel);
4605
+ const bufferStrides = /* @__PURE__ */ new Map();
4606
+ if (useSimd) tune.exp.collect((e) => e.op === AluOp.GlobalIndex).forEach((gi) => {
4607
+ const result = analyzeStride(gi.src[0]);
4608
+ if (result.kind !== "gather" && (result.tileSize < SIMD_LANES || isFinite(result.tileSize) && result.tileSize % SIMD_LANES !== 0)) bufferStrides.set(gi, GATHER);
4609
+ else bufferStrides.set(gi, result);
4610
+ });
4030
4611
  const cg = new CodeGenerator();
4031
4612
  cg.memory.import("env", "memory");
4613
+ if (hasSharedArrayBuffer()) cg.memory.pages(0, 65536).shared(true);
4032
4614
  const distinctOps = mapSetUnion(tune.exp.distinctOps(), tune.epilogue?.distinctOps());
4033
4615
  const funcs = {};
4034
4616
  if (distinctOps.has(AluOp.Sin)) funcs.sin = wasm_sin(cg);
@@ -4040,12 +4622,127 @@ function codegenWasm(kernel) {
4040
4622
  if (distinctOps.has(AluOp.Erf)) funcs.erf = wasm_erf(cg, funcs.exp);
4041
4623
  if (distinctOps.has(AluOp.Erfc)) funcs.erfc = wasm_erfc(cg, funcs.exp);
4042
4624
  if (distinctOps.has(AluOp.Threefry2x32)) funcs.threefry2x32 = wasm_threefry2x32(cg);
4043
- const kernelFunc = cg.function(rep(kernel.nargs + 1, cg.i32), [], () => {
4625
+ const paramBegin = kernel.nargs + 1;
4626
+ const paramEnd = kernel.nargs + 2;
4627
+ const kernelFunc = cg.function(rep(kernel.nargs + 3, cg.i32), [], () => {
4044
4628
  const gidx = cg.local.declare(cg.i32);
4629
+ cg.local.get(paramBegin);
4630
+ cg.local.set(gidx);
4631
+ if (useSimd) {
4632
+ emitAlignmentGuard(cg, paramBegin, paramEnd);
4633
+ cg.loop(cg.void);
4634
+ if (!re) {
4635
+ cg.block(cg.void);
4636
+ cg.local.get(gidx);
4637
+ cg.local.get(paramEnd);
4638
+ cg.i32.ge_u();
4639
+ cg.br_if(0);
4640
+ cg.local.get(kernel.nargs);
4641
+ cg.local.get(gidx);
4642
+ cg.i32.const(byteWidth(kernel.dtype));
4643
+ cg.i32.mul();
4644
+ cg.i32.add();
4645
+ translateExpSimd(cg, funcs, tune.exp, { gidx }, bufferStrides);
4646
+ cg.v128.store(4);
4647
+ cg.local.get(gidx);
4648
+ cg.i32.const(SIMD_LANES);
4649
+ cg.i32.add();
4650
+ cg.local.set(gidx);
4651
+ cg.br(1);
4652
+ cg.end();
4653
+ } else {
4654
+ const reIsInt = kernel.exp.dtype === DType.Int32 || kernel.exp.dtype === DType.Uint32;
4655
+ cg.block(cg.void);
4656
+ cg.local.get(gidx);
4657
+ cg.local.get(paramEnd);
4658
+ cg.i32.ge_u();
4659
+ cg.br_if(0);
4660
+ const vecAcc = cg.local.declare(reIsInt ? cg.i32x4 : cg.f32x4);
4661
+ if (reIsInt) {
4662
+ cg.i32.const(re.identity);
4663
+ cg.i32x4.splat();
4664
+ } else {
4665
+ cg.f32.const(re.identity);
4666
+ cg.f32x4.splat();
4667
+ }
4668
+ cg.local.set(vecAcc);
4669
+ const ridx = cg.local.declare(cg.i32);
4670
+ cg.i32.const(0);
4671
+ cg.local.set(ridx);
4672
+ cg.loop(cg.void);
4673
+ cg.block(cg.void);
4674
+ cg.local.get(ridx);
4675
+ cg.i32.const(re.size);
4676
+ cg.i32.ge_u();
4677
+ cg.br_if(0);
4678
+ translateExpSimd(cg, funcs, tune.exp, {
4679
+ gidx,
4680
+ ridx
4681
+ }, bufferStrides);
4682
+ cg.local.get(vecAcc);
4683
+ if (reIsInt) if (re.op === AluOp.Add) cg.i32x4.add();
4684
+ else if (re.op === AluOp.Mul) cg.i32x4.mul();
4685
+ else if (re.op === AluOp.Min) if (re.dtype === DType.Int32) cg.i32x4.min_s();
4686
+ else cg.i32x4.min_u();
4687
+ else if (re.op === AluOp.Max) if (re.dtype === DType.Int32) cg.i32x4.max_s();
4688
+ else cg.i32x4.max_u();
4689
+ else throw new Error(`invalid SIMD reduction op: ${re.op}`);
4690
+ else if (re.op === AluOp.Add) cg.f32x4.add();
4691
+ else if (re.op === AluOp.Mul) cg.f32x4.mul();
4692
+ else if (re.op === AluOp.Min) cg.f32x4.min();
4693
+ else if (re.op === AluOp.Max) cg.f32x4.max();
4694
+ else throw new Error(`invalid SIMD reduction op: ${re.op}`);
4695
+ cg.local.set(vecAcc);
4696
+ cg.local.get(ridx);
4697
+ cg.i32.const(1);
4698
+ cg.i32.add();
4699
+ cg.local.set(ridx);
4700
+ cg.br(1);
4701
+ cg.end();
4702
+ cg.end();
4703
+ for (let lane = 0; lane < SIMD_LANES; lane++) {
4704
+ cg.local.get(kernel.nargs);
4705
+ cg.local.get(gidx);
4706
+ if (lane > 0) {
4707
+ cg.i32.const(lane);
4708
+ cg.i32.add();
4709
+ }
4710
+ cg.i32.const(byteWidth(kernel.dtype));
4711
+ cg.i32.mul();
4712
+ cg.i32.add();
4713
+ const acc = cg.local.declare(reIsInt ? cg.i32 : cg.f32);
4714
+ cg.local.get(vecAcc);
4715
+ if (reIsInt) cg.i32x4.extract_lane(lane);
4716
+ else cg.f32x4.extract_lane(lane);
4717
+ cg.local.set(acc);
4718
+ const laneGidx = cg.local.declare(cg.i32);
4719
+ cg.local.get(gidx);
4720
+ if (lane > 0) {
4721
+ cg.i32.const(lane);
4722
+ cg.i32.add();
4723
+ }
4724
+ cg.local.set(laneGidx);
4725
+ translateExp(cg, funcs, tune.epilogue, {
4726
+ acc,
4727
+ gidx: laneGidx
4728
+ });
4729
+ dty(cg, null, kernel.dtype).store(Math.log2(byteWidth(kernel.dtype)));
4730
+ }
4731
+ cg.local.get(gidx);
4732
+ cg.i32.const(SIMD_LANES);
4733
+ cg.i32.add();
4734
+ cg.local.set(gidx);
4735
+ cg.br(1);
4736
+ cg.end();
4737
+ }
4738
+ cg.end();
4739
+ cg.return();
4740
+ cg.end();
4741
+ }
4045
4742
  cg.loop(cg.void);
4046
4743
  cg.block(cg.void);
4047
4744
  cg.local.get(gidx);
4048
- cg.i32.const(kernel.size);
4745
+ cg.local.get(paramEnd);
4049
4746
  cg.i32.ge_u();
4050
4747
  cg.br_if(0);
4051
4748
  cg.local.get(kernel.nargs);
@@ -4184,6 +4881,11 @@ function translateExp(cg, funcs, exp, ctx) {
4184
4881
  else cg.i32.gt_u();
4185
4882
  cg.select();
4186
4883
  } else throw new UnsupportedOpError(op, dtype, "wasm");
4884
+ else if (op === AluOp.BitCombine) if (arg === "and") cg.i32.and();
4885
+ else if (arg === "or") cg.i32.or();
4886
+ else cg.i32.xor();
4887
+ else if (op === AluOp.BitShift) if (arg === "shl") cg.i32.shl();
4888
+ else cg.i32.shr_u();
4187
4889
  else if (op === AluOp.Cmplt) {
4188
4890
  const srcDtype = src[0].dtype;
4189
4891
  if (isFloatDtype(srcDtype)) dtyF(cg, op, srcDtype).lt();
@@ -4360,7 +5062,7 @@ async function createBackend(device) {
4360
5062
  if (!navigator.gpu) return null;
4361
5063
  const adapter = await navigator.gpu.requestAdapter({ powerPreference: "high-performance" });
4362
5064
  if (!adapter) return null;
4363
- const { WebGPUBackend } = await Promise.resolve().then(() => require("./webgpu-DMSx7a6M.cjs"));
5065
+ const { WebGPUBackend } = await Promise.resolve().then(() => require("./webgpu-C4S8Uq9e.cjs"));
4364
5066
  const importantLimits = [
4365
5067
  "maxBufferSize",
4366
5068
  "maxComputeInvocationsPerWorkgroup",
@@ -4398,7 +5100,7 @@ async function createBackend(device) {
4398
5100
  });
4399
5101
  if (!gl) return null;
4400
5102
  if (!gl.getExtension("EXT_color_buffer_float")) return null;
4401
- const { WebGLBackend } = await Promise.resolve().then(() => require("./webgl-kvVt7-T7.cjs"));
5103
+ const { WebGLBackend } = await Promise.resolve().then(() => require("./webgl-CD3WK_Me.cjs"));
4402
5104
  return new WebGLBackend(gl);
4403
5105
  } else throw new Error(`Backend not found: ${device}`);
4404
5106
  }
@@ -4432,6 +5134,15 @@ var UnsupportedRoutineError = class extends Error {
4432
5134
  super(`routine '${name}' is not supported in ${device} backend`);
4433
5135
  }
4434
5136
  };
5137
/**
 * Return the `device` of the already-initialized WebGPU backend, e.g. so that
 * buffers can be shared with it.
 *
 * @returns The `device` property of the backend registered under "webgpu".
 * @throws {Error} If no "webgpu" backend has been initialized yet.
 */
function getWebGPUDevice() {
  const webgpuBackend = initializedBackends.get("webgpu");
  if (webgpuBackend) return webgpuBackend.device;
  throw new Error("WebGPU backend not initialized, call init('webgpu') first");
}
4435
5146
 
4436
5147
  //#endregion
4437
5148
  Object.defineProperty(exports, 'AluExp', {
@@ -4626,6 +5337,12 @@ Object.defineProperty(exports, 'getBackend', {
4626
5337
  return getBackend;
4627
5338
  }
4628
5339
  });
5340
+ Object.defineProperty(exports, 'getWebGPUDevice', {
5341
+ enumerable: true,
5342
+ get: function () {
5343
+ return getWebGPUDevice;
5344
+ }
5345
+ });
4629
5346
  Object.defineProperty(exports, 'init', {
4630
5347
  enumerable: true,
4631
5348
  get: function () {