npm - @jax-js/jax - Version diff - 0.1.13 → 0.1.14 - Mend

@jax-js/jax 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +10 -7
package/dist/{backend-DMyuoWi2.cjs → backend-VlXzdQvR.cjs} +2111 -1557
package/dist/{backend-DLEk-B3V.js → backend-apsUOPzb.js} +2111 -1557
package/dist/index.cjs +10 -1
package/dist/index.js +10 -1
package/dist/{webgl-pbfUGDA6.cjs → webgl-C6rCbloA.cjs} +1 -1
package/dist/{webgl-NsFtyIts.js → webgl-Hh0FX6oV.js} +1 -1
package/dist/{webgpu-NkF1TZ0t.js → webgpu-BRv5r9Sl.js} +45 -9
package/dist/{webgpu-DDGCYtHa.cjs → webgpu-pWnE96Xc.cjs} +45 -9
package/package.json +1 -1

package/dist/index.cjs CHANGED Viewed

@@ -30,7 +30,7 @@ var __toESM = (mod$1, isNodeMode, target) => (target = mod$1 != null ? __create(
 }) : target, mod$1));
 //#endregion
-const require_backend = require('./backend-DMyuoWi2.cjs');
+const require_backend = require('./backend-VlXzdQvR.cjs');
 //#region src/frontend/convolution.ts
 /**
@@ -3224,6 +3224,15 @@ var Array$1 = class Array$1 extends Tracer {
 			},
 			[Primitive.Conv]([x, y], params) {
 				checkConvShape(x.shape, y.shape, params);
+				const shouldMaterializePadding = x.#backend.type === "wasm" && params.lhsDilation.every((d) => d === 1) && params.padding.some(([left, right]) => left > 0 || right > 0);
+				if (shouldMaterializePadding) {
+					x = x.#reshape(x.#st.padOrShrink([...require_backend.rep(params.vmapDims + 2, [0, 0]), ...params.padding]));
+					x.#realize();
+					params = {
+						...params,
+						padding: require_backend.rep(params.padding.length, [0, 0])
+					};
+				}
 				const [stX, stY] = prepareConv(x.#st, y.#st, params);
 				return [Array$1.#naryCustom("conv", ([x$1, y$1]) => require_backend.AluExp.mul(x$1, y$1), [x.#reshape(stX), y.#reshape(stY)], { reduceAxis: true })];
 			},

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { __export } from "./chunk-Cl8Af3a2.js";
-import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, checkInts, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, getWebGPUDevice, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, startTrace, stopTrace, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-DLEk-B3V.js";
+import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, checkInts, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, getWebGPUDevice, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, startTrace, stopTrace, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-apsUOPzb.js";
 //#region src/frontend/convolution.ts
 /**
@@ -3189,6 +3189,15 @@ var Array$1 = class Array$1 extends Tracer {
 			},
 			[Primitive.Conv]([x, y], params) {
 				checkConvShape(x.shape, y.shape, params);
+				const shouldMaterializePadding = x.#backend.type === "wasm" && params.lhsDilation.every((d) => d === 1) && params.padding.some(([left, right]) => left > 0 || right > 0);
+				if (shouldMaterializePadding) {
+					x = x.#reshape(x.#st.padOrShrink([...rep(params.vmapDims + 2, [0, 0]), ...params.padding]));
+					x.#realize();
+					params = {
+						...params,
+						padding: rep(params.padding.length, [0, 0])
+					};
+				}
 				const [stX, stY] = prepareConv(x.#st, y.#st, params);
 				return [Array$1.#naryCustom("conv", ([x$1, y$1]) => AluExp.mul(x$1, y$1), [x.#reshape(stX), y.#reshape(stY)], { reduceAxis: true })];
 			},

package/dist/{webgl-pbfUGDA6.cjs → webgl-C6rCbloA.cjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-const require_backend = require('./backend-DMyuoWi2.cjs');
+const require_backend = require('./backend-VlXzdQvR.cjs');
 //#region src/backend/webgl/builtins.ts
 const threefrySrc = `

package/dist/{webgl-NsFtyIts.js → webgl-Hh0FX6oV.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { AluGroup, AluOp, DEBUG, DType, Executable, SlotError, UnsupportedOpError, UnsupportedRoutineError, isFloatDtype, range, strip1, tuneNullopt } from "./backend-DLEk-B3V.js";
+import { AluGroup, AluOp, DEBUG, DType, Executable, SlotError, UnsupportedOpError, UnsupportedRoutineError, isFloatDtype, range, strip1, tuneNullopt } from "./backend-apsUOPzb.js";
 //#region src/backend/webgl/builtins.ts
 const threefrySrc = `

package/dist/{webgpu-NkF1TZ0t.js → webgpu-BRv5r9Sl.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { AluExp, AluGroup, AluOp, DEBUG, DType, Executable, FpHash, Routines, SlotError, UnsupportedOpError, UnsupportedRoutineError, emitTrace, findPow2, isFloatDtype, isTracing, mapSetUnion, onFlushTrace, prod, range, strip1, traceSourceInfo, tuneWebgpu } from "./backend-DLEk-B3V.js";
+import { AluExp, AluGroup, AluOp, DEBUG, DType, Executable, FpHash, Routines, SlotError, UnsupportedOpError, UnsupportedRoutineError, emitTrace, findPow2, isFloatDtype, isTracing, mapSetUnion, onFlushTrace, prod, range, strip1, traceSourceInfo, tuneWebgpu } from "./backend-apsUOPzb.js";
 //#region src/backend/webgpu/builtins.ts
 const threefrySrc = `
@@ -147,6 +147,13 @@ function constToWgsl(dtype, value) {
 	}
 	throw new Error(`Unsupported const dtype: ${dtype}`);
 }
+function reduceOpWgsl(op, dtype, a, b) {
+	if (op === AluOp.Add) return `(${a} + ${b})`;
+	if (op === AluOp.Mul) return `(${a} * ${b})`;
+	if (op === AluOp.Min) return dtype === DType.Bool ? `(${a} && ${b})` : `min(${a}, ${b})`;
+	if (op === AluOp.Max) return dtype === DType.Bool ? `(${a} || ${b})` : `max(${a}, ${b})`;
+	throw new Error(`Unsupported reduction op: ${op}`);
+}
 /** Codegen for WebGPU expressions, linearizing AluOp into a kernel. */
 var WgslExpCodegen = class {
 	#gensymCount = 0;
@@ -1316,14 +1323,30 @@ function pipelineSource(device, kernel) {
 	}
 	const resultTy = dtypeToWgsl(kernel.dtype, true);
 	wb.emit(`@group(0) @binding(${nargs}) var<storage, read_write> result : array<${resultTy}>;`);
-	const workgroupSize = findPow2(tune.threadCount, 256);
-	const gridSize = Math.ceil(tune.threadCount / workgroupSize);
+	const groupCount = re ? tune.size.groups ?? 1 : 1;
+	const groupedReduction = re && groupCount > 1;
+	if (groupedReduction && tune.threadCount % groupCount !== 0) throw new Error("WebGPU grouped reduction has invalid thread count");
+	if (groupedReduction && groupCount > device.limits.maxComputeWorkgroupSizeX) throw new Error("WebGPU grouped reduction exceeds workgroup size limit");
+	const workgroupSize = groupedReduction ? groupCount : findPow2(tune.threadCount, 256);
+	const gridSize = groupedReduction ? tune.threadCount / groupCount : Math.ceil(tune.threadCount / workgroupSize);
 	const [gridX, gridY] = calculateGrid(gridSize);
-	wb.emit("", `@compute @workgroup_size(${workgroupSize})`, "fn main(@builtin(global_invocation_id) id : vec3<u32>) {", wb.pushIndent);
-	if (gridY === 1) wb.emit(`if (id.x >= ${tune.threadCount}) { return; }`, "let gidx: i32 = i32(id.x);");
-	else {
-		const sizeX = gridX * workgroupSize;
-		wb.emit(`if (${sizeX} * id.y + id.x >= ${tune.threadCount}) { return; }`, `let gidx: i32 = i32(${sizeX} * id.y + id.x);`);
+	if (groupedReduction) {
+		const partialTy = dtypeToWgsl(re.dtype);
+		for (let i = 0; i < (tune.size.upcast ?? 1); i++) wb.emit(`var<workgroup> partial${i}: array<${partialTy}, ${groupCount}>;`);
+	}
+	wb.emit("", `@compute @workgroup_size(${workgroupSize})`);
+	if (groupedReduction) {
+		wb.emit("fn main(", wb.pushIndent, "@builtin(local_invocation_id) lid : vec3<u32>,", "@builtin(workgroup_id) wg_id : vec3<u32>,", wb.popIndent, ") {", wb.pushIndent);
+		if (gridY === 1) wb.emit(`if (wg_id.x >= ${gridSize}u) { return; }`, "let gidx: i32 = i32(wg_id.x);");
+		else wb.emit(`if (${gridX}u * wg_id.y + wg_id.x >= ${gridSize}u) { return; }`, `let gidx: i32 = i32(${gridX}u * wg_id.y + wg_id.x);`);
+		wb.emit("let group: i32 = i32(lid.x);");
+	} else {
+		wb.emit("fn main(@builtin(global_invocation_id) id : vec3<u32>) {", wb.pushIndent);
+		if (gridY === 1) wb.emit(`if (id.x >= ${tune.threadCount}) { return; }`, "let gidx: i32 = i32(id.x);");
+		else {
+			const sizeX = gridX * workgroupSize;
+			wb.emit(`if (${sizeX} * id.y + id.x >= ${tune.threadCount}) { return; }`, `let gidx: i32 = i32(${sizeX} * id.y + id.x);`);
+		}
 	}
 	wb.emitPhonyAssignments(args);
 	const gen = new WgslExpCodegen(wb, args);
@@ -1333,7 +1356,6 @@ function pipelineSource(device, kernel) {
 		if (resultTy !== dtypeToWgsl(tune.exp.dtype)) rhs = `${resultTy}(${rhs})`;
 		wb.emit(`result[gidx] = ${rhs};`);
 	} else {
-		if ((tune.size.groups ?? 1) > 1) throw new Error("WebGPU backend does not support group optimization yet");
 		const unroll = tune.size.unroll ?? 1;
 		const upcast = tune.size.upcast ?? 1;
 		const acc = [...Array(upcast)].map((_, i) => `acc${i}`);
@@ -1369,6 +1391,15 @@ function pipelineSource(device, kernel) {
 			else throw new Error(`Unsupported reduction op: ${re.op}`);
 		}
 		wb.emit(wb.popIndent, "}");
+		if (groupedReduction) {
+			for (let i = 0; i < upcast; i++) wb.emit(`partial${i}[lid.x] = ${acc[i]};`);
+			wb.emit("workgroupBarrier();");
+			for (let stride = groupCount / 2; stride >= 1; stride /= 2) {
+				wb.emit(`if (lid.x < ${stride}u) {`, wb.pushIndent);
+				for (let i = 0; i < upcast; i++) wb.emit(`partial${i}[lid.x] = ${reduceOpWgsl(re.op, re.dtype, `partial${i}[lid.x]`, `partial${i}[lid.x + ${stride}u]`)};`);
+				wb.emit(wb.popIndent, "}", "workgroupBarrier();");
+			}
+		}
 		gen.reset();
 		const outputIdxExps = [];
 		const fusionExps = [];
@@ -1382,12 +1413,17 @@ function pipelineSource(device, kernel) {
 			}).simplify(cache));
 			gen.countReferences(fusionExps[i]);
 		}
+		if (groupedReduction) {
+			wb.emit("if (lid.x == 0u) {", wb.pushIndent);
+			for (let i = 0; i < upcast; i++) wb.emit(`${acc[i]} = partial${i}[0u];`);
+		}
 		for (let i = 0; i < upcast; i++) {
 			const index = strip1(gen.run(outputIdxExps[i]));
 			let rhs = strip1(gen.run(fusionExps[i]));
 			if (resultTy !== dtypeToWgsl(fusionExps[i].dtype)) rhs = `${resultTy}(${rhs})`;
 			wb.emit(`result[${index}] = ${rhs};`);
 		}
+		if (groupedReduction) wb.emit(wb.popIndent, "}");
 	}
 	wb.emit(wb.popIndent, "}");
 	return {

package/dist/{webgpu-DDGCYtHa.cjs → webgpu-pWnE96Xc.cjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-const require_backend = require('./backend-DMyuoWi2.cjs');
+const require_backend = require('./backend-VlXzdQvR.cjs');
 //#region src/backend/webgpu/builtins.ts
 const threefrySrc = `
@@ -147,6 +147,13 @@ function constToWgsl(dtype, value) {
 	}
 	throw new Error(`Unsupported const dtype: ${dtype}`);
 }
+function reduceOpWgsl(op, dtype, a, b) {
+	if (op === require_backend.AluOp.Add) return `(${a} + ${b})`;
+	if (op === require_backend.AluOp.Mul) return `(${a} * ${b})`;
+	if (op === require_backend.AluOp.Min) return dtype === require_backend.DType.Bool ? `(${a} && ${b})` : `min(${a}, ${b})`;
+	if (op === require_backend.AluOp.Max) return dtype === require_backend.DType.Bool ? `(${a} || ${b})` : `max(${a}, ${b})`;
+	throw new Error(`Unsupported reduction op: ${op}`);
+}
 /** Codegen for WebGPU expressions, linearizing AluOp into a kernel. */
 var WgslExpCodegen = class {
 	#gensymCount = 0;
@@ -1316,14 +1323,30 @@ function pipelineSource(device, kernel) {
 	}
 	const resultTy = dtypeToWgsl(kernel.dtype, true);
 	wb.emit(`@group(0) @binding(${nargs}) var<storage, read_write> result : array<${resultTy}>;`);
-	const workgroupSize = require_backend.findPow2(tune.threadCount, 256);
-	const gridSize = Math.ceil(tune.threadCount / workgroupSize);
+	const groupCount = re ? tune.size.groups ?? 1 : 1;
+	const groupedReduction = re && groupCount > 1;
+	if (groupedReduction && tune.threadCount % groupCount !== 0) throw new Error("WebGPU grouped reduction has invalid thread count");
+	if (groupedReduction && groupCount > device.limits.maxComputeWorkgroupSizeX) throw new Error("WebGPU grouped reduction exceeds workgroup size limit");
+	const workgroupSize = groupedReduction ? groupCount : require_backend.findPow2(tune.threadCount, 256);
+	const gridSize = groupedReduction ? tune.threadCount / groupCount : Math.ceil(tune.threadCount / workgroupSize);
 	const [gridX, gridY] = calculateGrid(gridSize);
-	wb.emit("", `@compute @workgroup_size(${workgroupSize})`, "fn main(@builtin(global_invocation_id) id : vec3<u32>) {", wb.pushIndent);
-	if (gridY === 1) wb.emit(`if (id.x >= ${tune.threadCount}) { return; }`, "let gidx: i32 = i32(id.x);");
-	else {
-		const sizeX = gridX * workgroupSize;
-		wb.emit(`if (${sizeX} * id.y + id.x >= ${tune.threadCount}) { return; }`, `let gidx: i32 = i32(${sizeX} * id.y + id.x);`);
+	if (groupedReduction) {
+		const partialTy = dtypeToWgsl(re.dtype);
+		for (let i = 0; i < (tune.size.upcast ?? 1); i++) wb.emit(`var<workgroup> partial${i}: array<${partialTy}, ${groupCount}>;`);
+	}
+	wb.emit("", `@compute @workgroup_size(${workgroupSize})`);
+	if (groupedReduction) {
+		wb.emit("fn main(", wb.pushIndent, "@builtin(local_invocation_id) lid : vec3<u32>,", "@builtin(workgroup_id) wg_id : vec3<u32>,", wb.popIndent, ") {", wb.pushIndent);
+		if (gridY === 1) wb.emit(`if (wg_id.x >= ${gridSize}u) { return; }`, "let gidx: i32 = i32(wg_id.x);");
+		else wb.emit(`if (${gridX}u * wg_id.y + wg_id.x >= ${gridSize}u) { return; }`, `let gidx: i32 = i32(${gridX}u * wg_id.y + wg_id.x);`);
+		wb.emit("let group: i32 = i32(lid.x);");
+	} else {
+		wb.emit("fn main(@builtin(global_invocation_id) id : vec3<u32>) {", wb.pushIndent);
+		if (gridY === 1) wb.emit(`if (id.x >= ${tune.threadCount}) { return; }`, "let gidx: i32 = i32(id.x);");
+		else {
+			const sizeX = gridX * workgroupSize;
+			wb.emit(`if (${sizeX} * id.y + id.x >= ${tune.threadCount}) { return; }`, `let gidx: i32 = i32(${sizeX} * id.y + id.x);`);
+		}
 	}
 	wb.emitPhonyAssignments(args);
 	const gen = new WgslExpCodegen(wb, args);
@@ -1333,7 +1356,6 @@ function pipelineSource(device, kernel) {
 		if (resultTy !== dtypeToWgsl(tune.exp.dtype)) rhs = `${resultTy}(${rhs})`;
 		wb.emit(`result[gidx] = ${rhs};`);
 	} else {
-		if ((tune.size.groups ?? 1) > 1) throw new Error("WebGPU backend does not support group optimization yet");
 		const unroll = tune.size.unroll ?? 1;
 		const upcast = tune.size.upcast ?? 1;
 		const acc = [...Array(upcast)].map((_, i) => `acc${i}`);
@@ -1369,6 +1391,15 @@ function pipelineSource(device, kernel) {
 			else throw new Error(`Unsupported reduction op: ${re.op}`);
 		}
 		wb.emit(wb.popIndent, "}");
+		if (groupedReduction) {
+			for (let i = 0; i < upcast; i++) wb.emit(`partial${i}[lid.x] = ${acc[i]};`);
+			wb.emit("workgroupBarrier();");
+			for (let stride = groupCount / 2; stride >= 1; stride /= 2) {
+				wb.emit(`if (lid.x < ${stride}u) {`, wb.pushIndent);
+				for (let i = 0; i < upcast; i++) wb.emit(`partial${i}[lid.x] = ${reduceOpWgsl(re.op, re.dtype, `partial${i}[lid.x]`, `partial${i}[lid.x + ${stride}u]`)};`);
+				wb.emit(wb.popIndent, "}", "workgroupBarrier();");
+			}
+		}
 		gen.reset();
 		const outputIdxExps = [];
 		const fusionExps = [];
@@ -1382,12 +1413,17 @@ function pipelineSource(device, kernel) {
 			}).simplify(cache));
 			gen.countReferences(fusionExps[i]);
 		}
+		if (groupedReduction) {
+			wb.emit("if (lid.x == 0u) {", wb.pushIndent);
+			for (let i = 0; i < upcast; i++) wb.emit(`${acc[i]} = partial${i}[0u];`);
+		}
 		for (let i = 0; i < upcast; i++) {
 			const index = require_backend.strip1(gen.run(outputIdxExps[i]));
 			let rhs = require_backend.strip1(gen.run(fusionExps[i]));
 			if (resultTy !== dtypeToWgsl(fusionExps[i].dtype)) rhs = `${resultTy}(${rhs})`;
 			wb.emit(`result[${index}] = ${rhs};`);
 		}
+		if (groupedReduction) wb.emit(wb.popIndent, "}");
 	}
 	wb.emit(wb.popIndent, "}");
 	return {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jax-js/jax",
-  "version": "0.1.13",
+  "version": "0.1.14",
   "description": "Numerical computing and ML in the browser",
   "keywords": [
     "machine learning",