npm - @jax-js/jax - Versions diffs - 0.1.3 → 0.1.5 - Mend

@jax-js/jax 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +15 -9
package/dist/{backend-BY8wlLEl.js → backend-DaqL-MNz.js} +240 -21
package/dist/{backend-CmaidnkQ.cjs → backend-DziQSaoQ.cjs} +264 -21
package/dist/index.cjs +2407 -1132
package/dist/index.d.cts +596 -97
package/dist/index.d.ts +596 -97
package/dist/index.js +2400 -1126
package/dist/webgl-ClIYb8jP.cjs +522 -0
package/dist/webgl-RSuZKvgc.js +522 -0
package/dist/webgpu-Db2JrNBr.cjs +1261 -0
package/dist/webgpu-Dh7k9io0.js +1261 -0
package/package.json +1 -1
package/dist/webgpu-BVns4DbI.cjs +0 -663
package/dist/webgpu-C9iAP5h5.js +0 -663

package/dist/index.cjs CHANGED Viewed

@@ -8,9 +8,9 @@ var __hasOwnProp = Object.prototype.hasOwnProperty;
 var __commonJS = (cb, mod$1) => function() {
 	return mod$1 || (0, cb[__getOwnPropNames(cb)[0]])((mod$1 = { exports: {} }).exports, mod$1), mod$1.exports;
 };
-var __export = (target, all) => {
-	for (var name in all) __defProp(target, name, {
-		get: all[name],
+var __export = (target, all$1) => {
+	for (var name in all$1) __defProp(target, name, {
+		get: all$1[name],
 		enumerable: true
 	});
 };
@@ -30,7 +30,7 @@ var __toESM = (mod$1, isNodeMode, target) => (target = mod$1 != null ? __create(
 }) : target, mod$1));
 //#endregion
-const require_backend = require('./backend-CmaidnkQ.cjs');
+const require_backend = require('./backend-DziQSaoQ.cjs');
 //#region src/frontend/convolution.ts
 /**
@@ -362,6 +362,8 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Mul"] = "mul";
 	Primitive$1["Idiv"] = "idiv";
 	Primitive$1["Mod"] = "mod";
+	Primitive$1["Min"] = "min";
+	Primitive$1["Max"] = "max";
 	Primitive$1["Neg"] = "neg";
 	Primitive$1["Reciprocal"] = "reciprocal";
 	Primitive$1["Floor"] = "floor";
@@ -369,7 +371,6 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["StopGradient"] = "stop_gradient";
 	Primitive$1["Cast"] = "cast";
 	Primitive$1["Bitcast"] = "bitcast";
-	Primitive$1["RandomBits"] = "random_bits";
 	Primitive$1["Sin"] = "sin";
 	Primitive$1["Cos"] = "cos";
 	Primitive$1["Asin"] = "asin";
@@ -379,8 +380,6 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Erf"] = "erf";
 	Primitive$1["Erfc"] = "erfc";
 	Primitive$1["Sqrt"] = "sqrt";
-	Primitive$1["Min"] = "min";
-	Primitive$1["Max"] = "max";
 	Primitive$1["Reduce"] = "reduce";
 	Primitive$1["Dot"] = "dot";
 	Primitive$1["Conv"] = "conv";
@@ -388,14 +387,22 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["PoolTranspose"] = "pool_transpose";
 	Primitive$1["Compare"] = "compare";
 	Primitive$1["Where"] = "where";
+	Primitive$1["Concatenate"] = "concatenate";
+	Primitive$1["Split"] = "split";
+	Primitive$1["RandomBits"] = "random_bits";
+	Primitive$1["Gather"] = "gather";
 	Primitive$1["Transpose"] = "transpose";
 	Primitive$1["Broadcast"] = "broadcast";
 	Primitive$1["Reshape"] = "reshape";
 	Primitive$1["Flip"] = "flip";
 	Primitive$1["Shrink"] = "shrink";
 	Primitive$1["Pad"] = "pad";
-	Primitive$1["Gather"] = "gather";
-	Primitive$1["JitCall"] = "jit_call";
+	Primitive$1["Sort"] = "sort";
+	Primitive$1["Argsort"] = "argsort";
+	Primitive$1["TriangularSolve"] = "triangular_solve";
+	Primitive$1["Cholesky"] = "cholesky";
+	Primitive$1["LU"] = "lu";
+	Primitive$1["Jit"] = "jit";
 	return Primitive$1;
 }({});
 let CompareOp = /* @__PURE__ */ function(CompareOp$1) {
@@ -417,6 +424,12 @@ function idiv(x, y) {
 function mod(x, y) {
 	return bind1(Primitive.Mod, [x, y]);
 }
+function min$1(x, y) {
+	return bind1(Primitive.Min, [x, y]);
+}
+function max$1(x, y) {
+	return bind1(Primitive.Max, [x, y]);
+}
 function neg(x) {
 	return bind1(Primitive.Neg, [x]);
 }
@@ -438,12 +451,6 @@ function cast(x, dtype) {
 function bitcast(x, dtype) {
 	return bind1(Primitive.Bitcast, [x], { dtype });
 }
-function randomBits(k0, k1, shape$1, mode = "xor") {
-	return bind1(Primitive.RandomBits, [k0, k1], {
-		shape: shape$1,
-		mode
-	});
-}
 function sin$1(x) {
 	return bind1(Primitive.Sin, [x]);
 }
@@ -471,12 +478,6 @@ function erfc$1(x) {
 function sqrt$1(x) {
 	return bind1(Primitive.Sqrt, [x]);
 }
-function min$1(x, y) {
-	return bind1(Primitive.Min, [x, y]);
-}
-function max$1(x, y) {
-	return bind1(Primitive.Max, [x, y]);
-}
 function reduce(x, op, axis = null, opts) {
 	if (!require_backend.AluGroup.Reduce.has(op)) throw new TypeError(`Invalid reduce operation: ${op}`);
 	axis = require_backend.normalizeAxis(axis, ndim$1(x));
@@ -532,6 +533,41 @@ function where$1(cond, x, y) {
 		y
 	]);
 }
+function concatenate$1(xs, axis) {
+	if (xs.length === 0) throw new Error("concatenate requires at least one input");
+	const avals = xs.map((x) => ShapedArray.fromAval(getAval(x)));
+	axis = require_backend.checkAxis(axis, avals[0].ndim);
+	for (const x of avals) if (x.ndim !== avals[0].ndim || !x.shape.every((s, i) => i === axis || s === avals[0].shape[i])) throw new Error(`Concatenate: inputs ${avals[0]} and ${x} must match shapes except on axis ${axis}`);
+	return bind1(Primitive.Concatenate, xs, { axis });
+}
+function split$2(x, axis, sizes) {
+	axis = require_backend.checkAxis(axis, ndim$1(x));
+	if (sizes.some((s) => s < 0 || !Number.isInteger(s))) throw new Error(`split: sizes must be nonnegative integers, got ${JSON.stringify(sizes)}`);
+	const totalSize = sizes.reduce((a, b) => a + b, 0);
+	if (totalSize !== getShape(x)[axis]) throw new Error(`split: sizes must sum to the size of the axis ${axis}, got ${totalSize}`);
+	return bind(Primitive.Split, [x], {
+		axis,
+		sizes
+	});
+}
+function randomBits(k0, k1, shape$1, mode = "xor") {
+	if (!require_backend.deepEqual(k0.shape, k1.shape) || k0.dtype !== require_backend.DType.Uint32 || k1.dtype !== require_backend.DType.Uint32) throw new Error(`randomBits: key parts must be uint32 with the same shape, got ${ShapedArray.fromAval(k0.aval)} and ${ShapedArray.fromAval(k1.aval)}`);
+	return bind1(Primitive.RandomBits, [k0, k1], {
+		shape: shape$1,
+		mode
+	});
+}
+function gather(x, indices, axis, outDim) {
+	if (indices.length === 0) throw new Error("gather() requires at least one index");
+	if (!Array.isArray(axis) || axis.length !== indices.length) throw new Error(`Invalid gather() axis: expected ${indices.length} axes, got ${JSON.stringify(axis)}`);
+	axis = axis.map((a) => require_backend.checkAxis(a, ndim$1(x)));
+	if (new Set(axis).size !== axis.length) throw new Error(`Invalid gather() axis: duplicate axes ${JSON.stringify(axis)}`);
+	outDim = require_backend.checkAxis(outDim, ndim$1(x) - axis.length + 1);
+	return bind1(Primitive.Gather, [x, ...indices], {
+		axis,
+		outDim
+	});
+}
 function transpose$1(x, perm) {
 	perm = perm ? perm.map((a) => require_backend.checkAxis(a, ndim$1(x))) : require_backend.range(ndim$1(x)).reverse();
 	if (!require_backend.isPermutation(perm, ndim$1(x))) throw new Error(`Invalid transpose permutation for ${ndim$1(x)} axes: ${JSON.stringify(perm)}`);
@@ -581,16 +617,39 @@ function pad$1(x, width) {
 	} else if (width.length !== nd) throw new Error(`Invalid pad(): expected ${nd} axes, got ${width.length}`);
 	return bind1(Primitive.Pad, [x], { width });
 }
-function gather(x, indices, axis, outDim) {
-	if (indices.length === 0) throw new Error("gather() requires at least one index");
-	if (!Array.isArray(axis) || axis.length !== indices.length) throw new Error(`Invalid gather() axis: expected ${indices.length} axes, got ${JSON.stringify(axis)}`);
-	axis = axis.map((a) => require_backend.checkAxis(a, ndim$1(x)));
-	if (new Set(axis).size !== axis.length) throw new Error(`Invalid gather() axis: duplicate axes ${JSON.stringify(axis)}`);
-	outDim = require_backend.checkAxis(outDim, ndim$1(x) - axis.length + 1);
-	return bind1(Primitive.Gather, [x, ...indices], {
-		axis,
-		outDim
-	});
+function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
+	const as = getShape(a);
+	const bs = getShape(b);
+	if (as.length < 2 || bs.length < 2) throw new Error(`triangular_solve: must be >=2D, got a=${as}, b=${bs}`);
+	const n = as[as.length - 2];
+	if (n !== as[as.length - 1] || n !== bs[bs.length - 1]) throw new Error(`triangular_solve: incompatible shapes a=${as}, b=${bs}`);
+	if (lower) {
+		a = flip$1(a, [-2, -1]);
+		b = flip$1(b, [-1]);
+	}
+	let x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+	if (lower) x = flip$1(x, [-1]);
+	return x;
+}
+function cholesky$2(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2 || aval.shape[aval.ndim - 1] !== aval.shape[aval.ndim - 2]) throw new Error(`cholesky: expected batch of square matrices, got ${aval}`);
+	return bind1(Primitive.Cholesky, [x]);
+}
+function lu$1(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2) throw new Error(`lu: expected batch of matrices, got ${aval}`);
+	return bind(Primitive.LU, [x]);
+}
+function sort$1(x) {
+	const nd = ndim$1(x);
+	if (nd === 0) throw new Error("sort: requires at least 1D input");
+	return bind1(Primitive.Sort, [x]);
+}
+function argsort$1(x) {
+	const nd = ndim$1(x);
+	if (nd === 0) throw new Error("argsort: requires at least 1D input");
+	return bind(Primitive.Argsort, [x]);
 }
 function bind1(prim, args, params = {}) {
 	const [results] = bind(prim, args, params);
@@ -690,6 +749,9 @@ var Tracer = class Tracer {
 	mul(other) {
 		return mul(this, other);
 	}
+	mod(other) {
+		return mod(this, other);
+	}
 	greater(other) {
 		return greater$1(this, other);
 	}
@@ -753,7 +815,7 @@ var Tracer = class Tracer {
 		if (require_backend.isFloatDtype(this.dtype)) return this.mul(reciprocal$1(other));
 		return idiv(this, other);
 	}
-	/** Return specified diagonals. See `numpy.diagonal` for full docs. */
+	/** Return specified diagonals. See `jax.numpy.diagonal` for full docs. */
 	diagonal(offset = 0, axis1 = 0, axis2 = 1) {
 		if (!Number.isInteger(offset)) throw new TypeError(`offset must be an integer, got ${offset}`);
 		if (offset < 0) return this.diagonal(-offset, axis2, axis1);
@@ -802,8 +864,42 @@ var Tracer = class Tracer {
 	*/
 	*[Symbol.iterator]() {
 		if (this.ndim === 0) throw new Error("Cannot iterate over a scalar array");
-		for (let i = 0; i < this.shape[0]; i++) yield this.ref.slice(i);
-		this.dispose();
+		let residual = this;
+		const subarrayShape = this.shape.slice(1);
+		for (let i = 0; i < this.shape[0]; i++) {
+			const lr = split$2(residual, 0, [1, residual.shape[0] - 1]);
+			yield lr[0].reshape(subarrayShape);
+			residual = lr[1];
+		}
+		residual.dispose();
+	}
+	/**
+	* Return a sorted copy of an array in ascending order.
+	*
+	* See `jax.numpy.sort` for full docs.
+	*/
+	sort(axis = -1) {
+		axis = require_backend.checkAxis(axis, this.ndim);
+		if (this.shape[axis] <= 1) return this;
+		if (axis === this.ndim - 1) return sort$1(this);
+		const perm = require_backend.range(this.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		return sort$1(this.transpose(perm)).transpose(require_backend.invertPermutation(perm));
+	}
+	/**
+	* Return the indices that would sort an array. This may not be a stable
+	* sorting algorithm; it need not preserve order of indices in ties.
+	*
+	* See `jax.numpy.argsort` for full docs.
+	*/
+	argsort(axis = -1) {
+		axis = require_backend.checkAxis(axis, this.ndim);
+		if (axis === this.ndim - 1) return argsort$1(this)[1];
+		const perm = require_backend.range(this.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		return argsort$1(this.transpose(perm))[1].transpose(require_backend.invertPermutation(perm));
 	}
 	/**
 	* Slice an array along one or more axes.
@@ -922,6 +1018,12 @@ var ShapedArray = class ShapedArray {
 	get ndim() {
 		return this.shape.length;
 	}
+	get size() {
+		return require_backend.prod(this.shape);
+	}
+	scalar() {
+		return new ShapedArray([], this.dtype, this.weakType);
+	}
 	toString() {
 		return `${this.dtype}[${this.shape.join(",")}]`;
 	}
@@ -1221,13 +1323,13 @@ var Jaxpr = class Jaxpr {
 		}
 		return new Jaxpr(this.inBinders, liveEqns.reverse(), outs);
 	}
-	/** Flattens nested JitCall in a Jaxpr. Useful for handling jit-of-jit. */
+	/** Flattens nested Jit in a Jaxpr. Useful for handling jit-of-jit. */
 	flatten() {
-		if (!this.eqns.some((eqn) => eqn.primitive === Primitive.JitCall)) return this;
+		if (!this.eqns.some((eqn) => eqn.primitive === Primitive.Jit)) return this;
 		const newEqns = [];
 		const varMap = /* @__PURE__ */ new Map();
 		const varMapF = (x) => x instanceof Var ? varMap.get(x) ?? x : x;
-		for (const eqn of this.eqns) if (eqn.primitive === Primitive.JitCall) {
+		for (const eqn of this.eqns) if (eqn.primitive === Primitive.Jit) {
 			const jaxpr = eqn.params.jaxpr.flatten();
 			const translation = /* @__PURE__ */ new Map();
 			const translationF = (x) => x instanceof Var ? translation.get(x) : x;
@@ -1328,19 +1430,48 @@ function evalJaxpr(jaxpr, args) {
 function jaxprAsFun(jaxpr) {
 	return (...args) => evalJaxpr(jaxpr, args);
 }
+/** Jaxpr with a collection of associated, traced constants. */
+var ClosedJaxpr = class ClosedJaxpr {
+	constructor(jaxpr, consts) {
+		this.jaxpr = jaxpr;
+		this.consts = consts;
+	}
+	/** String representation of this Jaxpr. */
+	toString() {
+		return this.jaxpr.toString();
+	}
+	/** Apply a function to the underlying Jaxpr. */
+	mapJaxpr(f) {
+		return new ClosedJaxpr(f(this.jaxpr), this.consts);
+	}
+	/** Dispose of the constants in this Jaxpr. */
+	dispose() {
+		for (const c of this.consts) c.dispose();
+	}
+};
 /** Tracer that records its operations to dynamically construct a Jaxpr. */
 var JaxprTracer = class extends Tracer {
+	#rc;
 	constructor(trace$1, aval) {
 		super(trace$1);
 		this.aval = aval;
+		this.#rc = 1;
 	}
 	toString() {
 		return `JaxprTracer(${this.aval.toString()})`;
 	}
 	get ref() {
+		if (this.#rc <= 0) throw new UseAfterFreeError(this);
+		this.#rc++;
 		return this;
 	}
-	dispose() {}
+	dispose() {
+		if (this.#rc <= 0) throw new UseAfterFreeError(this);
+		this.#rc--;
+	}
+	trackLiftedConstant() {
+		this.#rc++;
+	}
 };
 /** Analogous to the 'DynamicJaxprTrace' class in JAX. */
 var JaxprTrace = class extends Trace {
@@ -1353,17 +1484,24 @@ var JaxprTrace = class extends Trace {
 	}
 	/** Register a constant / literal in this Jaxpr. */
 	getOrMakeConstTracer(val) {
+		if (!(val instanceof Tracer)) val = pureArray(val);
 		let tracer = this.builder.constTracers.get(val);
 		if (tracer === void 0) {
 			tracer = this.builder.newTracer(this, ShapedArray.fromAval(getAval(val)));
-			this.builder.addConst(tracer, val instanceof Tracer ? val.ref : array(val));
+			this.builder.addConst(tracer, val);
+		} else {
+			val.dispose();
+			tracer.trackLiftedConstant();
 		}
 		return tracer;
 	}
 	pure = this.getOrMakeConstTracer;
 	lift = this.getOrMakeConstTracer;
 	processPrimitive(primitive, tracers, params) {
-		const avalsIn = tracers.map((t) => t.aval);
+		const avalsIn = tracers.map((t) => {
+			t.dispose();
+			return t.aval;
+		});
 		const avalsOut = abstractEvalRules[primitive](avalsIn, params);
 		const outTracers = avalsOut.map((aval) => this.builder.newTracer(this, aval));
 		this.builder.addEqn(new JaxprEqn(primitive, tracers.map((t) => this.builder.getVar(t)), params, outTracers.map((t) => this.builder.addVar(t))));
@@ -1406,20 +1544,17 @@ var JaxprBuilder = class {
 		return v;
 	}
 	build(inTracers, outTracers) {
-		let [constVars, consts] = require_backend.unzip2(this.constVals.entries());
+		const [constVars, consts] = require_backend.unzip2(this.constVals.entries());
 		const t2v = this.getVar.bind(this);
 		const inBinders = [...constVars, ...inTracers.map(t2v)];
 		const outVars = outTracers.map(t2v);
-		let jaxpr = new Jaxpr(inBinders, this.eqns, outVars);
+		const jaxpr = new Jaxpr(inBinders, this.eqns, outVars);
 		typecheckJaxpr(jaxpr);
-		[jaxpr, consts] = _inlineLiterals(jaxpr, consts);
-		return {
-			jaxpr,
-			consts
-		};
+		const cjaxpr = new ClosedJaxpr(jaxpr, consts);
+		return _inlineLiterals(cjaxpr);
 	}
 };
-function _inlineLiterals(jaxpr, consts) {
+function _inlineLiterals({ jaxpr, consts }) {
 	const literals = /* @__PURE__ */ new Map();
 	const constBinders = [];
 	const newConsts = [];
@@ -1434,7 +1569,7 @@ function _inlineLiterals(jaxpr, consts) {
 	const newOuts = jaxpr.outs.map((x) => literals.get(x) ?? x);
 	const newJaxpr = new Jaxpr([...constBinders, ...jaxpr.inBinders.slice(consts.length)], newEqns, newOuts);
 	typecheckJaxpr(newJaxpr);
-	return [newJaxpr, newConsts];
+	return new ClosedJaxpr(newJaxpr, newConsts);
 }
 function binopAbstractEval([x, y]) {
 	if (!(x instanceof ShapedArray) || !(y instanceof ShapedArray)) throw new TypeError("binopAbstractEval expects ShapedArray inputs");
@@ -1453,6 +1588,8 @@ const abstractEvalRules = {
 	[Primitive.Mul]: binopAbstractEval,
 	[Primitive.Idiv]: binopAbstractEval,
 	[Primitive.Mod]: binopAbstractEval,
+	[Primitive.Min]: binopAbstractEval,
+	[Primitive.Max]: binopAbstractEval,
 	[Primitive.Neg]: vectorizedUnopAbstractEval,
 	[Primitive.Reciprocal]: vectorizedUnopAbstractEval,
 	[Primitive.Floor]: vectorizedUnopAbstractEval,
@@ -1466,12 +1603,6 @@ const abstractEvalRules = {
 		if (require_backend.byteWidth(x.dtype) !== require_backend.byteWidth(dtype)) throw new TypeError(`Bitcast from ${x.dtype} to ${dtype} with different byte width`);
 		return [new ShapedArray(x.shape, dtype, false)];
 	},
-	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
-		if (k0.dtype !== require_backend.DType.Uint32 || k1.dtype !== require_backend.DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
-		const keyShape = require_backend.generalBroadcast(k0.shape, k1.shape);
-		if (!require_backend.deepEqual(require_backend.generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-		return [new ShapedArray(shape$1, require_backend.DType.Uint32, false)];
-	},
 	[Primitive.Sin]: vectorizedUnopAbstractEval,
 	[Primitive.Cos]: vectorizedUnopAbstractEval,
 	[Primitive.Asin]: vectorizedUnopAbstractEval,
@@ -1481,8 +1612,6 @@ const abstractEvalRules = {
 	[Primitive.Erf]: vectorizedUnopAbstractEval,
 	[Primitive.Erfc]: vectorizedUnopAbstractEval,
 	[Primitive.Sqrt]: vectorizedUnopAbstractEval,
-	[Primitive.Min]: binopAbstractEval,
-	[Primitive.Max]: binopAbstractEval,
 	[Primitive.Reduce]([x], { axis }) {
 		const axisSet = new Set(axis);
 		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
@@ -1504,7 +1633,7 @@ const abstractEvalRules = {
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
 	[Primitive.Conv]([lhs, rhs], params) {
-		const { dtype, weakType } = promoteAvals(new ShapedArray([], lhs.dtype, lhs.weakType), new ShapedArray([], rhs.dtype, rhs.weakType));
+		const { dtype, weakType } = promoteAvals(lhs.scalar(), rhs.scalar());
 		const shape$1 = checkConvShape(lhs.shape, rhs.shape, params);
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
@@ -1515,6 +1644,40 @@ const abstractEvalRules = {
 		const shape$1 = require_backend.generalBroadcast(cond.shape, xy.shape);
 		return [new ShapedArray(shape$1, xy.dtype, xy.weakType)];
 	},
+	[Primitive.Concatenate](xs, { axis }) {
+		if (xs.length === 0) throw new TypeError("Concatenate requires at least one input");
+		for (const x of xs) if (x.ndim !== xs[0].ndim || !x.shape.every((s, i) => i === axis || s === xs[0].shape[i])) throw new TypeError(`Concatenate: inputs ${xs[0]} and ${x} must match shapes except on axis ${axis}`);
+		const shape$1 = xs[0].shape.slice();
+		shape$1[axis] = xs.reduce((sum$1, x) => sum$1 + x.shape[axis], 0);
+		const { dtype, weakType } = xs.map((x) => x.scalar()).reduce(promoteAvals);
+		return [new ShapedArray(shape$1, dtype, weakType)];
+	},
+	[Primitive.Split]([x], { axis, sizes }) {
+		const totalSize = sizes.reduce((a, b) => a + b, 0);
+		if (x.shape[axis] !== totalSize) throw new TypeError(`Split: sizes ${sizes} do not sum to dimension ${x.shape[axis]} on axis ${axis}`);
+		return sizes.map((size$1) => {
+			return new ShapedArray(x.shape.toSpliced(axis, 1, size$1), x.dtype, x.weakType);
+		});
+	},
+	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
+		if (k0.dtype !== require_backend.DType.Uint32 || k1.dtype !== require_backend.DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
+		if (!require_backend.deepEqual(k0.shape, k1.shape)) throw new TypeError(`RandomBits: Keys have different shapes ${k0.shape} and ${k1.shape}`);
+		if (!require_backend.deepEqual(shape$1.slice(0, k0.ndim), k0.shape)) throw new TypeError(`RandomBits: generated shape ${shape$1} must match key shape ${k0.shape}`);
+		return [new ShapedArray(shape$1, require_backend.DType.Uint32, false)];
+	},
+	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
+		for (const a of indices) if (a.dtype !== require_backend.DType.Int32 && a.dtype !== require_backend.DType.Uint32) throw new TypeError(`Gather indices must be Int32 or Uint32, got ${a.dtype}`);
+		if (axis.length !== indices.length) throw new TypeError(`Gather: ${axis} axes but ${indices.length} indices`);
+		if (indices.length === 0) throw new TypeError("Gather must have 1+ indices with same shape");
+		if (axis.some((a) => a < 0 || a >= x.shape.length)) throw new TypeError("Gather axis out of bounds");
+		if (outDim < 0 || outDim > x.shape.length - axis.length) throw new TypeError("Gather outDim out of bounds");
+		const axisSet = new Set(axis);
+		if (axisSet.size !== axis.length) throw new TypeError("Gather axes are not unique");
+		const gatherShape = indices.reduce((shape$1, a) => require_backend.generalBroadcast(shape$1, a.shape), []);
+		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
+		newShape.splice(outDim, 0, ...gatherShape);
+		return [new ShapedArray(newShape, x.dtype, x.weakType)];
+	},
 	[Primitive.Transpose]([x], { perm }) {
 		return [new ShapedArray(perm.map((i) => x.shape[i]), x.dtype, x.weakType)];
 	},
@@ -1535,23 +1698,41 @@ const abstractEvalRules = {
 		const newShape = x.shape.map((dim, i) => dim + width[i][0] + width[i][1]);
 		return [new ShapedArray(newShape, x.dtype, x.weakType)];
 	},
-	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
-		for (const a of indices) if (a.dtype !== require_backend.DType.Int32 && a.dtype !== require_backend.DType.Uint32) throw new TypeError(`Gather indices must be Int32 or Uint32, got ${a.dtype}`);
-		if (axis.length !== indices.length) throw new TypeError(`Gather: ${axis} axes but ${indices.length} indices`);
-		if (indices.length === 0) throw new TypeError("Gather must have 1+ indices with same shape");
-		if (axis.some((a) => a < 0 || a >= x.shape.length)) throw new TypeError("Gather axis out of bounds");
-		if (outDim < 0 || outDim > x.shape.length - axis.length) throw new TypeError("Gather outDim out of bounds");
-		const axisSet = new Set(axis);
-		if (axisSet.size !== axis.length) throw new TypeError("Gather axes are not unique");
-		const gatherShape = indices.reduce((shape$1, a) => require_backend.generalBroadcast(shape$1, a.shape), []);
-		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
-		newShape.splice(outDim, 0, ...gatherShape);
-		return [new ShapedArray(newShape, x.dtype, x.weakType)];
+	[Primitive.Sort]([x]) {
+		if (x.ndim === 0) throw new TypeError("sort: requires at least 1D input");
+		return [ShapedArray.fromAval(x)];
+	},
+	[Primitive.Argsort]([x]) {
+		if (x.ndim === 0) throw new TypeError("argsort: requires at least 1D input");
+		return [ShapedArray.fromAval(x), new ShapedArray(x.shape, require_backend.DType.Int32, false)];
+	},
+	[Primitive.TriangularSolve]([a, b]) {
+		if (a.ndim < 2) throw new TypeError(`triangular_solve: a must be at least 2D, got ${a}`);
+		if (b.ndim < 2) throw new TypeError(`triangular_solve: b must be at least 2D, got ${b}`);
+		const [m, n] = a.shape.slice(-2);
+		const [_batch, q] = b.shape.slice(-2);
+		if (!require_backend.deepEqual(a.shape.slice(0, -2), b.shape.slice(0, -2)) || a.dtype !== b.dtype || m !== n || n !== q) throw new TypeError(`triangular_solve: mismatch ${a} vs ${b}`);
+		return [new ShapedArray(b.shape, b.dtype, a.weakType && b.weakType)];
+	},
+	[Primitive.Cholesky]([a]) {
+		if (a.ndim < 2) throw new TypeError(`cholesky: requires at least 2D input, got ${a}`);
+		if (a.shape[a.ndim - 2] !== a.shape[a.ndim - 1]) throw new TypeError(`cholesky: must be square, got ${a}`);
+		return [ShapedArray.fromAval(a)];
+	},
+	[Primitive.LU]([a]) {
+		if (a.ndim < 2) throw new TypeError(`lu: requires at least 2D input, got ${a}`);
+		const batch = a.shape.slice(0, -2);
+		const [m, n] = a.shape.slice(-2);
+		return [
+			ShapedArray.fromAval(a),
+			new ShapedArray([...batch, Math.min(m, n)], require_backend.DType.Int32, false),
+			new ShapedArray([...batch, m], require_backend.DType.Int32, false)
+		];
 	},
-	[Primitive.JitCall](args, { jaxpr }) {
+	[Primitive.Jit](args, { jaxpr }) {
 		const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
-		if (args.length !== inTypes.length) throw new TypeError(`jit_call expected ${inTypes.length} arguments, got ${args.length}`);
-		for (let i = 0; i < inTypes.length; i++) if (!args[i].equals(inTypes[i])) throw new TypeError(`jit_call argument ${i} has type ${args[i]}, expected ${inTypes[i]}`);
+		if (args.length !== inTypes.length) throw new TypeError(`jit expected ${inTypes.length} arguments, got ${args.length}`);
+		for (let i = 0; i < inTypes.length; i++) if (!args[i].equals(inTypes[i])) throw new TypeError(`jit argument ${i} has type ${args[i]}, expected ${inTypes[i]}`);
 		return outTypes;
 	}
 };
@@ -1587,11 +1768,10 @@ function makeJaxpr$1(f, opts) {
 			const tracersIn = avalsIn.map((aval) => trace$1.newArg(typeof aval === "object" ? aval : pureArray(aval)));
 			const outs = fFlat(...tracersIn);
 			const tracersOut = outs.map((out) => fullRaise(trace$1, out));
-			const { jaxpr, consts } = builder.build(tracersIn, tracersOut);
+			const jaxpr = builder.build(tracersIn, tracersOut);
 			if (outTree.value === void 0) throw new Error("outTree was not set in makeJaxpr");
 			return {
-				jaxpr: jaxpr.simplify(),
-				consts,
+				jaxpr: jaxpr.mapJaxpr((j) => j.simplify()),
 				treedef: outTree.value
 			};
 		} catch (_) {
@@ -1610,22 +1790,29 @@ function jit$1(f, opts) {
 		const avalsInFlat = argsFlat.map((x) => ShapedArray.fromAval(getAval(x)));
 		const avalsIn = unflatten(inTree, avalsInFlat);
 		const jaxprArgs = joinIdx(args.length, staticArgs, avalsIn, staticArgnums);
-		const { jaxpr, consts, treedef: outTree } = require_backend.runWithCache(cache, jaxprArgs, () => makeJaxpr$1(f, opts)(...jaxprArgs));
-		const outs = bind(Primitive.JitCall, [...consts.map((c) => c.ref), ...argsFlat], {
+		const { jaxpr, treedef: outTree } = require_backend.runWithCache(cache, jaxprArgs, () => makeJaxpr$1(f, opts)(...jaxprArgs));
+		const outs = bind(Primitive.Jit, [...jaxpr.consts.map((c) => c.ref), ...argsFlat], {
 			name: f.name || "closure",
-			jaxpr,
-			numConsts: consts.length
+			jaxpr: jaxpr.jaxpr,
+			numConsts: jaxpr.consts.length
 		});
 		return unflatten(outTree, outs);
 	});
 	result.dispose = () => {
-		for (const { consts } of cache.values()) for (const c of consts) c.dispose();
+		for (const { jaxpr } of cache.values()) jaxpr.dispose();
 	};
 	return result;
 }
 //#endregion
 //#region src/frontend/jit.ts
+const routinePrimitives = new Map([
+	[Primitive.Sort, require_backend.Routines.Sort],
+	[Primitive.Argsort, require_backend.Routines.Argsort],
+	[Primitive.TriangularSolve, require_backend.Routines.TriangularSolve],
+	[Primitive.Cholesky, require_backend.Routines.Cholesky],
+	[Primitive.LU, require_backend.Routines.LU]
+]);
 /** Result of compiling a Jaxpr. Can be evaluated on a series of inputs. */
 var JitProgram = class {
 	constructor(backend, steps, inputs, outputs) {
@@ -1640,9 +1827,14 @@ var JitProgram = class {
 				case "execute": {
 					const inputsNice = step.inputs.map((id, i) => `${i}: %${id}`).join(", ");
 					const outputsNice = step.outputs.map((id) => `%${id}`).join(", ");
-					return require_backend.PPrint.pp(`execute (${inputsNice}) -> ${outputsNice}, kernel`).concat(step.kernel.pprint().indent(2));
+					const executeText = `execute (${inputsNice}) -> ${outputsNice}`;
+					if (step.source instanceof require_backend.Kernel) return require_backend.PPrint.pp(`${executeText}, kernel`).concat(step.source.pprint().indent(2));
+					else if (step.source instanceof require_backend.Routine) return require_backend.PPrint.pp(`${executeText}, routine ${step.source.name}`);
+					else {
+						step.source;
+						return require_backend.PPrint.pp(executeText);
+					}
 				}
-				case "const": return require_backend.PPrint.pp(`%${step.output} = const <Slot ${step.slot}>`);
 				case "malloc": return require_backend.PPrint.pp(`%${step.output} = malloc <${step.size} bytes>`);
 				case "incref": return require_backend.PPrint.pp(`incref ${step.input}`);
 				case "free": return require_backend.PPrint.pp(`free ${step.input}`);
@@ -1665,12 +1857,9 @@ var JitProgram = class {
 				const inputs$1 = step.inputs.map((id) => scope.get(id));
 				const outputs = step.outputs.map((id) => scope.get(id));
 				if (inputs$1.some((s) => s === void 0) || outputs.some((s) => s === void 0)) throw new Error(`internal: JitProgram scope undefined`);
-				pending.push(new PendingExecute(this.backend, step.kernel, inputs$1, outputs));
+				pending.push(new PendingExecute(this.backend, step.source, inputs$1, outputs));
 				break;
 			}
-			case "const":
-				scope.set(step.output, step.slot);
-				break;
 			case "malloc": {
 				const slot = this.backend.malloc(step.size);
 				scope.set(step.output, slot);
@@ -1704,34 +1893,37 @@ var JitProgramBuilder = class {
 		this.#nextId = nargs;
 		this.steps = [];
 	}
-	pushConst(slot) {
-		const id = this.#nextId++;
-		this.steps.push({
-			type: "const",
-			slot,
-			output: id
-		});
-		return id;
-	}
 	pushLit(lit) {
-		const kernel = new require_backend.Kernel(0, require_backend.prod(lit.aval.shape), require_backend.AluExp.const(lit.dtype, lit.value));
+		const kernel = new require_backend.Kernel(0, lit.aval.size, require_backend.AluExp.const(lit.dtype, lit.value));
 		return this.pushKernel(kernel, []);
 	}
-	pushKernel(kernel, inputs) {
+	pushBuffer(size$1) {
 		const id = this.#nextId++;
 		this.steps.push({
 			type: "malloc",
-			size: kernel.bytes,
+			size: size$1,
 			output: id
 		});
+		return id;
+	}
+	pushKernel(kernel, inputs) {
+		const id = this.pushBuffer(kernel.bytes);
 		this.steps.push({
 			type: "execute",
-			kernel,
+			source: kernel,
 			inputs,
 			outputs: [id]
 		});
 		return id;
 	}
+	pushRoutine(routine, inputs, outputs) {
+		this.steps.push({
+			type: "execute",
+			source: routine,
+			inputs,
+			outputs
+		});
+	}
 	pushIncref(id) {
 		this.steps.push({
 			type: "incref",
@@ -1757,28 +1949,18 @@ var JitProgramBuilder = class {
 	}
 };
 const jitCompileCache = /* @__PURE__ */ new Map();
-function jitCompile(backend, jaxpr, consts) {
-	if (jaxpr.inBinders.length < consts.length) throw new TypeError(`Jaxpr has ${jaxpr.inBinders.length} inputs, but ${consts.length} consts were provided`);
-	for (let i = 0; i < consts.length; i++) if (consts[i].device !== backend.type) throw new TypeError(`Const ${i} has device ${consts[i].device}, but expected ${backend.type}`);
-	const cacheKey = backend.type + require_backend.FpHash.hash(jaxpr, ...consts.map((c) => c.id));
+function jitCompile(backend, jaxpr) {
+	const cacheKey = backend.type + "," + require_backend.FpHash.hash(jaxpr);
 	const cached = jitCompileCache.get(cacheKey);
 	if (cached) return cached;
 	if (require_backend.DEBUG >= 1) console.info("=========== JIT Compile ===========\n" + jaxpr.toString());
 	jaxpr = jaxpr.flatten().simplify();
-	const nargs = jaxpr.inBinders.length - consts.length;
+	const nargs = jaxpr.inBinders.length;
 	const builder = new JitProgramBuilder(backend, nargs);
 	const blackNodes = splitGraphDataflow(backend, jaxpr);
 	const ctx = /* @__PURE__ */ new Map();
-	for (let i = 0; i < consts.length; i++) {
-		const v = jaxpr.inBinders[i];
-		const slot = consts[i]._realizeSource();
-		ctx.set(v, {
-			type: "imm",
-			arg: builder.pushConst(slot)
-		});
-	}
 	for (let i = 0; i < nargs; i++) {
-		const v = jaxpr.inBinders[consts.length + i];
+		const v = jaxpr.inBinders[i];
 		ctx.set(v, {
 			type: "imm",
 			arg: i
@@ -1786,6 +1968,31 @@ function jitCompile(backend, jaxpr, consts) {
 	}
 	for (let i = 0; i < jaxpr.eqns.length; i++) {
 		const eqn = jaxpr.eqns[i];
+		if (routinePrimitives.has(eqn.primitive)) {
+			const routine = new require_backend.Routine(routinePrimitives.get(eqn.primitive), {
+				inputShapes: eqn.inputs.map((x) => x.aval.shape),
+				inputDtypes: eqn.inputs.map((x) => x.aval.dtype),
+				outputShapes: eqn.outBinders.map((x) => x.aval.shape),
+				outputDtypes: eqn.outBinders.map((x) => x.aval.dtype)
+			}, eqn.params);
+			const inputs = [];
+			for (const input of eqn.inputs) if (input instanceof Var) {
+				const jv = ctx.get(input);
+				if (jv.type !== "imm") throw new Error(`jit: routine primitive ${eqn.primitive} input is not imm`);
+				inputs.push(jv.arg);
+			} else if (input instanceof Lit) inputs.push(builder.pushLit(input));
+			const outputs = [];
+			for (const outVar of eqn.outBinders) {
+				const outId = builder.pushBuffer(outVar.aval.size * require_backend.byteWidth(outVar.aval.dtype));
+				outputs.push(outId);
+				ctx.set(outVar, {
+					type: "imm",
+					arg: outId
+				});
+			}
+			builder.pushRoutine(routine, inputs, outputs);
+			continue;
+		}
 		const inputExps = [];
 		const inputAvals = [];
 		const inputArgs = [];
@@ -1829,35 +2036,37 @@ function jitCompile(backend, jaxpr, consts) {
 		let reduction;
 		if (inputReduction) {
 			const jv = inputReduction;
-			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp;
-			exp$2 = jv.exp.reindexGids(addArgs(jv.args));
+			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp[0];
+			exp$2 = [jv.exp.reindexGids(addArgs(jv.args))];
 			reduction = new require_backend.Reduction(jv.reduction.dtype, jv.reduction.op, jv.reduction.size, newEpilogue);
 		} else {
 			const ruleOutput = rule(inputExps, inputAvals, eqn.params);
 			exp$2 = ruleOutput.exp;
 			reduction = ruleOutput.reduction;
 		}
-		const outVar = eqn.outBinders[0];
-		if (blackNodes.has(outVar)) {
-			const nargs$1 = inputArgs.length;
-			const size$1 = require_backend.prod(outVar.aval.shape);
-			const kernel = new require_backend.Kernel(nargs$1, size$1, exp$2, reduction);
-			const outId = builder.pushKernel(kernel, inputArgs);
-			ctx.set(outVar, {
-				type: "imm",
-				arg: outId
+		for (let i$1 = 0; i$1 < eqn.outBinders.length; i$1++) {
+			const outVar = eqn.outBinders[i$1];
+			if (blackNodes.has(outVar)) {
+				const nargs$1 = inputArgs.length;
+				const size$1 = outVar.aval.size;
+				const kernel = new require_backend.Kernel(nargs$1, size$1, exp$2[i$1], reduction);
+				const outId = builder.pushKernel(kernel, inputArgs);
+				ctx.set(outVar, {
+					type: "imm",
+					arg: outId
+				});
+			} else if (reduction) ctx.set(outVar, {
+				type: "red",
+				exp: exp$2[i$1],
+				reduction,
+				args: inputArgs
 			});
-		} else if (reduction) ctx.set(outVar, {
-			type: "red",
-			exp: exp$2,
-			reduction,
-			args: inputArgs
-		});
-		else ctx.set(outVar, {
-			type: "exp",
-			exp: exp$2,
-			args: inputArgs
-		});
+			else ctx.set(outVar, {
+				type: "exp",
+				exp: exp$2[i$1],
+				args: inputArgs
+			});
+		}
 	}
 	const outputIds = [];
 	for (const out of jaxpr.outs) if (out instanceof Var) {
@@ -1865,7 +2074,7 @@ function jitCompile(backend, jaxpr, consts) {
 		if (jitValue.type !== "imm") throw new Error("internal: Expected imm, since outs are black nodes");
 		outputIds.push(jitValue.arg);
 	} else if (out instanceof Lit) outputIds.push(builder.pushLit(out));
-	const outputNeedsRef = new Set([...require_backend.range(nargs), ...builder.steps.filter((s) => s.type === "const").map((s) => s.output)]);
+	const outputNeedsRef = new Set(require_backend.range(nargs));
 	for (const outputId of outputIds) if (outputNeedsRef.has(outputId)) builder.pushIncref(outputId);
 	else outputNeedsRef.add(outputId);
 	builder.insertFreeSteps(outputIds);
@@ -1898,17 +2107,22 @@ function broadcastedJit(fn, opts) {
 			if (exp$2.dtype !== newDtype && !skipCastIdx.includes(i)) exp$2 = require_backend.AluExp.cast(newDtype, exp$2);
 			return exp$2;
 		});
-		return { exp: fn(exps, params) };
+		return { exp: [fn(exps, params)] };
 	};
 }
 function unopJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: fn(a, params) };
+		return { exp: [fn(a, params)] };
 	};
 }
 function reshapeJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: reshapeViews(a, (st) => fn(st, params)) };
+		return { exp: [reshapeViews(a, (st) => fn(st, params))] };
+	};
+}
+function routineNoJit() {
+	return () => {
+		throw new Error("jit: rule is not implemented for routines");
 	};
 }
 const jitRules = {
@@ -1916,6 +2130,8 @@ const jitRules = {
 	[Primitive.Mul]: broadcastedJit(([a, b]) => require_backend.AluExp.mul(a, b)),
 	[Primitive.Idiv]: broadcastedJit(([a, b]) => require_backend.AluExp.idiv(a, b)),
 	[Primitive.Mod]: broadcastedJit(([a, b]) => require_backend.AluExp.mod(a, b)),
+	[Primitive.Min]: broadcastedJit(([a, b]) => require_backend.AluExp.min(a, b)),
+	[Primitive.Max]: broadcastedJit(([a, b]) => require_backend.AluExp.max(a, b)),
 	[Primitive.Neg]: unopJit((a) => require_backend.AluExp.sub(require_backend.AluExp.const(a.dtype, 0), a)),
 	[Primitive.Reciprocal]: unopJit(require_backend.AluExp.reciprocal),
 	[Primitive.Floor]: unopJit(require_backend.AluExp.floor),
@@ -1923,17 +2139,6 @@ const jitRules = {
 	[Primitive.StopGradient]: unopJit((a) => a),
 	[Primitive.Cast]: unopJit((a, { dtype }) => require_backend.AluExp.cast(dtype, a)),
 	[Primitive.Bitcast]: unopJit((a, { dtype }) => require_backend.AluExp.bitcast(dtype, a)),
-	[Primitive.RandomBits]: (keys, keyShapes, { shape: shape$1, mode }) => {
-		const mapping = (st) => {
-			if (!require_backend.deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, require_backend.range(shape$1.length - st.shape.length));
-		};
-		const k0 = reshapeViews(keys[0], mapping);
-		const k1 = reshapeViews(keys[1], mapping);
-		const c0 = require_backend.AluExp.u32(0);
-		const c1 = require_backend.AluExp.cast(require_backend.DType.Uint32, require_backend.AluVar.gidx);
-		const exp$2 = require_backend.AluExp.threefry2x32(k0, k1, c0, c1, mode);
-		return { exp: exp$2 };
-	},
 	[Primitive.Sin]: unopJit(require_backend.AluExp.sin),
 	[Primitive.Cos]: unopJit(require_backend.AluExp.cos),
 	[Primitive.Asin]: unopJit(require_backend.AluExp.asin),
@@ -1943,8 +2148,6 @@ const jitRules = {
 	[Primitive.Erf]: unopJit(require_backend.AluExp.erf),
 	[Primitive.Erfc]: unopJit(require_backend.AluExp.erfc),
 	[Primitive.Sqrt]: unopJit(require_backend.AluExp.sqrt),
-	[Primitive.Min]: broadcastedJit(([a, b]) => require_backend.AluExp.min(a, b)),
-	[Primitive.Max]: broadcastedJit(([a, b]) => require_backend.AluExp.max(a, b)),
 	[Primitive.Reduce]([a], [as], { op, axis }) {
 		const keptAxes = [];
 		const shiftedAxes = [];
@@ -1960,7 +2163,7 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.permute(perm).reshape(newShape), true);
 		const reduction = new require_backend.Reduction(a.dtype, op, reductionSize);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
@@ -1971,13 +2174,13 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.compose(stX), true);
 		const reduction = new require_backend.Reduction(a.dtype, require_backend.AluOp.Add, stX.shape[stX.shape.length - 1]);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
 	[Primitive.Dot]([a, b], [as, bs]) {
 		const k1 = jitRules[Primitive.Mul]([a, b], [as, bs], {});
-		const c = k1.exp;
+		const [c] = k1.exp;
 		const cs = promoteAvals(as, bs);
 		return jitRules[Primitive.Reduce]([c], [cs], {
 			op: require_backend.AluOp.Add,
@@ -1994,16 +2197,42 @@ const jitRules = {
 	},
 	[Primitive.Compare]: broadcastedJit(([a, b], { op }) => aluCompare(a, b, op)),
 	[Primitive.Where]: broadcastedJit(([cond, a, b]) => require_backend.AluExp.where(cond, a, b), { skipCastIdx: [0] }),
-	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
-	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
-	[Primitive.Reshape]: reshapeJit((st, { shape: shape$1 }) => st.reshape(shape$1)),
-	[Primitive.Flip]: reshapeJit((st, { axis }) => {
-		const arg = require_backend.rep(st.shape.length, false);
-		for (const ax of axis) arg[ax] = true;
-		return st.flip(arg);
-	}),
-	[Primitive.Shrink]: reshapeJit((st, { slice }) => st.shrink(slice)),
-	[Primitive.Pad]: reshapeJit((st, { width }) => st.pad(width)),
+	[Primitive.Concatenate](exps, avals, { axis }) {
+		// Each input is padded along `axis` out to the full concatenated extent,
+		// sitting at its own offset; the padded expressions are then folded
+		// together with AluExp.add.
+		const rank = avals[0].ndim;
+		const extents = avals.map((aval) => aval.shape[axis]);
+		const total = extents.reduce((sum, extent) => sum + extent, 0);
+		let offset = 0;
+		const padded = exps.map((piece, idx) => {
+			const before = offset;
+			const after = total - before - extents[idx];
+			offset += extents[idx];
+			const widths = require_backend.range(rank).map((dim) => dim === axis ? [before, after] : [0, 0]);
+			return reshapeViews(piece, (view) => view.pad(widths));
+		});
+		return { exp: [padded.reduce(require_backend.AluExp.add)] };
+	},
+	[Primitive.Split]([a], [as], { axis, sizes }) {
+		// One output expression per entry of `sizes`: `a` seen through a shrink that
+		// keeps [lo, hi) along `axis` and the full range of every other dimension.
+		let offset = 0;
+		const pieces = Array.from(sizes, (len) => {
+			const lo = offset;
+			const hi = lo + len;
+			offset = hi;
+			const keep = require_backend.range(as.ndim).map((dim) => dim === axis ? [lo, hi] : [0, as.shape[dim]]);
+			return reshapeViews(a, (view) => view.shrink(keep));
+		});
+		return { exp: pieces };
+	},
+	[Primitive.RandomBits]: (keys, keyShapes, { shape: shape$1, mode }) => {
+		const keyRank = keyShapes[0].shape.length;
+		// Key views that do not already have the output shape are broadcast over
+		// the trailing axes; views that match are left untouched.
+		const toOutputShape = (view) => {
+			if (require_backend.deepEqual(view.shape, shape$1)) return;
+			return view.broadcast(shape$1, require_backend.range(view.shape.length, shape$1.length));
+		};
+		const key0 = reshapeViews(keys[0], toOutputShape);
+		const key1 = reshapeViews(keys[1], toOutputShape);
+		// The counter is the global index modulo the number of values generated
+		// per key; Math.max keeps that modulus at least 1.
+		const perKey = Math.max(require_backend.prod(shape$1.slice(keyRank)), 1);
+		const gid = require_backend.AluExp.cast(require_backend.DType.Uint32, require_backend.AluVar.gidx);
+		const counter = require_backend.AluExp.mod(gid, require_backend.AluExp.u32(perKey));
+		const zeroWord = require_backend.AluExp.u32(0);
+		return { exp: [require_backend.AluExp.threefry2x32(key0, key1, zeroWord, counter, mode)] };
+	},
 	[Primitive.Gather]([x, ...indices], [xs, ...indicesShapes], { axis, outDim }) {
 		const axisSet = new Set(axis);
 		const indexShape = indicesShapes.map((c) => c.shape).reduce(require_backend.generalBroadcast);
@@ -2017,10 +2246,25 @@ const jitRules = {
 		for (const [i, iexp] of indices.entries()) src[axis[i]] = require_backend.AluExp.cast(require_backend.DType.Int32, reshapeViews(iexp, (st) => st.broadcast(finalShape, [...require_backend.range(outDim + indexShape.length - st.shape.length), ...require_backend.range(outDim + indexShape.length, finalShape.length)])));
 		const [index, valid] = require_backend.ShapeTracker.fromShape(xs.shape).toAluExp(src);
 		if (!valid.resolve()) throw new Error("internal: expected full validity mask in Gather");
-		return { exp: x.substitute({ gidx: index }) };
+		return { exp: [x.substitute({ gidx: index })] };
 	},
-	[Primitive.JitCall]() {
-		throw new Error("internal: JitCall should have been flattened before JIT compilation");
+	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
+	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
+	[Primitive.Reshape]: reshapeJit((st, { shape: shape$1 }) => st.reshape(shape$1)),
+	[Primitive.Flip]: reshapeJit((st, { axis }) => {
+		const arg = require_backend.rep(st.shape.length, false);
+		for (const ax of axis) arg[ax] = true;
+		return st.flip(arg);
+	}),
+	[Primitive.Shrink]: reshapeJit((st, { slice }) => st.shrink(slice)),
+	[Primitive.Pad]: reshapeJit((st, { width }) => st.pad(width)),
+	[Primitive.Sort]: routineNoJit(),
+	[Primitive.Argsort]: routineNoJit(),
+	[Primitive.TriangularSolve]: routineNoJit(),
+	[Primitive.Cholesky]: routineNoJit(),
+	[Primitive.LU]: routineNoJit(),
+	[Primitive.Jit]() {
+		throw new Error("internal: Jit should have been flattened before JIT compilation");
 	}
 };
 /** Determines how to split the Jaxpr into kernels via dataflow analysis. */
@@ -2078,8 +2322,8 @@ function splitGraphDataflow(backend, jaxpr) {
 					case Primitive.Mul:
 					case Primitive.Idiv:
 					case Primitive.Mod:
-					case Primitive.Max:
-					case Primitive.Min: {
+					case Primitive.Min:
+					case Primitive.Max: {
 						const otherInput = nextEqn.inputs.find((v) => v !== outVar);
 						if (otherInput instanceof Lit || require_backend.deepEqual(require_backend.generalBroadcast(otherInput.aval.shape, outVar.aval.shape), outVar.aval.shape)) {
 							head = usages[0];
@@ -2099,11 +2343,11 @@ function splitGraphDataflow(backend, jaxpr) {
 		blackNodes.add(v);
 		p1NextBlack.set(v, v);
 	}
-	const heterogeneousViewPrimitives = [Primitive.Gather, Primitive.RandomBits];
+	const heterogeneousViewPrimitives = [Primitive.RandomBits, Primitive.Gather];
 	const needsCleanShapePrimitives = [Primitive.Pad];
 	for (let i = jaxpr.eqns.length - 1; i >= 0; i--) {
 		const eqn = jaxpr.eqns[i];
-		if (reductionEndpointEqns.has(i) || heterogeneousViewPrimitives.includes(eqn.primitive) || eqn.outBinders.some((v) => blackNodes.has(v))) {
+		if (reductionEndpointEqns.has(i) || heterogeneousViewPrimitives.includes(eqn.primitive) || routinePrimitives.has(eqn.primitive) || eqn.outBinders.some((v) => blackNodes.has(v))) {
 			for (const v of eqn.outBinders) {
 				blackNodes.add(v);
 				p1NextBlack.set(v, v);
@@ -2113,7 +2357,7 @@ function splitGraphDataflow(backend, jaxpr) {
 		const reach = /* @__PURE__ */ new Set();
 		let needsCleanOutput = false;
 		outer: for (const v of eqn.outBinders) for (const j of varToUsages.get(v) ?? []) {
-			if (needsCleanShapePrimitives.includes(jaxpr.eqns[j].primitive)) {
+			if (needsCleanShapePrimitives.includes(jaxpr.eqns[j].primitive) || routinePrimitives.has(jaxpr.eqns[j].primitive)) {
 				needsCleanOutput = true;
 				break outer;
 			}
@@ -2137,7 +2381,6 @@ function splitGraphDataflow(backend, jaxpr) {
 	while (p2idx < jaxpr.eqns.length) {
 		const eqn = jaxpr.eqns[p2idx++];
 		const deps = [];
-		if (eqn.outBinders.some((v) => blackNodes.has(v))) continue;
 		for (const input of eqn.inputs) if (input instanceof Var) if (blackNodes.has(input)) deps.push(new Set([input]));
 		else deps.push(p2Deps.get(input));
 		else deps.push(/* @__PURE__ */ new Set());
@@ -2160,7 +2403,7 @@ function splitGraphDataflow(backend, jaxpr) {
 			if (assocInput === -1) throw new Error(`internal: maxArgs, no input found to mark as black in Jaxpr equation ${eqn}`);
 			const assocVar = eqn.inputs[assocInput];
 			p2idx = varToDefn.get(assocVar);
-			for (const out of jaxpr.eqns[p2idx].outBinders) blackNodes.add(out);
+			for (const out of jaxpr.eqns[p2idx++].outBinders) blackNodes.add(out);
 		} else {
 			const s = new Set(depCounter.keys());
 			for (const out of eqn.outBinders) p2Deps.set(out, s);
@@ -2186,9 +2429,9 @@ var PendingExecute = class {
 	submitted = false;
 	#promise = null;
 	#rc = 1;
-	constructor(backend, kernel, inputs, outputs) {
+	constructor(backend, source, inputs, outputs) {
 		this.backend = backend;
-		this.kernel = kernel;
+		this.source = source;
 		this.inputs = inputs;
 		this.outputs = outputs;
 		for (const slot of inputs) this.backend.incRef(slot);
@@ -2209,13 +2452,15 @@ var PendingExecute = class {
 			return;
 		}
 		this.#promise = (async () => {
-			this.prepared = await this.backend.prepare(this.kernel);
+			if (this.source instanceof require_backend.Kernel) this.prepared = await this.backend.prepareKernel(this.source);
+			else this.prepared = await this.backend.prepareRoutine(this.source);
 		})();
 		await this.#promise;
 	}
 	prepareSync() {
 		if (this.prepared) return;
-		this.prepared = this.backend.prepareSync(this.kernel);
+		if (this.source instanceof require_backend.Kernel) this.prepared = this.backend.prepareKernelSync(this.source);
+		else this.prepared = this.backend.prepareRoutineSync(this.source);
 	}
 	submit() {
 		if (this.submitted) return;
@@ -2238,8 +2483,6 @@ var PendingExecute = class {
 * "Array" type by name.
 */
 var Array$1 = class Array$1 extends Tracer {
-	static #nextId = 1001;
-	id;
 	#dtype;
 	#weakType;
 	#source;
@@ -2256,7 +2499,6 @@ var Array$1 = class Array$1 extends Tracer {
 	*/
 	constructor(args) {
 		super(baseArrayTrace);
-		this.id = Array$1.#nextId++;
 		this.#dtype = args.dtype;
 		this.#weakType = args.weakType;
 		this.#source = args.source;
@@ -2299,6 +2541,10 @@ var Array$1 = class Array$1 extends Tracer {
 		this.#rc++;
 		return this;
 	}
+	/** Get the current reference count (for debugging memory management). */
+	get refCount() {
+		return this.#rc;
+	}
 	dispose() {
 		this.#check();
 		if (--this.#rc === 0) {
@@ -2456,7 +2702,7 @@ var Array$1 = class Array$1 extends Tracer {
 		} else if (castDtype === void 0) {
 			castDtype = arrays[i].#dtype;
 			castWeakType = arrays[i].#weakType;
-		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), new ShapedArray([], arrays[i].#dtype, arrays[i].#weakType)));
+		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), arrays[i].aval.scalar()));
 		const weakType = castWeakType && !strongTypeOutput;
 		const { backend, committed } = Array$1.#computeBackend(name, arrays);
 		arrays = arrays.map((ar) => ar._putSync(backend));
@@ -2565,6 +2811,27 @@ var Array$1 = class Array$1 extends Tracer {
 			pending
 		});
 	}
+	/**
+	 * Apply a routine (an operation with custom lowering) to the given arrays.
+	 *
+	 * Realizes every input, allocates one output buffer per entry of
+	 * `routine.type.outputDtypes`, queues a `PendingExecute` for the routine and
+	 * disposes the inputs. Returns one array per output buffer.
+	 *
+	 * `outputWeakType[i]` is used as the `weakType` of the i-th returned array.
+	 */
+	static #routine(routine, arrays, outputWeakType) {
+		const { backend, committed } = Array$1.#computeBackend(routine.name, arrays);
+		for (const ar of arrays) ar.#realize();
+		const inputs = arrays.map((ar) => ar.#source);
+		const outputs = routine.type.outputDtypes.map((dtype, i) => backend.malloc(require_backend.byteWidth(dtype) * require_backend.prod(routine.type.outputShapes[i]))); // byte size = element width * element count of output i
+		const pending = arrays.flatMap((ar) => ar.#pending); // not de-duplicated: an execute shared by several inputs appears once per input
+		for (const exe of pending) exe.updateRc(+outputs.length); // presumably one extra reference per returned array — TODO confirm against PendingExecute.updateRc
+		pending.push(new PendingExecute(backend, routine, inputs, outputs));
+		pending[pending.length - 1].updateRc(+outputs.length - 1); // the new execute adjusts its own count by outputs.length - 1
+		arrays.forEach((ar) => ar.dispose()); // the inputs are consumed by this call
+		return outputs.map((output, i) => new Array$1({
+			source: output,
+			st: require_backend.ShapeTracker.fromShape(routine.type.outputShapes[i]),
+			dtype: routine.type.outputDtypes[i],
+			weakType: outputWeakType[i],
+			backend,
+			committed,
+			pending // NOTE(review): the same array object is handed to every output — confirm the constructor does not rely on owning it
+		}));
+	}
 	/**
 	* Normalizes this array into one backed by a `Slot`.
 	*
@@ -2725,6 +2992,12 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Mod]([x, y]) {
 				return [x.#binary(require_backend.AluOp.Mod, y)];
 			},
+			[Primitive.Min]([x, y]) {
+				return [x.#binary(require_backend.AluOp.Min, y)];
+			},
+			[Primitive.Max]([x, y]) {
+				return [x.#binary(require_backend.AluOp.Max, y)];
+			},
 			[Primitive.Neg]([x]) {
 				return [zerosLike$1(x.ref).#binary(require_backend.AluOp.Sub, x)];
 			},
@@ -2761,25 +3034,6 @@ var Array$1 = class Array$1 extends Tracer {
 					return [y];
 				}
 			},
-			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
-				const keyShape = require_backend.generalBroadcast(k0.shape, k1.shape);
-				if (!require_backend.deepEqual(require_backend.generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-				const c0 = zeros(shape$1, {
-					dtype: require_backend.DType.Uint32,
-					device: k0.device
-				});
-				const c1 = arange(0, require_backend.prod(shape$1), 1, {
-					dtype: require_backend.DType.Uint32,
-					device: k0.device
-				}).reshape(shape$1);
-				const custom = ([k0$1, k1$1, c0$1, c1$1]) => require_backend.AluExp.threefry2x32(k0$1, k1$1, c0$1, c1$1, mode);
-				return [Array$1.#naryCustom("random_bits", custom, [
-					k0,
-					k1,
-					c0,
-					c1
-				])];
-			},
 			[Primitive.Sin]([x]) {
 				return [x.#unary(require_backend.AluOp.Sin)];
 			},
@@ -2807,12 +3061,6 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Sqrt]([x]) {
 				return [x.#unary(require_backend.AluOp.Sqrt)];
 			},
-			[Primitive.Min]([x, y]) {
-				return [x.#binary(require_backend.AluOp.Min, y)];
-			},
-			[Primitive.Max]([x, y]) {
-				return [x.#binary(require_backend.AluOp.Max, y)];
-			},
 			[Primitive.Reduce]([x], { op, axis }) {
 				if (axis.length === 0) return [x];
 				return [x.#moveAxesDown(axis).#reduce(op)];
@@ -2847,6 +3095,55 @@ var Array$1 = class Array$1 extends Tracer {
 					y
 				], { dtypeOverride: [require_backend.DType.Bool] })];
 			},
+			[Primitive.Concatenate](xs, { axis }) {
+				// Pad every input along `axis` to the concatenated extent, each at its
+				// own offset, then combine the padded arrays with AluExp.add.
+				const rank = xs[0].ndim;
+				const extents = xs.map((arr) => arr.shape[axis]);
+				const total = extents.reduce((sum, extent) => sum + extent, 0);
+				let offset = 0;
+				const padded = xs.map((arr, idx) => {
+					const before = offset;
+					const after = total - before - extents[idx];
+					offset += extents[idx];
+					const widths = require_backend.range(rank).map((dim) => dim === axis ? [before, after] : [0, 0]);
+					return arr.#reshape(arr.#st.pad(widths));
+				});
+				const custom = (exps) => exps.reduce(require_backend.AluExp.add);
+				return [Array$1.#naryCustom("concatenate", custom, padded)];
+			},
+			[Primitive.Split]([x], { axis, sizes }) {
+				// Every piece is a shrunken view built on its own reference to `x`; the
+				// reference passed in by the caller is released once all pieces exist.
+				const pieces = [];
+				let offset = 0;
+				for (const len of sizes) {
+					const end = offset + len;
+					const bounds = require_backend.range(x.ndim).map((dim) => dim === axis ? [offset, end] : [0, x.shape[dim]]);
+					pieces.push(x.ref.#reshape(x.#st.shrink(bounds)));
+					offset = end;
+				}
+				x.dispose();
+				return pieces;
+			},
+			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
+				// The leading axes of the output shape belong to the keys; the remaining
+				// axes (`genShape`) are the values generated for each key.
+				const keyShape = k0.shape;
+				const genShape = shape$1.slice(keyShape.length);
+				const u32Opts = () => ({
+					dtype: require_backend.DType.Uint32,
+					device: k0.device
+				});
+				const zeroWord = zeros(genShape, u32Opts());
+				const counter = arange(0, require_backend.prod(genShape), 1, u32Opts()).reshape(genShape);
+				// Give each key one size-1 axis per generated axis.
+				const withGenAxes = (key) => key.#reshape(key.#st.reshape(keyShape.concat(require_backend.rep(genShape.length, 1))));
+				const key0 = withGenAxes(k0);
+				const key1 = withGenAxes(k1);
+				const custom = ([a, b, c, d]) => require_backend.AluExp.threefry2x32(a, b, c, d, mode);
+				return [Array$1.#naryCustom("random_bits", custom, [
+					key0,
+					key1,
+					zeroWord,
+					counter
+				])];
+			},
+			[Primitive.Gather]([x, ...indices], { axis, outDim }) {
+				return [x.#gather(indices, axis, outDim)];
+			},
 			[Primitive.Transpose]([x], { perm }) {
 				return [x.#transpose(perm)];
 			},
@@ -2867,17 +3164,71 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Pad]([x], { width }) {
 				return [x.#reshape(x.#st.pad(width))];
 			},
-			[Primitive.Gather]([x, ...indices], { axis, outDim }) {
-				return [x.#gather(indices, axis, outDim)];
+			[Primitive.Sort]([x]) {
+				// The single output has the shape and dtype of the input.
+				const signature = {
+					inputShapes: [x.shape],
+					inputDtypes: [x.dtype],
+					outputShapes: [x.shape],
+					outputDtypes: [x.dtype]
+				};
+				const sortRoutine = new require_backend.Routine(require_backend.Routines.Sort, signature);
+				return Array$1.#routine(sortRoutine, [x], [x.#weakType]);
+			},
+			[Primitive.Argsort]([x]) {
+				// Two outputs of the input's shape: one with the input dtype, one Int32.
+				const signature = {
+					inputShapes: [x.shape],
+					inputDtypes: [x.dtype],
+					outputShapes: [x.shape, x.shape],
+					outputDtypes: [x.dtype, require_backend.DType.Int32]
+				};
+				const argsortRoutine = new require_backend.Routine(require_backend.Routines.Argsort, signature);
+				return Array$1.#routine(argsortRoutine, [x], [x.#weakType, false]);
+			},
+			[Primitive.TriangularSolve]([a, b], { unitDiagonal }) {
+				// The output takes its shape and dtype from `b`; it is weakly typed
+				// only when both inputs are.
+				const signature = {
+					inputShapes: [a.shape, b.shape],
+					inputDtypes: [a.dtype, b.dtype],
+					outputShapes: [b.shape],
+					outputDtypes: [b.dtype]
+				};
+				const solveRoutine = new require_backend.Routine(require_backend.Routines.TriangularSolve, signature, { unitDiagonal });
+				return Array$1.#routine(solveRoutine, [a, b], [a.#weakType && b.#weakType]);
 			},
-			[Primitive.JitCall](args, { jaxpr, numConsts }) {
-				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit_call expects ${jaxpr.inBinders.length} args, got ${args.length}`);
-				const { backend, committed } = Array$1.#computeBackend("jit_call", args);
+			[Primitive.Cholesky]([a]) {
+				// The single output has the shape and dtype of the input.
+				const signature = {
+					inputShapes: [a.shape],
+					inputDtypes: [a.dtype],
+					outputShapes: [a.shape],
+					outputDtypes: [a.dtype]
+				};
+				const choleskyRoutine = new require_backend.Routine(require_backend.Routines.Cholesky, signature);
+				return Array$1.#routine(choleskyRoutine, [a], [a.#weakType]);
+			},
+			[Primitive.LU]([a]) {
+				// Three outputs: one shaped like the input with its dtype, plus two
+				// Int32 arrays of shape [...batch, min(rows, cols)] and [...batch, rows].
+				const batchDims = a.shape.slice(0, -2);
+				const [rows, cols] = a.shape.slice(-2);
+				const outputShapes = [
+					a.shape,
+					[...batchDims, Math.min(rows, cols)],
+					[...batchDims, rows]
+				];
+				const outputDtypes = [
+					a.dtype,
+					require_backend.DType.Int32,
+					require_backend.DType.Int32
+				];
+				const luRoutine = new require_backend.Routine(require_backend.Routines.LU, {
+					inputShapes: [a.shape],
+					inputDtypes: [a.dtype],
+					outputShapes,
+					outputDtypes
+				});
+				const outputWeakType = [a.#weakType, false, false];
+				return Array$1.#routine(luRoutine, [a], outputWeakType);
+			},
+			[Primitive.Jit](args, { jaxpr }) {
+				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit expects ${jaxpr.inBinders.length} args, got ${args.length}`);
+				const { backend, committed } = Array$1.#computeBackend("jit", args);
 				args = args.map((ar) => ar._putSync(backend));
-				const consts = args.slice(0, numConsts);
-				const tracers = args.slice(numConsts);
-				const jp = jitCompile(backend, jaxpr, consts);
-				const { outputs, pending } = jp.execute(tracers.map((x) => x._realizeSource()));
+				const jp = jitCompile(backend, jaxpr);
+				const { outputs, pending } = jp.execute(args.map((x) => x._realizeSource()));
 				for (const exe of pending) exe.updateRc(+outputs.length - 1);
 				const prevPending = [...new Set(args.flatMap((x) => x.#pending))];
 				for (const exe of prevPending) exe.updateRc(+outputs.length);
@@ -2977,7 +3328,7 @@ function array(values, { shape: shape$1, dtype, device } = {}) {
 				device
 			});
 		} else {
-			const weakType = dtype == void 0;
+			const weakType = dtype == void 0 && shape$1.length === 0;
 			dtype = dtype ?? require_backend.DType.Float32;
 			const data = require_backend.dtypedJsArray(dtype, flat);
 			return arrayFromData(data, shape$1, {
@@ -3091,7 +3442,7 @@ function ones(shape$1, { dtype, device } = {}) {
 }
 /** Return a new array of given shape and type, filled with `fill_value`. */
 function full(shape$1, fillValue, { dtype, device } = {}) {
-	let weakType = dtype == void 0;
+	let weakType = dtype == void 0 && shape$1.length === 0;
 	if (typeof fillValue === "number") dtype = dtype ?? require_backend.DType.Float32;
 	else if (typeof fillValue === "boolean") {
 		dtype = dtype ?? require_backend.DType.Bool;
@@ -3176,6 +3527,43 @@ function arange(start, stop, step = 1, { dtype, device } = {}) {
 	});
 }
 /**
+* Return an array with ones on and below the diagonal and zeros elsewhere.
+*
+* If `k` is provided, it specifies the sub-diagonal on and below which the
+* array is filled with ones. `k=0` is the main diagonal, `k<0` is below it, and
+* `k>0` is above it.
+*/
+function tri(n, m, k = 0, { dtype, device } = {}) {
+	// `m` falls back to `n` and `dtype` to Float32 when they are null or undefined.
+	const numCols = m ?? n;
+	const outDtype = dtype ?? require_backend.DType.Float32;
+	const isCount = (value) => Number.isInteger(value) && value >= 0;
+	if (!isCount(n)) throw new Error(`tri: n must be a non-negative integer, got ${n}`);
+	if (!isCount(numCols)) throw new Error(`tri: m must be a non-negative integer, got ${numCols}`);
+	if (!Number.isInteger(k)) throw new Error(`tri: k must be an integer, got ${k}`);
+	// Entry (i, j) is set where i + k >= j: row indices start at k, column indices at 0.
+	const rowIdx = arange(k, n + k, 1, {
+		dtype: require_backend.DType.Int32,
+		device
+	});
+	const colIdx = arange(0, numCols, 1, {
+		dtype: require_backend.DType.Int32,
+		device
+	});
+	const mask = rowIdx.reshape([n, 1]).greaterEqual(colIdx);
+	return mask.astype(outDtype);
+}
+/**
+* Return the lower triangle of an array. Must be of dimension >= 2.
+*
+* Entries of the last two axes selected by the `tri(n, m, k)` mask are kept;
+* every other entry is taken from a zero array shaped like the input.
+*/
+function tril(a, k = 0) {
+	const rank = ndim$1(a);
+	if (rank < 2) throw new Error(`tril: input array must be at least 2D, got ${rank}D`);
+	const arr = fudgeArray(a);
+	const [n, m] = arr.shape.slice(-2);
+	const keepMask = tri(n, m, k, { dtype: require_backend.DType.Bool });
+	// The extra reference is taken before `arr` itself is passed to zerosLike$1.
+	const kept = arr.ref;
+	return where$1(keepMask, kept, zerosLike$1(arr));
+}
+/**
+* Return the upper triangle of an array. Must be of dimension >= 2.
+*
+* Entries of the last two axes selected by the `tri(n, m, k - 1)` mask are
+* replaced with zeros; every other entry is kept.
+*
+* @param a - Input with at least two dimensions; it is passed through `fudgeArray`.
+* @param k - Diagonal offset; the mask is built with `k - 1`. Default is 0.
+* @returns The masked array.
+* @throws Error if the input has fewer than two dimensions.
+*/
+function triu(a, k = 0) {
+	// Report the failure under this function's own name (the message used to say "tril").
+	if (ndim$1(a) < 2) throw new Error(`triu: input array must be at least 2D, got ${ndim$1(a)}D`);
+	a = fudgeArray(a);
+	const [n, m] = a.shape.slice(-2);
+	// `a.ref` feeds zerosLike$1 so that `a` itself can still be handed to where$1.
+	return where$1(tri(n, m, k - 1, { dtype: require_backend.DType.Bool }), zerosLike$1(a.ref), a);
+}
+/**
 * Return evenly spaced numbers over a specified interval.
 *
 * Returns _num_ evenly spaced samples, calculated over the interval
@@ -3212,6 +3600,27 @@ function linspace(start, stop, num = 50, endpoint = true, { dtype, device } = {}
 		committed: device != void 0
 	});
 }
+/**
+* Return `num` samples spaced evenly on a logarithmic scale.
+*
+* The exponents come from `linspace(start, stop, num, endpoint)`; each sample
+* is `base` raised to its exponent, computed as `exp(y * ln(base))`.
+*
+* @param start - Exponent of the first sample: the sequence begins at `base ** start`.
+* @param stop - Exponent of the last sample when `endpoint` is true.
+* @param num - Number of samples to generate. Default is 50.
+* @param endpoint - Forwarded to `linspace`. Default is true.
+* @param base - Base of the logarithmic scale. Default is 10.
+* @returns Array of samples evenly spaced on a log scale.
+*/
+function logspace(start, stop, num = 50, endpoint = true, base = 10, { dtype, device } = {}) {
+	const exponents = linspace(start, stop, num, endpoint, {
+		dtype,
+		device
+	});
+	return exp$1(mul(exponents, Math.log(base)));
+}
 function aluCompare(a, b, op) {
 	switch (op) {
 		case CompareOp.Less: return require_backend.AluExp.cmplt(a, b);
@@ -3222,385 +3631,211 @@ function aluCompare(a, b, op) {
 }
 //#endregion
-//#region src/frontend/jvp.ts
+//#region src/frontend/vmap.ts
 var import_usingCtx$1 = /* @__PURE__ */ __toESM(require_usingCtx(), 1);
-var JVPTracer = class extends Tracer {
-	constructor(trace$1, primal, tangent) {
+function mappedAval(batchDim, aval) { // Builds a ShapedArray with the same dtype/weakType as `aval` but with axis `batchDim` removed from the shape.
+	const shape$1 = [...aval.shape]; // copy, so the splice below does not touch aval.shape
+	shape$1.splice(batchDim, 1); // drop the batch axis
+	return new ShapedArray(shape$1, aval.dtype, aval.weakType);
+}
+/**
+* Move one axis to a different index.
+*
+* `x` is first converted with `pureArray`. Both `src` and `dst` are passed
+* through `checkAxis` against the array's rank; if they resolve to the same
+* axis the converted array is returned without a transpose.
+*
+* @param x - Input value.
+* @param src - Axis to move.
+* @param dst - Index the axis should end up at.
+* @returns The transposed array.
+*/
+function moveaxis(x, src, dst) {
+	const t = pureArray(x);
+	src = require_backend.checkAxis(src, t.ndim);
+	dst = require_backend.checkAxis(dst, t.ndim);
+	if (src === dst) return t;
+	const perm = require_backend.range(t.ndim); // presumably [0, 1, ..., ndim - 1] — confirm against `range`
+	perm.splice(src, 1); // take `src` out of the permutation...
+	perm.splice(dst, 0, src); // ...and re-insert it at position `dst`
+	return transpose$1(t, perm);
+}
+function moveBatchAxis(axisSize, src, dst, x) { // Places the batch axis of `x` at index `dst`; `src === null` means `x` has no batch axis yet.
+	if (src === null) {
+		const targetShape = [...x.shape];
+		targetShape.splice(dst, 0, axisSize); // insert a new axis of length axisSize at `dst`
+		return broadcast(x, targetShape, [dst]); // materialize the missing batch axis by broadcasting
+	} else if (src === dst) return x; // already in place
+	else return moveaxis(x, src, dst);
+}
+var BatchTracer = class extends Tracer {
+	constructor(trace$1, val, batchDim) {
 		super(trace$1);
-		this.primal = primal;
-		this.tangent = tangent;
+		this.val = val;
+		this.batchDim = batchDim;
 	}
 	get aval() {
-		return this.primal.aval;
+		if (this.batchDim === null) return this.val.aval;
+		else return mappedAval(this.batchDim, this.val.aval);
 	}
 	toString() {
-		return `JVPTracer(${this.primal.toString()}, ${this.tangent.toString()})`;
+		return `BatchTracer(${this.val.toString()}, ${this.batchDim})`;
 	}
 	get ref() {
-		this.primal.ref, this.tangent.ref;
+		this.val.ref;
 		return this;
 	}
 	dispose() {
-		this.primal.dispose();
-		this.tangent.dispose();
+		this.val.dispose();
+	}
+	fullLower() {
+		if (this.batchDim === null) return this.val.fullLower();
+		else return this;
 	}
 };
-var JVPTrace = class extends Trace {
+var BatchTrace = class extends Trace {
 	pure(val) {
 		return this.lift(pureArray(val));
 	}
 	lift(val) {
-		return new JVPTracer(this, val, zerosLike$1(val.ref));
+		return new BatchTracer(this, val, null);
 	}
 	processPrimitive(primitive, tracers, params) {
-		const [primalsIn, tangentsIn] = require_backend.unzip2(tracers.map((x) => [x.primal, x.tangent]));
-		const jvpRule = jvpRules[primitive];
-		if (jvpRule === void 0) throw new Error(`No JVP rule for: ${primitive}`);
-		const [primalsOut, tangentsOut] = jvpRule(primalsIn, tangentsIn, params);
-		return require_backend.zip(primalsOut, tangentsOut).map(([x, t]) => new JVPTracer(this, x, t));
+		const [valsIn, bdimsIn] = require_backend.unzip2(tracers.map((t) => [t.val, t.batchDim]));
+		const vmapRule = vmapRules[primitive];
+		if (vmapRule === void 0) throw new Error(`No vmap rule for: ${primitive}`);
+		if (bdimsIn.every((d) => d === null)) {
+			const valOuts$1 = bind(primitive, valsIn, params);
+			return valOuts$1.map((x) => new BatchTracer(this, x, null));
+		}
+		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
+		if (valOuts.length !== bdimOuts.length) throw new Error(`vmap rule for ${primitive} returned mismatched lengths: ${valOuts.length} vs ${bdimOuts.length}`);
+		return require_backend.zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
+	}
+	get axisSize() {
+		return this.main.globalData;
 	}
 };
-/** Rule that applies the same operation to primals and tangents. */
-function linearTangentsJvp(primitive) {
-	return (primals, tangents, params) => {
-		const ys = bind(primitive, primals, params);
-		const dys = bind(primitive, tangents, params);
-		return [ys, dys];
+/**
+* Process a primitive with built-in broadcasting.
+*
+* Returns a vmap rule `(axisSize, args, dims, params)` for `prim`:
+*
+* - `nd` is the largest rank among the arguments, counting one extra axis for
+*   every argument that has no batch dimension.
+* - `firstBdim` is the batch axis of the first batched argument, expressed as
+*   a (negative) offset from the end of its shape.
+* - Fast path: if every batched argument has its batch axis at that same
+*   end-relative offset and every unbatched argument has rank below
+*   `-firstBdim`, the primitive is bound on the arguments as-is and the output
+*   batch axis is `nd + firstBdim`.
+* - Otherwise each batched argument gets its batch axis moved to 0 and, if its
+*   rank is below `nd`, size-1 axes inserted right after axis 0. Unbatched
+*   arguments are passed through unchanged and the output batch axis is 0.
+*
+* NOTE(review): assumes at least one entry of `dims` is non-null; `findIndex`
+* would return -1 otherwise.
+*
+* Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
+*
+* @throws Error if the rule is invoked with an empty argument list.
+*/
+function broadcastBatcher(prim) {
+	return (axisSize, args, dims, params) => {
+		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
+		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
+		const firstIdx = dims.findIndex((d) => d !== null);
+		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
+		if (require_backend.zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[bind1(prim, args, params)], [nd + firstBdim]];
+		args = args.map((x, i) => {
+			if (dims[i] === null) return x;
+			x = moveBatchAxis(axisSize, dims[i], 0, x);
+			if (x.ndim < nd) x = x.reshape([
+				x.shape[0],
+				...require_backend.rep(nd - x.ndim, 1),
+				...x.shape.slice(1)
+			]);
+			return x;
+		});
+		return [[bind1(prim, args, params)], [0]];
 	};
 }
-/** Rule for product of gradients in bilinear operations. */
-function bilinearTangentsJvp(primitive) {
-	return ([x, y], [dx, dy], params) => {
-		const primal = bind1(primitive, [x.ref, y.ref], params);
-		const tangent = bind1(primitive, [x, dy], params).add(bind1(primitive, [dx, y], params));
-		return [[primal], [tangent]];
+function unopBatcher(prim) { // Builds a vmap rule for a one-input primitive: bind it on `x` unchanged and report the same batch dim for the output.
+	return (axisSize, [x], [xBdim], params) => {
+		return [[bind1(prim, [x], params)], [xBdim]]; // axisSize is unused here
 	};
 }
-/** Rule that zeros out any tangents. */
-function zeroTangentsJvp(primitive) {
-	return (primals, tangents, params) => {
-		for (const t of tangents) t.dispose();
-		const ys = bind(primitive, primals, params);
-		return [ys, ys.map((y) => zerosLike$1(y.ref))];
+function lastDimsBatcher(prim, inputDims, numOutputs = 1) { // Builds a vmap rule for a one-input primitive tied to the trailing `inputDims` axes; the rule reports `numOutputs` output batch dims.
+	return (axisSize, [x], [xBdim], params) => {
+		require_backend.assertNonNull(xBdim);
+		if (xBdim < x.ndim - inputDims) return [bind(prim, [x], params), require_backend.rep(numOutputs, xBdim)]; // batch axis lies before the trailing axes: bind as-is, batch dim unchanged
+		x = moveBatchAxis(axisSize, xBdim, 0, x); // otherwise move the batch axis to the front first
+		return [bind(prim, [x], params), require_backend.rep(numOutputs, 0)];
 	};
 }
-const jvpRules = {
-	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
-	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
-	[Primitive.Idiv]: zeroTangentsJvp(Primitive.Idiv),
-	[Primitive.Mod]([x, y], [dx, dy]) {
-		if (!require_backend.isFloatDtype(x.dtype) && !require_backend.isFloatDtype(y.dtype)) {
-			dx.dispose();
-			dy.dispose();
-			return [[x.ref, y.ref], [zerosLike$1(x), zerosLike$1(y)]];
-		}
-		const q = idiv(x.ref, y.ref);
-		return [[mod(x, y)], [dx.sub(dy.mul(q))]];
-	},
-	[Primitive.Neg]: linearTangentsJvp(Primitive.Neg),
-	[Primitive.Reciprocal]([x], [dx]) {
-		const xRecip = reciprocal$1(x.ref);
-		return [[xRecip.ref], [neg(xRecip.ref.mul(xRecip)).mul(dx)]];
-	},
-	[Primitive.Floor]: zeroTangentsJvp(Primitive.Floor),
-	[Primitive.Ceil]: zeroTangentsJvp(Primitive.Ceil),
-	[Primitive.StopGradient]: zeroTangentsJvp(Primitive.StopGradient),
-	[Primitive.Cast]([x], [dx], { dtype }) {
-		if (x.dtype === dtype) return [[x], [dx]];
-		if (require_backend.isFloatDtype(dtype) && require_backend.isFloatDtype(x.dtype)) return [[cast(x, dtype)], [cast(dx, dtype)]];
-		else {
-			dx.dispose();
-			return [[cast(x.ref, dtype)], [zerosLike$1(x)]];
-		}
-	},
-	[Primitive.Bitcast]([x], [dx], { dtype }) {
-		if (x.dtype === dtype) return [[x], [dx]];
-		dx.dispose();
-		return [[bitcast(x.ref, dtype)], [zerosLike$1(x)]];
-	},
-	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
-	[Primitive.Sin]([x], [dx]) {
-		return [[sin$1(x.ref)], [cos$1(x).mul(dx)]];
+const vmapRules = {
+	[Primitive.Add]: broadcastBatcher(Primitive.Add),
+	[Primitive.Mul]: broadcastBatcher(Primitive.Mul),
+	[Primitive.Idiv]: broadcastBatcher(Primitive.Idiv),
+	[Primitive.Mod]: broadcastBatcher(Primitive.Mod),
+	[Primitive.Min]: broadcastBatcher(Primitive.Min),
+	[Primitive.Max]: broadcastBatcher(Primitive.Max),
+	[Primitive.Neg]: unopBatcher(Primitive.Neg),
+	[Primitive.Reciprocal]: unopBatcher(Primitive.Reciprocal),
+	[Primitive.Floor]: unopBatcher(Primitive.Floor),
+	[Primitive.Ceil]: unopBatcher(Primitive.Ceil),
+	[Primitive.StopGradient]: unopBatcher(Primitive.StopGradient),
+	[Primitive.Cast]: unopBatcher(Primitive.Cast),
+	[Primitive.Bitcast]: unopBatcher(Primitive.Bitcast),
+	[Primitive.Sin]: unopBatcher(Primitive.Sin),
+	[Primitive.Cos]: unopBatcher(Primitive.Cos),
+	[Primitive.Asin]: unopBatcher(Primitive.Asin),
+	[Primitive.Atan]: unopBatcher(Primitive.Atan),
+	[Primitive.Exp]: unopBatcher(Primitive.Exp),
+	[Primitive.Log]: unopBatcher(Primitive.Log),
+	[Primitive.Erf]: unopBatcher(Primitive.Erf),
+	[Primitive.Erfc]: unopBatcher(Primitive.Erfc),
+	[Primitive.Sqrt]: unopBatcher(Primitive.Sqrt),
+	[Primitive.Reduce](axisSize, [x], [xBdim], { op, axis }) {
+		require_backend.assertNonNull(xBdim);
+		const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
+		const outBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
+		return [[reduce(x, op, newAxis)], [outBdim]];
 	},
-	[Primitive.Cos]([x], [dx]) {
-		return [[cos$1(x.ref)], [neg(sin$1(x)).mul(dx)]];
+	[Primitive.Dot](axisSize, [x, y], [xBdim, yBdim]) {
+		x = moveBatchAxis(axisSize, xBdim, x.ndim - (xBdim === null ? 1 : 2), x);
+		y = moveBatchAxis(axisSize, yBdim, y.ndim - (yBdim === null ? 1 : 2), y);
+		const z = dot$2(x, y);
+		return [[z], [z.ndim - 1]];
 	},
-	[Primitive.Asin]([x], [dx]) {
-		const denom = sqrt$1(reciprocal$1(cast(1, x.dtype).sub(x.ref.mul(x.ref))));
-		return [[asin$1(x)], [denom.mul(dx)]];
+	[Primitive.Conv](axisSize, [x, y], [xBdim, yBdim], params) {
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		y = moveBatchAxis(axisSize, yBdim, 0, y);
+		const z = conv$1(x, y, {
+			...params,
+			vmapDims: params.vmapDims + 1
+		});
+		return [[z], [0]];
 	},
-	[Primitive.Atan]([x], [dx]) {
-		const denom = cast(1, x.dtype).add(x.ref.mul(x.ref));
-		return [[atan$1(x)], [dx.div(denom)]];
+	[Primitive.Compare]: broadcastBatcher(Primitive.Compare),
+	[Primitive.Where]: broadcastBatcher(Primitive.Where),
+	[Primitive.Concatenate](axisSize, xs, xBdims, { axis }) {
+		const minBdim = Math.min(...xBdims.filter((d) => d !== null));
+		xs = xs.map((x, i) => moveBatchAxis(axisSize, xBdims[i], minBdim, x));
+		const newAxis = axis + (minBdim <= axis ? 1 : 0);
+		return [[concatenate$1(xs, newAxis)], [minBdim]];
 	},
-	[Primitive.Exp]([x], [dx]) {
-		const z = exp$1(x);
-		return [[z.ref], [z.mul(dx)]];
+	[Primitive.Split](axisSize, [x], [xBdim], { axis, sizes }) {
+		require_backend.assertNonNull(xBdim);
+		const newAxis = axis + (xBdim <= axis ? 1 : 0);
+		const outs = split$2(x, newAxis, sizes);
+		return [outs, require_backend.rep(outs.length, xBdim)];
 	},
-	[Primitive.Log]([x], [dx]) {
-		return [[log$1(x.ref)], [reciprocal$1(x).mul(dx)]];
+	[Primitive.RandomBits](axisSize, [k0, k1], [bdim0, bdim1], { shape: shape$1, mode }) {
+		k0 = moveBatchAxis(axisSize, bdim0, 0, k0);
+		k1 = moveBatchAxis(axisSize, bdim1, 0, k1);
+		return [[randomBits(k0, k1, [axisSize, ...shape$1], mode)], [0]];
 	},
-	[Primitive.Erf]([x], [dx]) {
-		const coeff = 2 / Math.sqrt(Math.PI);
-		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
-		return [[erf$1(x)], [expTerm.mul(coeff).mul(dx)]];
-	},
-	[Primitive.Erfc]([x], [dx]) {
-		const coeff = -2 / Math.sqrt(Math.PI);
-		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
-		return [[erfc$1(x)], [expTerm.mul(coeff).mul(dx)]];
-	},
-	[Primitive.Sqrt]([x], [dx]) {
-		const z = sqrt$1(x);
-		return [[z.ref], [reciprocal$1(z.mul(2)).mul(dx)]];
-	},
-	[Primitive.Min]([x, y], [dx, dy]) {
-		return [[min$1(x.ref, y.ref)], [where$1(less$1(y, x), dy, dx)]];
-	},
-	[Primitive.Max]([x, y], [dx, dy]) {
-		return [[max$1(x.ref, y.ref)], [where$1(less$1(x, y), dy, dx)]];
-	},
-	[Primitive.Reduce]([x], [dx], { op, axis }) {
-		if (op === require_backend.AluOp.Add) return [[reduce(x, op, axis)], [reduce(dx, op, axis)]];
-		else if (op === require_backend.AluOp.Mul) {
-			const primal = reduce(x.ref, op, axis);
-			const tangent = broadcast(primal.ref, x.shape, axis).mul(reciprocal$1(x)).mul(dx).sum(axis);
-			return [[primal], [tangent]];
-		} else if (op === require_backend.AluOp.Min || op === require_backend.AluOp.Max) {
-			const primal = reduce(x.ref, op, axis);
-			const notMin = notEqual$1(x, broadcast(primal.ref, x.shape, axis));
-			const minCount = where$1(notMin.ref, 0, 1).sum(axis);
-			const tangent = where$1(notMin, 0, dx).sum(axis).div(minCount);
-			return [[primal], [tangent]];
-		} else throw new Error(`JVP rule not implemented for reduce op: ${op}`);
-	},
-	[Primitive.Pool]: linearTangentsJvp(Primitive.Pool),
-	[Primitive.PoolTranspose]: linearTangentsJvp(Primitive.PoolTranspose),
-	[Primitive.Dot]: bilinearTangentsJvp(Primitive.Dot),
-	[Primitive.Conv]: bilinearTangentsJvp(Primitive.Conv),
-	[Primitive.Compare]: zeroTangentsJvp(Primitive.Compare),
-	[Primitive.Where]([cond, x, y], [dcond, dx, dy]) {
-		dcond.dispose();
-		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
-	},
-	[Primitive.Transpose]: linearTangentsJvp(Primitive.Transpose),
-	[Primitive.Broadcast]: linearTangentsJvp(Primitive.Broadcast),
-	[Primitive.Reshape]: linearTangentsJvp(Primitive.Reshape),
-	[Primitive.Flip]: linearTangentsJvp(Primitive.Flip),
-	[Primitive.Shrink]: linearTangentsJvp(Primitive.Shrink),
-	[Primitive.Pad]: linearTangentsJvp(Primitive.Pad),
-	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
-		const indicesRef = indices.map((t) => t.ref);
-		return [[gather(x, indices, axis, outDim)], [gather(dx, indicesRef, axis, outDim)]];
-	},
-	[Primitive.JitCall](primals, tangents, { name, jaxpr }) {
-		const { newJaxpr, newConsts } = jvpJaxpr(jaxpr);
-		const outs = bind(Primitive.JitCall, [
-			...newConsts.map((c) => c.ref),
-			...primals,
-			...tangents
-		], {
-			name: `${name}_jvp`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
-		});
-		const n = outs.length / 2;
-		if (!Number.isInteger(n)) throw new Error("internal: JVP Jaxpr output length is not even");
-		const [primalsOut, tangentsOut] = [outs.slice(0, n), outs.slice(n)];
-		return [primalsOut, tangentsOut];
-	}
-};
-const jvpJaxprCache = /* @__PURE__ */ new Map();
-function jvpJaxpr(jaxpr) {
-	if (jvpJaxprCache.has(jaxpr)) return jvpJaxprCache.get(jaxpr);
-	const inAvals = jaxpr.inBinders.map((v) => v.aval);
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((primals, tangents) => jvpFlat(jaxprAsFun(jaxpr), primals, tangents))(inAvals, inAvals);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
-	jvpJaxprCache.set(jaxpr, result);
-	return result;
-}
-function jvpFlat(f, primals, tangents) {
-	try {
-		var _usingCtx$1 = (0, import_usingCtx$1.default)();
-		const main = _usingCtx$1.u(newMain(JVPTrace));
-		const trace$1 = new JVPTrace(main);
-		const tracersIn = require_backend.zip(primals, tangents).map(([x, t]) => new JVPTracer(trace$1, pureArray(x), pureArray(t)));
-		const outs = f(...tracersIn);
-		const tracersOut = outs.map((out) => fullRaise(trace$1, out));
-		return require_backend.unzip2(tracersOut.map((t) => [t.primal, t.tangent]));
-	} catch (_) {
-		_usingCtx$1.e = _;
-	} finally {
-		_usingCtx$1.d();
-	}
-}
-function jvp$1(f, primals, tangents) {
-	const [primalsFlat, inTree] = flatten(primals);
-	const [tangentsFlat, inTree2] = flatten(tangents);
-	if (!inTree.equals(inTree2)) throw new TreeMismatchError("jvp", inTree, inTree2);
-	const [flatFun, outTree] = flattenFun(f, inTree);
-	const [primalsOutFlat, tangentsOutFlat] = jvpFlat(flatFun, primalsFlat, tangentsFlat);
-	if (outTree.value === void 0) throw new Error("outTree was not set in jvp");
-	const primalsOut = unflatten(outTree.value, primalsOutFlat);
-	const tangentsOut = unflatten(outTree.value, tangentsOutFlat);
-	return [primalsOut, tangentsOut];
-}
-//#endregion
-//#region src/frontend/vmap.ts
-var import_usingCtx = /* @__PURE__ */ __toESM(require_usingCtx(), 1);
-function mappedAval(batchDim, aval) {
-	const shape$1 = [...aval.shape];
-	shape$1.splice(batchDim, 1);
-	return new ShapedArray(shape$1, aval.dtype, aval.weakType);
-}
-/** Move one axis to a different index. */
-function moveaxis(x, src, dst) {
-	const t = pureArray(x);
-	src = require_backend.checkAxis(src, t.ndim);
-	dst = require_backend.checkAxis(dst, t.ndim);
-	if (src === dst) return t;
-	const perm = require_backend.range(t.ndim);
-	perm.splice(src, 1);
-	perm.splice(dst, 0, src);
-	return transpose$1(t, perm);
-}
-function moveBatchAxis(axisSize, src, dst, x) {
-	if (src === null) {
-		const targetShape = [...x.shape];
-		targetShape.splice(dst, 0, axisSize);
-		return broadcast(x, targetShape, [dst]);
-	} else if (src === dst) return x;
-	else return moveaxis(x, src, dst);
-}
-var BatchTracer = class extends Tracer {
-	constructor(trace$1, val, batchDim) {
-		super(trace$1);
-		this.val = val;
-		this.batchDim = batchDim;
-	}
-	get aval() {
-		if (this.batchDim === null) return this.val.aval;
-		else return mappedAval(this.batchDim, this.val.aval);
-	}
-	toString() {
-		return `BatchTracer(${this.val.toString()}, ${this.batchDim})`;
-	}
-	get ref() {
-		this.val.ref;
-		return this;
-	}
-	dispose() {
-		this.val.dispose();
-	}
-	fullLower() {
-		if (this.batchDim === null) return this.val.fullLower();
-		else return this;
-	}
-};
-var BatchTrace = class extends Trace {
-	pure(val) {
-		return this.lift(pureArray(val));
-	}
-	lift(val) {
-		return new BatchTracer(this, val, null);
-	}
-	processPrimitive(primitive, tracers, params) {
-		const [valsIn, bdimsIn] = require_backend.unzip2(tracers.map((t) => [t.val, t.batchDim]));
-		const vmapRule = vmapRules[primitive];
-		if (vmapRule === void 0) throw new Error(`No vmap rule for: ${primitive}`);
-		if (bdimsIn.every((d) => d === null)) {
-			const valOuts$1 = bind(primitive, valsIn, params);
-			return valOuts$1.map((x) => new BatchTracer(this, x, null));
+	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
+		if (indicesBdim.every((d) => d === null)) {
+			require_backend.assertNonNull(xBdim);
+			const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
+			let newBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
+			let newOutDim = outDim;
+			if (newOutDim < newBdim) newBdim += axis.length;
+			else newOutDim += 1;
+			return [[gather(x, indices, newAxis, newOutDim)], [newBdim]];
 		}
-		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
-		return require_backend.zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
-	}
-	get axisSize() {
-		return this.main.globalData;
-	}
-};
-/**
-* Process a primitive with built-in broadcasting.
-*
-* Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
-*/
-function broadcastBatcher(op) {
-	return (axisSize, args, dims) => {
-		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
-		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
-		const firstIdx = dims.findIndex((d) => d !== null);
-		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
-		if (require_backend.zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[op(...args)], [nd + firstBdim]];
-		args = args.map((x, i) => {
-			if (dims[i] === null) return x;
-			x = moveBatchAxis(axisSize, dims[i], 0, x);
-			if (x.ndim < nd) x = x.reshape([
-				x.shape[0],
-				...require_backend.rep(nd - x.ndim, 1),
-				...x.shape.slice(1)
+		const nd = Math.max(...indices.map((m, i) => ndim$1(m) + (indicesBdim[i] === null ? 1 : 0)));
+		indices = indices.map((m, i) => {
+			if (indicesBdim[i] === null) return m;
+			m = moveBatchAxis(axisSize, indicesBdim[i], 0, m);
+			if (m.ndim < nd) m = m.reshape([
+				m.shape[0],
+				...require_backend.rep(nd - m.ndim, 1),
+				...m.shape.slice(1)
 			]);
-			return x;
-		});
-		return [[op(...args)], [0]];
-	};
-}
-function unopBatcher(op) {
-	return (axisSize, [x], [xBdim], params) => {
-		return [[op(x, params)], [xBdim]];
-	};
-}
-const vmapRules = {
-	[Primitive.Add]: broadcastBatcher(add$1),
-	[Primitive.Mul]: broadcastBatcher(mul),
-	[Primitive.Idiv]: broadcastBatcher(idiv),
-	[Primitive.Mod]: broadcastBatcher(mod),
-	[Primitive.Neg]: unopBatcher(neg),
-	[Primitive.Reciprocal]: unopBatcher(reciprocal$1),
-	[Primitive.Floor]: unopBatcher(floor$1),
-	[Primitive.Ceil]: unopBatcher(ceil$1),
-	[Primitive.StopGradient]: unopBatcher(stopGradient),
-	[Primitive.Cast]: unopBatcher((x, { dtype }) => cast(x, dtype)),
-	[Primitive.Bitcast]: unopBatcher((x, { dtype }) => bitcast(x, dtype)),
-	[Primitive.Sin]: unopBatcher(sin$1),
-	[Primitive.Cos]: unopBatcher(cos$1),
-	[Primitive.Asin]: unopBatcher(asin$1),
-	[Primitive.Atan]: unopBatcher(atan$1),
-	[Primitive.Exp]: unopBatcher(exp$1),
-	[Primitive.Log]: unopBatcher(log$1),
-	[Primitive.Erf]: unopBatcher(erf$1),
-	[Primitive.Erfc]: unopBatcher(erfc$1),
-	[Primitive.Sqrt]: unopBatcher(sqrt$1),
-	[Primitive.Min]: broadcastBatcher(min$1),
-	[Primitive.Max]: broadcastBatcher(max$1),
-	[Primitive.Reduce](axisSize, [x], [xBdim], { op, axis }) {
-		require_backend.assertNonNull(xBdim);
-		const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
-		const outBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
-		return [[reduce(x, op, newAxis)], [outBdim]];
-	},
-	[Primitive.Dot](axisSize, [x, y], [xBdim, yBdim]) {
-		x = moveBatchAxis(axisSize, xBdim, x.ndim - (xBdim === null ? 1 : 2), x);
-		y = moveBatchAxis(axisSize, yBdim, y.ndim - (yBdim === null ? 1 : 2), y);
-		const z = dot$2(x, y);
-		return [[z], [z.ndim - 1]];
-	},
-	[Primitive.Conv](axisSize, [x, y], [xBdim, yBdim], params) {
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		y = moveBatchAxis(axisSize, yBdim, 0, y);
-		const z = conv$1(x, y, {
-			...params,
-			vmapDims: params.vmapDims + 1
+			return m;
 		});
-		return [[z], [0]];
-	},
-	[Primitive.Compare](axisSize, args, dims, { op }) {
-		return broadcastBatcher((x, y) => compare(x, y, op))(axisSize, args, dims, {});
+		if (xBdim === null) return [[gather(x, indices, axis, outDim)], [outDim]];
+		else {
+			x = moveBatchAxis(axisSize, xBdim, 0, x);
+			const newAxis = [0, ...axis.map((ax) => ax + 1)];
+			const extraBatchIndex = arange(axisSize).reshape([-1, ...require_backend.rep(nd - 1, 1)]);
+			indices.splice(0, 0, extraBatchIndex);
+			return [[gather(x, indices, newAxis, outDim)], [outDim]];
+		}
 	},
-	[Primitive.Where]: broadcastBatcher(where$1),
 	[Primitive.Transpose](axisSize, [x], [xBdim], { perm }) {
 		require_backend.assertNonNull(xBdim);
 		const newPerm = perm.map((p) => p + (xBdim <= p ? 1 : 0));
@@ -3632,42 +3867,39 @@ const vmapRules = {
 		const newWidth = width.toSpliced(xBdim, 0, [0, 0]);
 		return [[pad$1(x, newWidth)], [xBdim]];
 	},
-	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
-		if (indicesBdim.every((d) => d === null)) {
-			require_backend.assertNonNull(xBdim);
-			const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
-			let newBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
-			let newOutDim = outDim;
-			if (newOutDim < newBdim) newBdim += axis.length;
-			else newOutDim += 1;
-			return [[gather(x, indices, newAxis, newOutDim)], [newBdim]];
-		}
-		const nd = Math.max(...indices.map((m, i) => ndim$1(m) + (indicesBdim[i] === null ? 1 : 0)));
-		indices = indices.map((m, i) => {
-			if (indicesBdim[i] === null) return m;
-			m = moveBatchAxis(axisSize, indicesBdim[i], 0, m);
-			if (m.ndim < nd) m = m.reshape([
-				m.shape[0],
-				...require_backend.rep(nd - m.ndim, 1),
-				...m.shape.slice(1)
+	[Primitive.Sort]: lastDimsBatcher(Primitive.Sort, 1),
+	[Primitive.Argsort]: lastDimsBatcher(Primitive.Argsort, 1, 2),
+	[Primitive.TriangularSolve](axisSize, [a, b], [aBdim, bBdim], { unitDiagonal }) {
+		if (aBdim === null) {
+			b = moveBatchAxis(axisSize, bBdim, -3, b);
+			const [s, m, n] = b.shape.slice(-3);
+			b = b.reshape([
+				...b.shape.slice(0, -3),
+				s * m,
+				n
 			]);
-			return m;
-		});
-		if (xBdim === null) return [[gather(x, indices, axis, outDim)], [outDim]];
-		else {
-			x = moveBatchAxis(axisSize, xBdim, 0, x);
-			const newAxis = [0, ...axis.map((ax) => ax + 1)];
-			const extraBatchIndex = arange(axisSize).reshape([-1, ...require_backend.rep(nd - 1, 1)]);
-			indices.splice(0, 0, extraBatchIndex);
-			return [[gather(x, indices, newAxis, outDim)], [outDim]];
+			let x$1 = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+			x$1 = x$1.reshape([
+				...b.shape.slice(0, -2),
+				s,
+				m,
+				n
+			]);
+			return [[x$1], [x$1.ndim - 3]];
 		}
+		a = moveBatchAxis(axisSize, aBdim, 0, a);
+		b = moveBatchAxis(axisSize, bBdim, 0, b);
+		const x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+		return [[x], [0]];
 	},
-	[Primitive.JitCall](axisSize, args, dims, { name, jaxpr }) {
-		const { newJaxpr, newConsts } = vmapJaxpr(jaxpr, axisSize, dims);
-		const outs = bind(Primitive.JitCall, [...newConsts.map((c) => c.ref), ...args], {
+	[Primitive.Cholesky]: lastDimsBatcher(Primitive.Cholesky, 2),
+	[Primitive.LU]: lastDimsBatcher(Primitive.LU, 2, 3),
+	[Primitive.Jit](axisSize, args, dims, { name, jaxpr }) {
+		const newJaxpr = vmapJaxpr(jaxpr, axisSize, dims);
+		const outs = bind(Primitive.Jit, [...newJaxpr.consts.map((c) => c.ref), ...args], {
 			name: `${name}_vmap`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
 		});
 		return [outs, require_backend.rep(outs.length, 0)];
 	}
@@ -3683,14 +3915,10 @@ function vmapJaxpr(jaxpr, axisSize, dims) {
 		shape$1.splice(dims[i], 0, axisSize);
 		return new ShapedArray(shape$1, v.aval.dtype, v.aval.weakType);
 	});
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((args) => vmapFlat(jaxprAsFun(jaxpr), dims, args))(inAvals);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
+	const { jaxpr: newJaxpr } = makeJaxpr$1((args) => vmapFlat(jaxprAsFun(jaxpr), dims, args))(inAvals);
 	if (!vmapJaxprCache.has(jaxpr)) vmapJaxprCache.set(jaxpr, /* @__PURE__ */ new Map());
-	vmapJaxprCache.get(jaxpr).set(cacheKey, result);
-	return result;
+	vmapJaxprCache.get(jaxpr).set(cacheKey, newJaxpr);
+	return newJaxpr;
 }
 function vmapFlat(f, inAxes, args) {
 	let axisSize = void 0;
@@ -3704,7 +3932,7 @@ function vmapFlat(f, inAxes, args) {
 	if (axisSize === void 0) throw new TypeError("vmap requires at least one mapped axis");
 	let valsOut, bdimsOut;
 	try {
-		var _usingCtx$1 = (0, import_usingCtx.default)();
+		var _usingCtx$1 = (0, import_usingCtx$1.default)();
 		const main = _usingCtx$1.u(newMain(BatchTrace, axisSize));
 		const trace$1 = new BatchTrace(main);
 		const tracersIn = args.map((x, i) => inAxes[i] === null ? pureArray(x) : new BatchTracer(trace$1, pureArray(x), inAxes[i]));
@@ -3736,13 +3964,312 @@ function vmap$1(f, inAxes = 0) {
 		return unflatten(outTree.value, outsFlat);
 	};
 }
-function jacfwd$1(f) {
-	return function jacobianForward(x) {
-		if (x.shape.length !== 1) throw new TypeError("jacfwd only supports 1D inputs");
-		const [size$1] = x.shape;
-		const pushfwd = (v) => jvp$1(f, [x], [v])[1];
-		return vmap$1(pushfwd, [0])(eye(size$1, void 0, { dtype: x.dtype }));
-	};
+function jacfwd$1(f) {
+	return function jacobianForward(x) {
+		if (x.shape.length !== 1) throw new TypeError("jacfwd only supports 1D inputs");
+		const [size$1] = x.shape;
+		const pushfwd = (v) => jvp$1(f, [x], [v])[1];
+		return vmap$1(pushfwd, [0])(eye(size$1, void 0, { dtype: x.dtype }));
+	};
+}
+//#endregion
+//#region src/frontend/jvp.ts
+var import_usingCtx = /* @__PURE__ */ __toESM(require_usingCtx(), 1);
+var JVPTracer = class extends Tracer {
+	constructor(trace$1, primal, tangent) {
+		super(trace$1);
+		this.primal = primal;
+		this.tangent = tangent;
+	}
+	get aval() {
+		return this.primal.aval;
+	}
+	toString() {
+		return `JVPTracer(${this.primal.toString()}, ${this.tangent.toString()})`;
+	}
+	get ref() {
+		this.primal.ref, this.tangent.ref;
+		return this;
+	}
+	dispose() {
+		this.primal.dispose();
+		this.tangent.dispose();
+	}
+};
+var JVPTrace = class extends Trace {
+	pure(val) {
+		return this.lift(pureArray(val));
+	}
+	lift(val) {
+		return new JVPTracer(this, val, zerosLike$1(val.ref));
+	}
+	processPrimitive(primitive, tracers, params) {
+		const [primalsIn, tangentsIn] = require_backend.unzip2(tracers.map((x) => [x.primal, x.tangent]));
+		const jvpRule = jvpRules[primitive];
+		if (jvpRule === void 0) throw new Error(`No JVP rule for: ${primitive}`);
+		const [primalsOut, tangentsOut] = jvpRule(primalsIn, tangentsIn, params);
+		return require_backend.zip(primalsOut, tangentsOut).map(([x, t]) => new JVPTracer(this, x, t));
+	}
+};
+/** Rule that applies the same operation to primals and tangents. */
+function linearTangentsJvp(primitive) {
+	return (primals, tangents, params) => {
+		const ys = bind(primitive, primals, params);
+		const dys = bind(primitive, tangents, params);
+		return [ys, dys];
+	};
+}
+/** Rule for product of gradients in bilinear operations. */
+function bilinearTangentsJvp(primitive) {
+	return ([x, y], [dx, dy], params) => {
+		const primal = bind1(primitive, [x.ref, y.ref], params);
+		const tangent = bind1(primitive, [x, dy], params).add(bind1(primitive, [dx, y], params));
+		return [[primal], [tangent]];
+	};
+}
+/** Rule that zeros out any tangents. */
+function zeroTangentsJvp(primitive) {
+	return (primals, tangents, params) => {
+		for (const t of tangents) t.dispose();
+		const ys = bind(primitive, primals, params);
+		return [ys, ys.map((y) => zerosLike$1(y.ref))];
+	};
+}
+/** Compute `a @ b.T`, batched to last two axes. */
+function batchMatmulT(a, b) {
+	return dot$2(a.reshape(a.shape.toSpliced(-1, 0, 1)), b.reshape(b.shape.toSpliced(-2, 0, 1)));
+}
+/**
+* Batch matrix transpose: moves axis -2 of `a` to position -1 with `moveaxis`,
+* i.e. swaps the last two axes; any leading axes are not mentioned here.
+*/
+function mT(a) {
+	return moveaxis(a, -2, -1);
+}
+function sliceAxis(a, axis, p) {
+	const slices = Array(a.shape.length).fill([]);
+	slices[require_backend.checkAxis(axis, a.ndim)] = p;
+	return a.slice(...slices);
+}
+function padAxis(a, axis, p) {
+	const pads = Array(a.shape.length).fill([0, 0]);
+	pads[require_backend.checkAxis(axis, a.ndim)] = p;
+	return pad$1(a, pads);
+}
+const jvpRules = {
+	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
+	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
+	[Primitive.Idiv]: zeroTangentsJvp(Primitive.Idiv),
+	[Primitive.Mod]([x, y], [dx, dy]) {
+		if (!require_backend.isFloatDtype(x.dtype) && !require_backend.isFloatDtype(y.dtype)) {
+			dx.dispose();
+			dy.dispose();
+			return [[x.ref, y.ref], [zerosLike$1(x), zerosLike$1(y)]];
+		}
+		const q = idiv(x.ref, y.ref);
+		return [[mod(x, y)], [dx.sub(dy.mul(q))]];
+	},
+	[Primitive.Min]([x, y], [dx, dy]) {
+		return [[min$1(x.ref, y.ref)], [where$1(less$1(y, x), dy, dx)]];
+	},
+	[Primitive.Max]([x, y], [dx, dy]) {
+		return [[max$1(x.ref, y.ref)], [where$1(less$1(x, y), dy, dx)]];
+	},
+	[Primitive.Neg]: linearTangentsJvp(Primitive.Neg),
+	[Primitive.Reciprocal]([x], [dx]) {
+		const xRecip = reciprocal$1(x.ref);
+		return [[xRecip.ref], [neg(xRecip.ref.mul(xRecip)).mul(dx)]];
+	},
+	[Primitive.Floor]: zeroTangentsJvp(Primitive.Floor),
+	[Primitive.Ceil]: zeroTangentsJvp(Primitive.Ceil),
+	[Primitive.StopGradient]: zeroTangentsJvp(Primitive.StopGradient),
+	[Primitive.Cast]([x], [dx], { dtype }) {
+		if (x.dtype === dtype) return [[x], [dx]];
+		if (require_backend.isFloatDtype(dtype) && require_backend.isFloatDtype(x.dtype)) return [[cast(x, dtype)], [cast(dx, dtype)]];
+		else {
+			dx.dispose();
+			return [[cast(x.ref, dtype)], [zerosLike$1(x)]];
+		}
+	},
+	[Primitive.Bitcast]([x], [dx], { dtype }) {
+		if (x.dtype === dtype) return [[x], [dx]];
+		dx.dispose();
+		return [[bitcast(x.ref, dtype)], [zerosLike$1(x)]];
+	},
+	[Primitive.Sin]([x], [dx]) {
+		return [[sin$1(x.ref)], [cos$1(x).mul(dx)]];
+	},
+	[Primitive.Cos]([x], [dx]) {
+		return [[cos$1(x.ref)], [neg(sin$1(x)).mul(dx)]];
+	},
+	[Primitive.Asin]([x], [dx]) {
+		const denom = sqrt$1(reciprocal$1(cast(1, x.dtype).sub(x.ref.mul(x.ref))));
+		return [[asin$1(x)], [denom.mul(dx)]];
+	},
+	[Primitive.Atan]([x], [dx]) {
+		const denom = cast(1, x.dtype).add(x.ref.mul(x.ref));
+		return [[atan$1(x)], [dx.div(denom)]];
+	},
+	[Primitive.Exp]([x], [dx]) {
+		const z = exp$1(x);
+		return [[z.ref], [z.mul(dx)]];
+	},
+	[Primitive.Log]([x], [dx]) {
+		return [[log$1(x.ref)], [reciprocal$1(x).mul(dx)]];
+	},
+	[Primitive.Erf]([x], [dx]) {
+		const coeff = 2 / Math.sqrt(Math.PI);
+		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
+		return [[erf$1(x)], [expTerm.mul(coeff).mul(dx)]];
+	},
+	[Primitive.Erfc]([x], [dx]) {
+		const coeff = -2 / Math.sqrt(Math.PI);
+		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
+		return [[erfc$1(x)], [expTerm.mul(coeff).mul(dx)]];
+	},
+	[Primitive.Sqrt]([x], [dx]) {
+		const z = sqrt$1(x);
+		return [[z.ref], [reciprocal$1(z.mul(2)).mul(dx)]];
+	},
+	[Primitive.Reduce]([x], [dx], { op, axis }) { // JVP of a reduction over `axis`; the tangent formula depends on the reduction `op`.
+		if (op === require_backend.AluOp.Add) return [[reduce(x, op, axis)], [reduce(dx, op, axis)]]; // Sum: reduce the tangent exactly like the primal.
+		else if (op === require_backend.AluOp.Mul) {
+			const primal = reduce(x.ref, op, axis);
+			const tangent = broadcast(primal.ref, x.shape, axis).mul(reciprocal$1(x)).mul(dx).sum(axis); // Product: sum over the axis of (product * (1 / x_i) * dx_i). NOTE(review): relies on 1 / x_i, so zeros in x look problematic -- confirm.
+			return [[primal], [tangent]];
+		} else if (op === require_backend.AluOp.Min || op === require_backend.AluOp.Max) {
+			const primal = reduce(x.ref, op, axis);
+			const notMin = notEqual$1(x, broadcast(primal.ref, x.shape, axis)); // True where the element differs from the reduced value (named for Min, but this branch also handles Max).
+			const minCount = where$1(notMin.ref, 0, 1).sum(axis); // Number of elements equal to the reduced value.
+			const tangent = where$1(notMin, 0, dx).sum(axis).div(minCount); // Average of dx over the elements that attain the reduced value.
+			return [[primal], [tangent]];
+		} else throw new Error(`JVP rule not implemented for reduce op: ${op}`);
+	},
+	[Primitive.Pool]: linearTangentsJvp(Primitive.Pool),
+	[Primitive.PoolTranspose]: linearTangentsJvp(Primitive.PoolTranspose),
+	[Primitive.Dot]: bilinearTangentsJvp(Primitive.Dot),
+	[Primitive.Conv]: bilinearTangentsJvp(Primitive.Conv),
+	[Primitive.Compare]: zeroTangentsJvp(Primitive.Compare),
+	[Primitive.Where]([cond, x, y], [dcond, dx, dy]) {
+		dcond.dispose();
+		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
+	},
+	[Primitive.Concatenate]: linearTangentsJvp(Primitive.Concatenate),
+	[Primitive.Split]: linearTangentsJvp(Primitive.Split),
+	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
+	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
+		const indicesRef = indices.map((t) => t.ref);
+		return [[gather(x, indices, axis, outDim)], [gather(dx, indicesRef, axis, outDim)]];
+	},
+	[Primitive.Transpose]: linearTangentsJvp(Primitive.Transpose),
+	[Primitive.Broadcast]: linearTangentsJvp(Primitive.Broadcast),
+	[Primitive.Reshape]: linearTangentsJvp(Primitive.Reshape),
+	[Primitive.Flip]: linearTangentsJvp(Primitive.Flip),
+	[Primitive.Shrink]: linearTangentsJvp(Primitive.Shrink),
+	[Primitive.Pad]: linearTangentsJvp(Primitive.Pad),
+	[Primitive.Sort]([x], [dx]) {
+		const [y, idx] = argsort$1(x);
+		return [[y], [gather(dx, [idx], [-1], -1)]];
+	},
+	[Primitive.Argsort]([x], [dx]) {
+		const [y, idx] = argsort$1(x);
+		return [[y, idx.ref], [gather(dx, [idx.ref], [-1], -1), zerosLike$1(idx)]];
+	},
+	[Primitive.TriangularSolve]([a, b], [da, db], { unitDiagonal }) {
+		const x = triangularSolve$1(a.ref, b, { unitDiagonal });
+		const dax = batchMatmulT(da, x.ref);
+		const rhsT = db.sub(mT(dax));
+		const dx = triangularSolve$1(a, rhsT, { unitDiagonal });
+		return [[x], [dx]];
+	},
+	[Primitive.Cholesky]([a], [da]) {
+		const L = cholesky$2(a.ref);
+		da = da.ref.add(mT(da)).mul(.5);
+		const W = triangularSolve$1(L.ref, da, { lower: true });
+		const ST = triangularSolve$1(L.ref, mT(W), { lower: true });
+		const dL = batchMatmulT(L.ref, triu(ST.ref, 1).add(triu(ST)).mul(.5));
+		return [[L], [dL]];
+	},
+	[Primitive.LU]([a], [da]) { // JVP of the LU primitive: rebuilds square L and U from the packed result, then returns L @ tril(X, -1) + triu(X) @ U as the tangent of the packed matrix.
+		const [luMatrix, pivots, permutation] = lu$1(a);
+		const [m, n] = a.shape.slice(-2); // Sizes of the last two axes of the input.
+		const k = Math.min(m, n);
+		const luSliceL = sliceAxis(luMatrix.ref, -1, [0, k]); // First k entries along the last axis.
+		const lLower = tril(luSliceL, -1); // Strictly-lower part only.
+		const lPadded = m > k ? padAxis(lLower, -1, [0, m - k]) : lLower; // Pad the last axis by m - k so L is m x m.
+		const L = lPadded.add(eye(m)); // Adding the identity gives L a unit diagonal (the solve below uses unitDiagonal: true).
+		const luSliceU = sliceAxis(luMatrix.ref, -2, [0, k]); // First k entries along the second-to-last axis.
+		const uUpper = triu(luSliceU);
+		const uPadded = n > k ? padAxis(uUpper, -2, [0, n - k]) : uUpper; // Pad the second-to-last axis by n - k so U is n x n.
+		const uEye = n > k ? padAxis(padAxis(eye(n - k), -1, [k, 0]), -2, [k, 0]) : zerosLike$1(uPadded.ref); // Identity in the padded bottom-right block when n > k; zeros otherwise.
+		const U = uPadded.add(uEye);
+		const P = permutation.ref.reshape([...permutation.shape, 1]).equal(arange(m)).astype(da.dtype); // One-hot rows: P[i, j] is 1 where permutation[i] equals j.
+		const pda = batchMatmulT(P, mT(da)); // P @ da (batchMatmulT computes a @ b.T).
+		const la = mT(triangularSolve$1(L.ref, mT(pda), {
+			lower: true,
+			unitDiagonal: true
+		}));
+		const lau = triangularSolve$1(mT(U.ref), la, { lower: true }); // NOTE(review): presumably X = L^-1 (P da) U^-1 -- confirm against triangularSolve's operand convention.
+		const lDot = batchMatmulT(L, mT(tril(lau.ref, -1))); // L @ tril(X, -1)
+		const uDot = batchMatmulT(triu(lau), mT(U)); // triu(X) @ U
+		return [[
+			luMatrix,
+			pivots,
+			permutation
+		], [
+			lDot.add(uDot),
+			zerosLike$1(pivots.ref), // pivots and permutation get zero tangents.
+			zerosLike$1(permutation.ref)
+		]];
+	},
+	[Primitive.Jit](primals, tangents, { name, jaxpr }) {
+		const newJaxpr = jvpJaxpr(jaxpr);
+		const outs = bind(Primitive.Jit, [
+			...newJaxpr.consts.map((c) => c.ref),
+			...primals,
+			...tangents
+		], {
+			name: `${name}_jvp`,
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
+		});
+		const n = outs.length / 2;
+		if (!Number.isInteger(n)) throw new Error("internal: JVP Jaxpr output length is not even");
+		const [primalsOut, tangentsOut] = [outs.slice(0, n), outs.slice(n)];
+		return [primalsOut, tangentsOut];
+	}
+};
+const jvpJaxprCache = /* @__PURE__ */ new Map();
+function jvpJaxpr(jaxpr) {
+	if (jvpJaxprCache.has(jaxpr)) return jvpJaxprCache.get(jaxpr);
+	const inAvals = jaxpr.inBinders.map((v) => v.aval);
+	const { jaxpr: newJaxpr } = makeJaxpr$1((primals, tangents) => jvpFlat(jaxprAsFun(jaxpr), primals, tangents))(inAvals, inAvals);
+	jvpJaxprCache.set(jaxpr, newJaxpr);
+	return newJaxpr;
+}
+function jvpFlat(f, primals, tangents) { // Runs `f` on JVPTracers built from (primal, tangent) pairs and returns [primalsOut, tangentsOut] as flat lists.
+	try { // NOTE(review): try/catch/finally around `_usingCtx$1` looks like the transpiled form of a `using` declaration -- confirm against the TypeScript source.
+		var _usingCtx$1 = (0, import_usingCtx.default)();
+		const main = _usingCtx$1.u(newMain(JVPTrace)); // Registers the new main trace with the context, presumably so `.d()` below releases it.
+		const trace$1 = new JVPTrace(main);
+		const tracersIn = require_backend.zip(primals, tangents).map(([x, t]) => new JVPTracer(trace$1, pureArray(x), pureArray(t)));
+		const outs = f(...tracersIn);
+		const tracersOut = outs.map((out) => fullRaise(trace$1, out)); // Raise every output to this trace so it exposes .primal and .tangent.
+		return require_backend.unzip2(tracersOut.map((t) => [t.primal, t.tangent]));
+	} catch (_) {
+		_usingCtx$1.e = _; // Error is recorded on the context rather than rethrown here; presumably `.d()` rethrows it.
+	} finally {
+		_usingCtx$1.d();
+	}
+}
+function jvp$1(f, primals, tangents) {
+	const [primalsFlat, inTree] = flatten(primals);
+	const [tangentsFlat, inTree2] = flatten(tangents);
+	if (!inTree.equals(inTree2)) throw new TreeMismatchError("jvp", inTree, inTree2);
+	const [flatFun, outTree] = flattenFun(f, inTree);
+	const [primalsOutFlat, tangentsOutFlat] = jvpFlat(flatFun, primalsFlat, tangentsFlat);
+	if (outTree.value === void 0) throw new Error("outTree was not set in jvp");
+	const primalsOut = unflatten(outTree.value, primalsOutFlat);
+	const tangentsOut = unflatten(outTree.value, tangentsOutFlat);
+	return [primalsOut, tangentsOut];
 }
 //#endregion
@@ -3775,11 +4302,10 @@ function partialEvalFlat(f, pvalsIn) {
 	const tracersOut = outs.map((out) => fullRaise(trace$1, out));
 	const pvalsOut = tracersOut.map((t) => t.pval);
 	const unknownTracersOut = tracersOut.filter((t) => !t.pval.isKnown);
-	const { jaxpr, consts } = partialEvalGraphToJaxpr(unknownTracersIn, unknownTracersOut);
+	const jaxpr = partialEvalGraphToJaxpr(unknownTracersIn, unknownTracersOut);
 	return {
 		jaxpr,
-		pvalsOut,
-		consts
+		pvalsOut
 	};
 }
 /**
@@ -3796,22 +4322,19 @@ function linearizeFlatUtil(f, primalsIn) {
 		const [primalsOut$1, tangentsOut] = jvp$1(f, x.slice(0, k), x.slice(k, 2 * k));
 		return [...primalsOut$1, ...tangentsOut];
 	};
-	const { jaxpr, pvalsOut, consts } = partialEvalFlat(fJvp, pvalsIn);
+	const { jaxpr, pvalsOut } = partialEvalFlat(fJvp, pvalsIn);
 	const primalPvals = pvalsOut.slice(0, pvalsOut.length / 2);
 	if (!primalPvals.every((pval) => pval.isKnown)) throw new Error("Not all primal values are known after partial evaluation");
 	const primalsOut = primalPvals.map((pval) => pval.val);
 	return {
 		primalsOut,
-		jaxpr,
-		consts
+		jaxpr
 	};
 }
 function linearizeFlat(f, primalsIn) {
-	const { primalsOut, jaxpr, consts } = linearizeFlatUtil(f, primalsIn);
-	const fLin = (...tangents) => evalJaxpr(jaxpr, [...consts.map((c) => c.ref), ...tangents]);
-	const dispose$1 = () => {
-		for (const c of consts) c.dispose();
-	};
+	const { primalsOut, jaxpr } = linearizeFlatUtil(f, primalsIn);
+	const fLin = (...tangents) => evalJaxpr(jaxpr.jaxpr, [...jaxpr.consts.map((c) => c.ref), ...tangents]);
+	const dispose$1 = () => jaxpr.dispose();
 	return [
 		primalsOut,
 		fLin,
@@ -3895,7 +4418,7 @@ var PartialEvalTrace = class extends Trace {
 	}
 	processPrimitive(primitive, tracers, params) {
 		if (tracers.every((t) => t.pval.isKnown)) return bind(primitive, tracers.map((t) => t.fullLower()), params);
-		if (primitive === Primitive.JitCall) {
+		if (primitive === Primitive.Jit) {
 			const { name, jaxpr, numConsts } = params;
 			return this.#partialEvalJaxpr(name, jaxpr, numConsts, tracers);
 		}
@@ -3921,14 +4444,14 @@ var PartialEvalTrace = class extends Trace {
 	* Evaluate a Jaxpr on a set of PartialEvalTracers, computing as many known
 	* values as possible (with JIT) and forwarding the unknown ones.
 	*
-	* Used when encountering a JitCall rule during the trace.
+	* Used when encountering a Jit rule during the trace.
 	*/
 	#partialEvalJaxpr(name, jaxpr, numConsts, tracers) {
 		jaxpr = jaxpr.flatten();
 		const inUnknowns = tracers.map((t) => !t.pval.isKnown);
 		const { jaxpr1, jaxpr2, outUnknowns, numRes } = partialEvalJaxpr(jaxpr, inUnknowns);
 		const [knownTracers, unknownTracers] = require_backend.partitionList(inUnknowns, tracers);
-		const outs1Res = bind(Primitive.JitCall, knownTracers.map((t) => t.ref.fullLower()), {
+		const outs1Res = bind(Primitive.Jit, knownTracers.map((t) => t.ref.fullLower()), {
 			name: `${name}_peval`,
 			jaxpr: jaxpr1,
 			numConsts: 0
@@ -3938,7 +4461,7 @@ var PartialEvalTrace = class extends Trace {
 		const resTracers = res.map((x) => this.instantiateConst(fullRaise(this, x)));
 		const recipe = {
 			type: "JaxprEqn",
-			prim: Primitive.JitCall,
+			prim: Primitive.Jit,
 			tracersIn: resTracers.concat(unknownTracers),
 			params: {
 				name: `${name}_resid`,
@@ -3967,7 +4490,7 @@ function partialEvalJaxpr(jaxpr, inUnknowns, instantiate) {
 	const eqns1 = [];
 	const eqns2 = [];
 	for (const eqn of jaxpr.eqns) {
-		if (eqn.primitive === Primitive.JitCall) throw new TypeError("partialEvalJaxpr requires flattened Jaxpr");
+		if (eqn.primitive === Primitive.Jit) throw new TypeError("partialEvalJaxpr requires flattened Jaxpr");
 		const hasUnknowns = eqn.inputs.some((x) => x instanceof Var && !knownVars.has(x));
 		if (hasUnknowns) {
 			for (const x of eqn.inputs) if (x instanceof Var && knownVars.has(x)) residuals.add(x);
@@ -4042,10 +4565,7 @@ function partialEvalGraphToJaxpr(tracersIn, tracersOut) {
 	for (const t of tracersOut) t.dispose();
 	jaxpr = jaxpr.simplify();
 	if (require_backend.DEBUG >= 5) console.info("jaxpr from partial evaluation:\n" + jaxpr.toString());
-	return {
-		jaxpr,
-		consts
-	};
+	return new ClosedJaxpr(jaxpr, consts);
 }
 /** Marker type for pullback, used by transpose rules. */
 var UndefPrimal = class {
@@ -4237,317 +4757,151 @@ const transposeRules = {
 		cond.dispose();
 		return cts;
 	},
-	[Primitive.Transpose]([ct], [x], { perm }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Transpose);
-		return [transpose$1(ct, require_backend.invertPermutation(perm))];
-	},
-	[Primitive.Broadcast]([ct], [x], { axis }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Broadcast);
-		return [reduce(ct, require_backend.AluOp.Add, axis)];
-	},
-	[Primitive.Reshape]([ct], [x], _) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Reshape);
-		return [reshape$1(ct, x.aval.shape)];
-	},
-	[Primitive.Flip]([ct], [x], { axis }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Flip);
-		return [flip$1(ct, axis)];
-	},
-	[Primitive.Shrink]([ct], [x], { slice }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Shrink);
-		const width = slice.map(([s, e$1], i) => [s, x.aval.shape[i] - e$1]);
-		return [pad$1(ct, width)];
+	[Primitive.Concatenate]([ct], inputs, { axis }) {
+		if (inputs.some((x) => !(x instanceof UndefPrimal))) throw new NonlinearError(Primitive.Concatenate);
+		const sizes = inputs.map((x) => x.aval.shape[axis]);
+		return split$2(ct, axis, sizes);
 	},
-	[Primitive.Pad]([ct], [x], { width }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Pad);
-		const slice = width.map(([s, _e], i) => [s, s + x.aval.shape[i]]);
-		return [shrink(ct, slice)];
+	[Primitive.Split](cts, [x], { axis }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Split);
+		return [concatenate$1(cts, axis)];
 	},
 	[Primitive.Gather]([ct], [x, ...indices], { axis, outDim }) {
 		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
 		if (indices.some((i) => i instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
-		throw new Error("Gather transpose rule is not yet implemented, requires complex Scatter sum operation");
-	},
-	[Primitive.JitCall](cts, args, { name, jaxpr }) {
-		const undefPrimals = args.map((x) => x instanceof UndefPrimal);
-		const { newJaxpr, newConsts } = transposeJaxpr(jaxpr, undefPrimals);
-		const residuals = args.filter((x, i$1) => !undefPrimals[i$1]);
-		const outs = bind(Primitive.JitCall, [
-			...newConsts.map((c) => c.ref),
-			...residuals,
-			...cts
-		], {
-			name: `${name}_t`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
-		});
-		let i = 0;
-		return undefPrimals.map((isUndef) => isUndef ? outs[i++] : null);
-	}
-};
-const transposeJaxprCache = /* @__PURE__ */ new Map();
-function transposeJaxpr(jaxpr, undefPrimals) {
-	const cacheKey = JSON.stringify(undefPrimals);
-	const prevResult = transposeJaxprCache.get(jaxpr)?.get(cacheKey);
-	if (prevResult) return prevResult;
-	const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
-	const forwardInTypes = inTypes.filter((_, i) => !undefPrimals[i]);
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((forwardIn, cotangents) => {
-		const args = [];
-		let forwardInIdx = 0;
-		for (let i = 0; i < undefPrimals.length; i++) if (undefPrimals[i]) args.push(new UndefPrimal(inTypes[i]));
-		else args.push(forwardIn[forwardInIdx++]);
-		return evalJaxprTransposed(jaxpr, args, cotangents);
-	})(forwardInTypes, outTypes);
-	typecheckJaxpr(newJaxpr);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
-	if (!transposeJaxprCache.has(jaxpr)) transposeJaxprCache.set(jaxpr, /* @__PURE__ */ new Map());
-	transposeJaxprCache.get(jaxpr).set(cacheKey, result);
-	return result;
-}
-function vjpFlat(f, primalsIn) {
-	const { primalsOut, jaxpr, consts } = linearizeFlatUtil(f, primalsIn);
-	const fVjp = (...cotangents) => {
-		const transposeInputs = [...consts.map((c) => c.ref), ...primalsIn.map((t) => new UndefPrimal(t.aval))];
-		return evalJaxprTransposed(jaxpr, transposeInputs, cotangents);
-	};
-	const dispose$1 = () => {
-		for (const c of consts) c.dispose();
-	};
-	return [
-		primalsOut,
-		fVjp,
-		dispose$1
-	];
-}
-function vjp$1(f, ...primalsIn) {
-	const [primalsInFlat, inTree] = flatten(primalsIn);
-	const [fFlat, outTree] = flattenFun(f, inTree);
-	const [primalsOutFlat, fVjpFlat, dispose$1] = vjpFlat(fFlat, primalsInFlat.map(pureArray));
-	if (outTree.value === void 0) throw new Error("outTree was not set in vjp");
-	const primalsOut = unflatten(outTree.value, primalsOutFlat);
-	const fVjp = ((cotangentsOut) => {
-		const [cotangentsOutFlat, outTree2] = flatten(cotangentsOut);
-		if (!outTree.value.equals(outTree2)) throw new TreeMismatchError("vjp", outTree.value, outTree2);
-		const cotangentsInFlat = fVjpFlat(...cotangentsOutFlat.map(pureArray));
-		return unflatten(inTree, cotangentsInFlat);
-	});
-	fVjp.dispose = dispose$1;
-	return [primalsOut, fVjp];
-}
-function grad$1(f) {
-	const valueAndGradFn = valueAndGrad$1(f);
-	return (...x) => {
-		const [y, dx] = valueAndGradFn(...x);
-		y.dispose();
-		return dx;
-	};
-}
-function valueAndGrad$1(f) {
-	return (...x) => {
-		if (x.length === 0) throw new Error("grad requires at least one argument to differentiate");
-		const [y, fVjp] = vjp$1(f, x[0], ...x.slice(1).map(stopGradient));
-		if (!(y instanceof Tracer) || ndim$1(y) !== 0) throw new TypeError("grad requires a scalar output");
-		if (!require_backend.isFloatDtype(y.dtype)) throw new TypeError("grad only supports floating-point dtypes");
-		const [ct, ...rest] = fVjp(onesLike$1(y.ref));
-		for (const r of rest) dispose(r);
-		fVjp.dispose();
-		return [y, ct];
-	};
-}
-function jacrev$1(f) {
-	return function jacobianReverse(x) {
-		if (x.shape.length !== 1) throw new TypeError("jacrev only supports 1D inputs");
-		const [size$1] = x.shape;
-		const pullback = (ct) => {
-			const [y, fVjp] = vjp$1(f, x);
-			y.dispose();
-			const [ret] = fVjp(ct);
-			fVjp.dispose();
-			return ret;
-		};
-		return vmap$1(pullback, [1])(eye(size$1, void 0, { dtype: x.dtype }));
-	};
-}
-//#endregion
-//#region src/library/lax.ts
-var lax_exports = {};
-__export(lax_exports, {
-	conv: () => conv,
-	convGeneralDilated: () => convGeneralDilated,
-	convWithGeneralPadding: () => convWithGeneralPadding,
-	dot: () => dot$1,
-	erf: () => erf,
-	erfc: () => erfc,
-	reduceWindow: () => reduceWindow,
-	stopGradient: () => stopGradient$1
-});
-/**
-* General dot product/contraction operator.
-*
-* Prefer higher-level functions like `jax.numpy.dot()`, `jax.numpy.matmul()`,
-* `jax.numpy.tensordot(), and `jax.numpy.einsum()` where possible.
-*/
-function dot$1(lhs, rhs, { lhsContractingDims: lc = [], rhsContractingDims: rc = [], lhsBatchDims: lb = [], rhsBatchDims: rb = [] } = {}) {
-	if (lc.length !== rc.length) throw new Error(`dot: contracting dims lengths mismatch, got ${JSON.stringify(lc)} and ${JSON.stringify(rc)}`);
-	else if (lb.length !== rb.length) throw new Error(`dot: batch dims lengths mismatch, got ${JSON.stringify(lb)} and ${JSON.stringify(rb)}`);
-	lc = lc.map((a) => require_backend.checkAxis(a, lhs.ndim));
-	rc = rc.map((a) => require_backend.checkAxis(a, rhs.ndim));
-	lb = lb.map((a) => require_backend.checkAxis(a, lhs.ndim));
-	rb = rb.map((a) => require_backend.checkAxis(a, rhs.ndim));
-	if (lc.some((a) => lb.includes(a))) throw new Error(`dot: lhs contracting dims ${JSON.stringify(lc)} overlap with batch dims ${JSON.stringify(lb)}`);
-	else if (rc.some((a) => rb.includes(a))) throw new Error(`dot: rhs contracting dims ${JSON.stringify(rc)} overlap with batch dims ${JSON.stringify(rb)}`);
-	const lf = require_backend.range(lhs.ndim).filter((a) => !lc.includes(a) && !lb.includes(a));
-	const rf = require_backend.range(rhs.ndim).filter((a) => !rc.includes(a) && !rb.includes(a));
-	const lhs2 = lhs.transpose([
-		...lb,
-		...lf,
-		...lc
-	]);
-	const rhs2 = rhs.transpose([
-		...rb,
-		...rf,
-		...rc
-	]);
-	if (lc.length === 0) return mul(lhs2.reshape([
-		...lb.map((a) => lhs.shape[a]),
-		...lf.map((a) => lhs.shape[a]),
-		...require_backend.rep(rf.length, 1)
-	]), rhs2.reshape([
-		...rb.map((a) => rhs.shape[a]),
-		...require_backend.rep(lf.length, 1),
-		...rf.map((a) => rhs.shape[a])
-	]));
-	const dotShapeX = lc.map((a) => lhs.shape[a]);
-	const dotShapeY = rc.map((a) => rhs.shape[a]);
-	if (!require_backend.deepEqual(dotShapeX, dotShapeY)) throw new Error(`dot: shapes not aligned along contracting dims: ${JSON.stringify(dotShapeX)} != ${JSON.stringify(dotShapeY)}`);
-	return dot$2(lhs2.reshape([
-		...lb.map((a) => lhs.shape[a]),
-		...lf.map((a) => lhs.shape[a]),
-		...require_backend.rep(rf.length, 1),
-		require_backend.prod(dotShapeX)
-	]), rhs2.reshape([
-		...rb.map((a) => rhs.shape[a]),
-		...require_backend.rep(lf.length, 1),
-		...rf.map((a) => rhs.shape[a]),
-		require_backend.prod(dotShapeY)
-	]));
-}
-function padtypeToPads(inShape, filterShape, strides, dilation, padding) {
-	const padType = padding.toUpperCase();
-	switch (padType) {
-		case "VALID": return require_backend.rep(inShape.length, [0, 0]);
-		case "SAME":
-		case "SAME_LOWER": {
-			const outShape = inShape.map((size$1, i) => Math.ceil(size$1 / strides[i]));
-			const padSizes = require_backend.zipn(outShape, strides, filterShape, dilation, inShape).map(([o, s, k, d, i]) => Math.max(0, (o - 1) * s + 1 + (k - 1) * d - i));
-			if (padType === "SAME") return padSizes.map((size$1) => [size$1 >> 1, size$1 - (size$1 >> 1)]);
-			else return padSizes.map((size$1) => [size$1 - (size$1 >> 1), size$1 >> 1]);
-		}
-		default: throw new Error(`Unknown padding type: ${padType}`);
-	}
-}
-/**
-* General n-dimensional convolution operator, with optional dilation.
-*
-* The semantics of this operation mimic the `jax.lax.conv_general_dilated`
-* function in JAX, which wraps XLA's general convolution operator.
-*
-* Grouped convolutions are not supported right now.
-*/
-function convGeneralDilated(lhs, rhs, windowStrides, padding, { lhsDilation, rhsDilation, featureGroupCount = 1 } = {}) {
-	if (lhs.ndim < 2) throw new Error("lhs must have at least 2 dimensions");
-	if (rhs.ndim < 2) throw new Error("rhs must have at least 2 dimensions");
-	if (typeof padding === "string") {
-		if (lhsDilation?.some((d) => d !== 1)) throw new Error("String padding is not supported for transposed convolutions");
-		padding = padtypeToPads(lhs.shape.slice(2), rhs.shape.slice(2), windowStrides, rhsDilation ?? require_backend.rep(rhs.ndim - 2, 1), padding);
-	}
-	if (featureGroupCount !== 1) {
-		const G = featureGroupCount;
-		const [N, C_in, ...xs] = lhs.shape;
-		const [C_out, C_in_per_group, ...ks] = rhs.shape;
-		if (C_in % G !== 0) throw new Error(`featureGroupCount=${G} must divide input channels=${C_in}`);
-		if (C_out % G !== 0) throw new Error(`featureGroupCount=${G} must divide output channels=${C_out}`);
-		if (C_in / G !== C_in_per_group) throw new Error(`rhs input channels=${C_in_per_group} must equal lhs input channels / groups=${C_in / G}`);
-		const lhsGrouped = moveaxis(lhs.reshape([
-			N,
-			G,
-			C_in / G,
-			...xs
-		]), 1, 0);
-		const rhsGrouped = rhs.reshape([
-			G,
-			C_out / G,
-			C_in_per_group,
-			...ks
-		]);
-		const result = conv$1(lhsGrouped, rhsGrouped, {
-			vmapDims: 1,
-			strides: windowStrides,
-			padding,
-			lhsDilation,
-			rhsDilation
+		throw new Error("Gather transpose rule is not yet implemented, requires complex Scatter sum operation");
+	},
+	[Primitive.Transpose]([ct], [x], { perm }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Transpose);
+		return [transpose$1(ct, require_backend.invertPermutation(perm))];
+	},
+	[Primitive.Broadcast]([ct], [x], { axis }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Broadcast);
+		return [reduce(ct, require_backend.AluOp.Add, axis)];
+	},
+	[Primitive.Reshape]([ct], [x], _) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Reshape);
+		return [reshape$1(ct, x.aval.shape)];
+	},
+	[Primitive.Flip]([ct], [x], { axis }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Flip);
+		return [flip$1(ct, axis)];
+	},
+	[Primitive.Shrink]([ct], [x], { slice }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Shrink);
+		const width = slice.map(([s, e$1], i) => [s, x.aval.shape[i] - e$1]);
+		return [pad$1(ct, width)];
+	},
+	[Primitive.Pad]([ct], [x], { width }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Pad);
+		const slice = width.map(([s, _e], i) => [s, s + x.aval.shape[i]]);
+		return [shrink(ct, slice)];
+	},
+	[Primitive.TriangularSolve]([ct], [a, b], { unitDiagonal }) {
+		if (a instanceof UndefPrimal || !(b instanceof UndefPrimal)) throw new NonlinearError(Primitive.TriangularSolve);
+		const ctB = triangularSolve$1(moveaxis(a, -2, -1), ct, {
+			lower: true,
+			unitDiagonal
 		});
-		const ys = result.shape.slice(3);
-		return moveaxis(result, 0, 1).reshape([
-			N,
-			C_out,
-			...ys
-		]);
+		return [null, ctB];
+	},
+	[Primitive.Jit](cts, args, { name, jaxpr }) {
+		const undefPrimals = args.map((x) => x instanceof UndefPrimal);
+		const newJaxpr = transposeJaxpr(jaxpr, undefPrimals);
+		const residuals = args.filter((x, i$1) => !undefPrimals[i$1]);
+		const outs = bind(Primitive.Jit, [
+			...newJaxpr.consts.map((c) => c.ref),
+			...residuals,
+			...cts
+		], {
+			name: `${name}_t`,
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
+		});
+		let i = 0;
+		return undefPrimals.map((isUndef) => isUndef ? outs[i++] : null);
 	}
-	return conv$1(lhs, rhs, {
-		strides: windowStrides,
-		padding,
-		lhsDilation,
-		rhsDilation
-	});
-}
-/** Convenience wrapper around `convGeneralDilated`. */
-function convWithGeneralPadding(lhs, rhs, windowStrides, padding, lhsDilation, rhsDilation) {
-	return convGeneralDilated(lhs, rhs, windowStrides, padding, {
-		lhsDilation,
-		rhsDilation
-	});
+};
+const transposeJaxprCache = /* @__PURE__ */ new Map();
+function transposeJaxpr(jaxpr, undefPrimals) {
+	const cacheKey = JSON.stringify(undefPrimals);
+	const prevResult = transposeJaxprCache.get(jaxpr)?.get(cacheKey);
+	if (prevResult) return prevResult;
+	const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
+	const forwardInTypes = inTypes.filter((_, i) => !undefPrimals[i]);
+	const { jaxpr: newJaxpr } = makeJaxpr$1((forwardIn, cotangents) => {
+		const args = [];
+		let forwardInIdx = 0;
+		for (let i = 0; i < undefPrimals.length; i++) if (undefPrimals[i]) args.push(new UndefPrimal(inTypes[i]));
+		else args.push(forwardIn[forwardInIdx++]);
+		return evalJaxprTransposed(jaxpr, args, cotangents);
+	})(forwardInTypes, outTypes);
+	typecheckJaxpr(newJaxpr.jaxpr);
+	if (!transposeJaxprCache.has(jaxpr)) transposeJaxprCache.set(jaxpr, /* @__PURE__ */ new Map());
+	transposeJaxprCache.get(jaxpr).set(cacheKey, newJaxpr);
+	return newJaxpr;
 }
-/** Convenience wrapper around `convGeneralDilated`. */
-function conv(lhs, rhs, windowStrides, padding) {
-	return convGeneralDilated(lhs, rhs, windowStrides, padding);
+function vjpFlat(f, primalsIn) {
+	const { primalsOut, jaxpr } = linearizeFlatUtil(f, primalsIn);
+	const fVjp = (...cotangents) => {
+		const transposeInputs = [...jaxpr.consts.map((c) => c.ref), ...primalsIn.map((t) => new UndefPrimal(t.aval))];
+		return evalJaxprTransposed(jaxpr.jaxpr, transposeInputs, cotangents);
+	};
+	const dispose$1 = () => jaxpr.dispose();
+	return [
+		primalsOut,
+		fVjp,
+		dispose$1
+	];
 }
-/** Reduce a computation over padded windows. */
-function reduceWindow(operand, computation, windowDimensions, windowStrides) {
-	if (operand.ndim < windowDimensions.length) throw new Error(`Operand dimensions ${operand.ndim} < window ${windowDimensions.length}`);
-	if (!windowStrides) windowStrides = require_backend.rep(windowDimensions.length, 1);
-	for (let i = 0; i < operand.ndim; i++) computation = vmap$1(computation, 0);
-	return computation(bind1(Primitive.Pool, [operand], {
-		window: windowDimensions,
-		strides: windowStrides
-	}));
+function vjp$1(f, ...primalsIn) {
+	const [primalsInFlat, inTree] = flatten(primalsIn);
+	const [fFlat, outTree] = flattenFun(f, inTree);
+	const [primalsOutFlat, fVjpFlat, dispose$1] = vjpFlat(fFlat, primalsInFlat.map(pureArray));
+	if (outTree.value === void 0) throw new Error("outTree was not set in vjp");
+	const primalsOut = unflatten(outTree.value, primalsOutFlat);
+	const fVjp = ((cotangentsOut) => {
+		const [cotangentsOutFlat, outTree2] = flatten(cotangentsOut);
+		if (!outTree.value.equals(outTree2)) throw new TreeMismatchError("vjp", outTree.value, outTree2);
+		const cotangentsInFlat = fVjpFlat(...cotangentsOutFlat.map(pureArray));
+		return unflatten(inTree, cotangentsInFlat);
+	});
+	fVjp.dispose = dispose$1;
+	return [primalsOut, fVjp];
 }
-/** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
-function erf(x) {
-	return erf$1(x);
+function grad$1(f) {
+	const valueAndGradFn = valueAndGrad$1(f);
+	return (...x) => {
+		const [y, dx] = valueAndGradFn(...x);
+		y.dispose();
+		return dx;
+	};
 }
-/**
-* The complementary error function: `erfc(x) = 1 - erf(x)`.
-*
-* This function is more accurate than `1 - erf(x)` for large values of `x`,
-* where `erf(x)` is very close to 1.
-*/
-function erfc(x) {
-	return erfc$1(x);
+function valueAndGrad$1(f) {
+	return (...x) => {
+		if (x.length === 0) throw new Error("grad requires at least one argument to differentiate");
+		const [y, fVjp] = vjp$1(f, x[0], ...x.slice(1).map(stopGradient));
+		if (!(y instanceof Tracer) || ndim$1(y) !== 0) throw new TypeError("grad requires a scalar output");
+		if (!require_backend.isFloatDtype(y.dtype)) throw new TypeError("grad only supports floating-point dtypes");
+		const [ct, ...rest] = fVjp(onesLike$1(y.ref));
+		for (const r of rest) dispose(r);
+		fVjp.dispose();
+		return [y, ct];
+	};
 }
-/**
-* Stops gradient computation.
-*
-* Behaves as the identity function but prevents the flow of gradients during
-* forward or reverse-mode automatic differentiation.
-*/
-function stopGradient$1(x) {
-	return stopGradient(x);
+function jacrev$1(f) {
+	return function jacobianReverse(x) {
+		if (x.shape.length !== 1) throw new TypeError("jacrev only supports 1D inputs");
+		const [size$1] = x.shape;
+		const pullback = (ct) => {
+			const [y, fVjp] = vjp$1(f, x);
+			y.dispose();
+			const [ret] = fVjp(ct);
+			fVjp.dispose();
+			return ret;
+		};
+		return vmap$1(pullback, [1])(eye(size$1, void 0, { dtype: x.dtype }));
+	};
 }
 //#endregion
@@ -4687,8 +5041,8 @@ function computeSizeMap({ shapes, lhsIndices, rhsIndex }) {
 			const idx = lhsIndex[j];
 			const dim = shape$1[j];
 			const existing = sizeMap.get(idx);
-			if (existing === void 0) sizeMap.set(idx, dim);
-			else if (existing !== dim) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
+			if (existing === void 0 || existing === 1) sizeMap.set(idx, dim);
+			else if (existing !== dim && dim !== 1) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
 		}
 	}
 	for (const [idx, size$1] of sizeMap) if (!Number.isInteger(idx) || idx < 0) throw new Error(`Invalid index ${idx} in einsum expression, must be non-negative integer`);
@@ -4696,52 +5050,410 @@ function computeSizeMap({ shapes, lhsIndices, rhsIndex }) {
 	for (const idx of rhsIndex) if (!sizeMap.has(idx)) throw new Error(`Output index ${idx} not present in einsum inputs`);
 	return sizeMap;
 }
-const einsumPathCache = /* @__PURE__ */ new Map();
-function computeEinsumPath(input, method) {
-	if (!method) method = input.shapes.length <= 5 ? "optimal" : "naive";
-	return require_backend.runWithCache(einsumPathCache, [input, method], () => {
-		const sizeMap = computeSizeMap(input);
-		if (input.shapes.length === 1) return new EinsumPath(input, sizeMap, []);
-		switch (method) {
-			case "naive": return computePathNaive(input, sizeMap);
-			case "optimal": return computePathOptimal(input, sizeMap);
-			default: throw new Error(`Unknown computePath method: ${method}`);
-		}
-	});
+const einsumPathCache = /* @__PURE__ */ new Map();
+function computeEinsumPath(input, method) {
+	if (!method) method = input.shapes.length <= 5 ? "optimal" : "naive";
+	return require_backend.runWithCache(einsumPathCache, [input, method], () => {
+		const sizeMap = computeSizeMap(input);
+		if (input.shapes.length === 1) return new EinsumPath(input, sizeMap, []);
+		switch (method) {
+			case "naive": return computePathNaive(input, sizeMap);
+			case "optimal": return computePathOptimal(input, sizeMap);
+			default: throw new Error(`Unknown computePath method: ${method}`);
+		}
+	});
+}
+function computePathNaive(input, sizeMap) {
+	const n = input.shapes.length;
+	const path = [];
+	let lastTensorIndex = 0;
+	for (let i = 1; i < n; i++) {
+		path.push([lastTensorIndex, i]);
+		lastTensorIndex = n + i - 1;
+	}
+	return new EinsumPath(input, sizeMap, path);
+}
+function computePathOptimal(input, sizeMap) {
+	const n = input.shapes.length;
+	let bestPath = null;
+	let bestFlops = null;
+	for (const path of allPaths(require_backend.range(n), n)) {
+		const flops = approximatePathFlops(input, sizeMap, path);
+		if (bestFlops === null || flops < bestFlops) {
+			bestPath = path;
+			bestFlops = flops;
+		}
+	}
+	return new EinsumPath(input, sizeMap, bestPath);
+}
+function* allPaths(tensors, next) {
+	if (tensors.length === 2) {
+		yield [[tensors[0], tensors[1]]];
+		return;
+	}
+	for (let i = 0; i < tensors.length; i++) for (let j = i + 1; j < tensors.length; j++) {
+		const pair = [tensors[i], tensors[j]];
+		const newTensors = tensors.filter((t) => t !== pair[0] && t !== pair[1]);
+		newTensors.push(next);
+		for (const subpath of allPaths(newTensors, next + 1)) yield [pair, ...subpath];
+	}
+}
+//#endregion
+//#region src/library/numpy-fft.ts
+var numpy_fft_exports = {};
+__export(numpy_fft_exports, {
+	fft: () => fft,
+	ifft: () => ifft
+});
+function checkPairInput(name, a) {
+	const fullName = `jax.numpy.fft.${name}`;
+	if (!require_backend.deepEqual(a.real.shape, a.imag.shape)) throw new Error(`${fullName}: real and imaginary parts must have the same shape, got ${JSON.stringify(a.real.shape)} and ${JSON.stringify(a.imag.shape)}`);
+	if (a.real.dtype !== a.imag.dtype) throw new Error(`${fullName}: real and imaginary parts must have the same dtype, got ${a.real.dtype} and ${a.imag.dtype}`);
+	if (!require_backend.isFloatDtype(a.real.dtype)) throw new Error(`${fullName}: input must have a float dtype, got ${a.real.dtype}`);
+}
+function checkPowerOfTwo(name, n) {
+	if ((n & n - 1) !== 0) throw new Error(`jax.numpy.fft.${name}: size must be a power of two, got ${n}`);
+}
+const fftUpdate = jit$1(function fftUpdate$1(i, { real, imag }) {
+	const half = 2 ** i;
+	real = real.reshape([-1, 2 * half]);
+	imag = imag.reshape([-1, 2 * half]);
+	const k = arange(0, half, 1, { dtype: real.dtype });
+	const theta = k.mul(-Math.PI / half);
+	const wr = cos(theta.ref);
+	const wi = sin(theta);
+	const ur = real.ref.slice([], [0, half]);
+	const ui = imag.ref.slice([], [0, half]);
+	const vr = real.slice([], [half, 2 * half]);
+	const vi = imag.slice([], [half, 2 * half]);
+	const tr = vr.ref.mul(wr.ref).sub(vi.ref.mul(wi.ref));
+	const ti = vr.mul(wi).add(vi.mul(wr));
+	return {
+		real: concatenate([ur.ref.add(tr.ref), ur.sub(tr)], -1),
+		imag: concatenate([ui.ref.add(ti.ref), ui.sub(ti)], -1)
+	};
+}, { staticArgnums: [0] });
+/**
+* Compute a one-dimensional discrete Fourier transform.
+*
+* Currently, the size of the axis must be a power of two.
+*/
+function fft(a, axis = -1) {
+	checkPairInput("fft", a);
+	let { real, imag } = a;
+	axis = require_backend.checkAxis(axis, real.ndim);
+	const n = real.shape[axis];
+	checkPowerOfTwo("fft", n);
+	const logN = Math.log2(n);
+	let perm = null;
+	if (axis !== real.ndim - 1) {
+		perm = require_backend.range(real.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		real = real.transpose(perm);
+		imag = imag.transpose(perm);
+	}
+	const originalShape = real.shape;
+	real = real.reshape([-1, ...require_backend.rep(logN, 2)]).transpose([0, ...require_backend.range(1, logN + 1).reverse()]).flatten();
+	imag = imag.reshape([-1, ...require_backend.rep(logN, 2)]).transpose([0, ...require_backend.range(1, logN + 1).reverse()]).flatten();
+	for (let i = 0; i < logN; i++) ({real, imag} = fftUpdate(i, {
+		real,
+		imag
+	}));
+	real = real.reshape(originalShape);
+	imag = imag.reshape(originalShape);
+	if (perm !== null) {
+		real = real.transpose(require_backend.invertPermutation(perm));
+		imag = imag.transpose(require_backend.invertPermutation(perm));
+	}
+	return {
+		real,
+		imag
+	};
+}
+/**
+* Compute a one-dimensional inverse discrete Fourier transform.
+*
+* Currently, the size of the axis must be a power of two.
+*/
+function ifft(a, axis = -1) {
+	checkPairInput("ifft", a);
+	let { real, imag } = a;
+	axis = require_backend.checkAxis(axis, real.ndim);
+	const n = real.shape[axis];
+	checkPowerOfTwo("ifft", n);
+	imag = imag.mul(-1);
+	const result = fft({
+		real,
+		imag
+	}, axis);
+	return {
+		real: result.real.div(n),
+		imag: result.imag.mul(-1).div(n)
+	};
+}
+//#endregion
+//#region src/library/numpy-linalg.ts
+var numpy_linalg_exports = {};
+__export(numpy_linalg_exports, {
+	cholesky: () => cholesky,
+	det: () => det,
+	diagonal: () => diagonal,
+	inv: () => inv,
+	lstsq: () => lstsq,
+	matmul: () => matmul,
+	matrixPower: () => matrixPower,
+	matrixTranspose: () => matrixTranspose,
+	outer: () => outer,
+	slogdet: () => slogdet,
+	solve: () => solve,
+	tensordot: () => tensordot,
+	trace: () => trace,
+	vecdot: () => vecdot
+});
+function checkSquare(name, a) {
+	if (a.ndim < 2 || a.shape[a.ndim - 1] !== a.shape[a.ndim - 2]) throw new Error(`${name}: input must be at least 2D square matrix, got ${a.aval}`);
+	return a.shape[a.ndim - 1];
+}
+/**
+* Compute the Cholesky decomposition of a (batched) positive-definite matrix.
+*
+* This is like `jax.lax.linalg.cholesky()`, except with an option to symmetrize
+* the input matrix, which is on by default.
+*/
+function cholesky(a, { upper = false, symmetrizeInput = true } = {}) {
+	a = fudgeArray(a);
+	checkSquare("cholesky", a);
+	if (symmetrizeInput) a = a.ref.add(matrixTranspose(a)).mul(.5);
+	return cholesky$1(a, { upper });
+}
+/** Compute the determinant of a square matrix (batched). */
+function det(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("det", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	permutation.dispose();
+	const parity = pivots.notEqual(arange(n)).astype(int32).sum(-1).mod(2);
+	const sign$1 = parity.mul(-2).add(1);
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	return prod$1(diag$1, -1).mul(sign$1);
+}
+/** Compute the inverse of a square matrix (batched). */
+function inv(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("inv", a);
+	return solve(a, eye(n));
+}
+/**
+* Return the least-squares solution to a linear equation.
+*
+* For overdetermined systems, this finds the `x` that minimizes `norm(ax - b)`.
+* For underdetermined systems, this finds the minimum-norm solution for `x`.
+*
+* This currently uses Cholesky decomposition to solve the normal equations,
+* under the hood. The method is not as robust as QR or SVD.
+*
+* @param a coefficient matrix of shape `(M, N)`
+* @param b right-hand side of shape `(M,)` or `(M, K)`
+* @return least-squares solution of shape `(N,)` or `(N, K)`
+*/
+function lstsq(a, b) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	if (a.ndim !== 2) throw new Error(`lstsq: 'a' must be a 2D array, got ${a.aval}`);
+	const [m, n] = a.shape;
+	if (b.shape[0] !== m) throw new Error(`lstsq: leading dimension of 'b' must match number of rows of 'a', got ${b.aval}`);
+	const at = matrixTranspose(a.ref);
+	if (m <= n) {
+		const aat = matmul(a, at.ref);
+		const l = cholesky(aat, { symmetrizeInput: false });
+		const lb = triangularSolve(l.ref, b, {
+			leftSide: true,
+			lower: true
+		});
+		const llb = triangularSolve(l, lb, {
+			leftSide: true,
+			transposeA: true
+		});
+		return matmul(at, llb.ref);
+	} else {
+		const ata = matmul(at.ref, a);
+		const l = cholesky(ata, { symmetrizeInput: false });
+		const atb = matmul(at, b);
+		const lb = triangularSolve(l.ref, atb, {
+			leftSide: true,
+			lower: true
+		});
+		const llb = triangularSolve(l, lb, {
+			leftSide: true,
+			transposeA: true
+		});
+		return llb;
+	}
+}
+/** Raise a square matrix to an integer power, via repeated squarings. */
+function matrixPower(a, n) {
+	if (!Number.isInteger(n)) throw new Error(`matrixPower: exponent must be an integer, got ${n}`);
+	a = fudgeArray(a);
+	const m = checkSquare("matrixPower", a);
+	if (n === 0) {
+		a.dispose();
+		return broadcastTo(eye(m), a.shape);
+	}
+	if (n < 0) {
+		a = inv(a);
+		n = -n;
+	}
+	let result = null;
+	let a2k = a;
+	for (let k = 0; n; k++) {
+		if (k > 0) a2k = matmul(a2k.ref, a2k);
+		if (n % 2 === 1) result = result === null ? a2k.ref : matmul(result, a2k.ref);
+		n = Math.floor(n / 2);
+	}
+	a2k.dispose();
+	return result;
+}
+/** Return sign and natural logarithm of the determinant of `a`. */
+function slogdet(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("slogdet", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	permutation.dispose();
+	let parity = pivots.notEqual(arange(n)).astype(int32).sum(-1);
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	parity = parity.add(diag$1.ref.less(0).astype(int32).sum(-1)).mod(2);
+	const logabsdet = log(absolute(diag$1)).sum(-1);
+	const sign$1 = parity.mul(-2).add(1);
+	return [sign$1, logabsdet];
 }
-function computePathNaive(input, sizeMap) {
-	const n = input.shapes.length;
-	const path = [];
-	let lastTensorIndex = 0;
-	for (let i = 1; i < n; i++) {
-		path.push([lastTensorIndex, i]);
-		lastTensorIndex = n + i - 1;
-	}
-	return new EinsumPath(input, sizeMap, path);
+/**
+* Solve a linear system of equations.
+*
+* This solves a (batched) linear system of equations `a @ x = b` for `x` given
+* `a` and `b`. If `a` is singular, this will return `nan` or `inf` values.
+*
+* @param a - Coefficient matrix of shape `(..., N, N)`.
+* @param b - Values of shape `(N,)` or `(..., N, M)`.
+* @returns Solution `x` of shape `(..., N)` or `(..., N, M)`.
+*/
+function solve(a, b) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	const n = checkSquare("solve", a);
+	if (b.ndim === 0) throw new Error(`solve: b cannot be scalar`);
+	const bIs1d = b.ndim === 1;
+	if (bIs1d) b = b.reshape([...b.shape, 1]);
+	if (b.shape[b.ndim - 2] !== n) throw new Error(`solve: leading dimension of b must match size of a, got a=${a.aval}, b=${b.aval}`);
+	const m = b.shape[b.ndim - 1];
+	const batchDims = require_backend.generalBroadcast(a.shape.slice(0, -2), b.shape.slice(0, -2));
+	a = broadcastTo(a, [
+		...batchDims,
+		n,
+		n
+	]);
+	b = broadcastTo(b, [
+		...batchDims,
+		n,
+		m
+	]);
+	const [lu$2, pivots, permutation] = lu(a);
+	pivots.dispose();
+	const P = arange(n).equal(permutation.reshape([...permutation.shape, 1])).astype(b.dtype);
+	const LPb = triangularSolve(lu$2.ref, matmul(P, b), {
+		leftSide: true,
+		lower: true,
+		unitDiagonal: true
+	});
+	let x = triangularSolve(lu$2, LPb.ref, {
+		leftSide: true,
+		lower: false
+	});
+	if (bIs1d) x = squeeze(x, -1);
+	return x;
 }
-function computePathOptimal(input, sizeMap) {
-	const n = input.shapes.length;
-	let bestPath = null;
-	let bestFlops = null;
-	for (const path of allPaths(require_backend.range(n), n)) {
-		const flops = approximatePathFlops(input, sizeMap, path);
-		if (bestFlops === null || flops < bestFlops) {
-			bestPath = path;
-			bestFlops = flops;
-		}
+//#endregion
+//#region src/library/numpy/dtype-info.ts
+/** Machine limits for floating-point types. */
+function finfo(dtype) {
+	if (!require_backend.isFloatDtype(dtype)) throw new Error(`finfo: received ${dtype}, must be a floating-point type`);
+	switch (dtype) {
+		case require_backend.DType.Float16: return Object.freeze({
+			bits: 16,
+			dtype: require_backend.DType.Float16,
+			eps: 2 ** -10,
+			epsneg: 2 ** -11,
+			machep: -10,
+			max: 65504,
+			maxexp: 16,
+			min: -65504,
+			minexp: -14,
+			negep: -24,
+			nexp: 5,
+			nmant: 10,
+			precision: 3,
+			resolution: .001,
+			smallestNormal: 2 ** -14,
+			smallestSubnormal: 2 ** -24
+		});
+		case require_backend.DType.Float32: return Object.freeze({
+			bits: 32,
+			dtype: require_backend.DType.Float32,
+			eps: 2 ** -23,
+			epsneg: 2 ** -24,
+			machep: -23,
+			max: 34028234663852886e22,
+			maxexp: 128,
+			min: -34028234663852886e22,
+			minexp: -126,
+			negep: -24,
+			nexp: 8,
+			nmant: 23,
+			precision: 6,
+			resolution: 1e-6,
+			smallestNormal: 2 ** -126,
+			smallestSubnormal: 2 ** -149
+		});
+		case require_backend.DType.Float64: return Object.freeze({
+			bits: 64,
+			dtype: require_backend.DType.Float64,
+			eps: 2 ** -52,
+			epsneg: 2 ** -53,
+			machep: -52,
+			max: Number.MAX_VALUE,
+			maxexp: 1024,
+			min: -Number.MAX_VALUE,
+			minexp: -1022,
+			negep: -53,
+			nexp: 11,
+			nmant: 52,
+			precision: 15,
+			resolution: 1e-15,
+			smallestNormal: 2 ** -1022,
+			smallestSubnormal: 2 ** -1074
+		});
+		default: throw new Error(`finfo: unsupported dtype ${dtype}`);
 	}
-	return new EinsumPath(input, sizeMap, bestPath);
 }
-function* allPaths(tensors, next) {
-	if (tensors.length === 2) {
-		yield [[tensors[0], tensors[1]]];
-		return;
-	}
-	for (let i = 0; i < tensors.length; i++) for (let j = i + 1; j < tensors.length; j++) {
-		const pair = [tensors[i], tensors[j]];
-		const newTensors = tensors.filter((t) => t !== pair[0] && t !== pair[1]);
-		newTensors.push(next);
-		for (const subpath of allPaths(newTensors, next + 1)) yield [pair, ...subpath];
+/** Machine limits for integer types. */
+function iinfo(dtype) {
+	switch (dtype) {
+		case require_backend.DType.Int32: return Object.freeze({
+			bits: 32,
+			dtype: require_backend.DType.Int32,
+			max: 2147483647,
+			min: -2147483648
+		});
+		case require_backend.DType.Uint32: return Object.freeze({
+			bits: 32,
+			dtype: require_backend.DType.Uint32,
+			max: 4294967295,
+			min: 0
+		});
+		default: throw new Error(`iinfo: unsupported dtype ${dtype}`);
 	}
 }
@@ -4751,28 +5463,32 @@ var numpy_exports = {};
 __export(numpy_exports, {
 	Array: () => Array$1,
 	DType: () => require_backend.DType,
-	abs: () => abs,
+	abs: () => absolute,
 	absolute: () => absolute,
 	acos: () => acos,
-	acosh: () => acosh,
+	acosh: () => arccosh,
 	add: () => add,
+	all: () => all,
 	allclose: () => allclose,
+	any: () => any,
 	arange: () => arange,
-	arccos: () => arccos,
+	arccos: () => acos,
 	arccosh: () => arccosh,
+	arcsin: () => asin,
 	arcsinh: () => arcsinh,
-	arctan: () => arctan,
-	arctan2: () => arctan2,
+	arctan: () => atan,
+	arctan2: () => atan2,
 	arctanh: () => arctanh,
 	argmax: () => argmax,
 	argmin: () => argmin,
+	argsort: () => argsort,
 	array: () => array,
 	asin: () => asin,
-	asinh: () => asinh,
+	asinh: () => arcsinh,
 	astype: () => astype,
 	atan: () => atan,
 	atan2: () => atan2,
-	atanh: () => atanh,
+	atanh: () => arctanh,
 	bool: () => bool,
 	broadcastArrays: () => broadcastArrays,
 	broadcastShapes: () => broadcastShapes,
@@ -4782,16 +5498,21 @@ __export(numpy_exports, {
 	clip: () => clip,
 	columnStack: () => columnStack,
 	concatenate: () => concatenate,
+	convolve: () => convolve,
+	corrcoef: () => corrcoef,
+	correlate: () => correlate,
 	cos: () => cos,
 	cosh: () => cosh,
+	cov: () => cov,
 	cumsum: () => cumsum,
-	cumulativeSum: () => cumulativeSum,
+	cumulativeSum: () => cumsum,
 	deg2rad: () => deg2rad,
 	degrees: () => degrees,
 	diag: () => diag,
 	diagonal: () => diagonal,
-	divide: () => divide,
-	dot: () => dot,
+	divide: () => trueDivide,
+	divmod: () => divmod,
+	dot: () => dot$1,
 	dstack: () => dstack,
 	e: () => e,
 	einsum: () => einsum,
@@ -4799,8 +5520,11 @@ __export(numpy_exports, {
 	eulerGamma: () => eulerGamma,
 	exp: () => exp,
 	exp2: () => exp2,
+	expandDims: () => expandDims,
 	expm1: () => expm1,
 	eye: () => eye,
+	fft: () => numpy_fft_exports,
+	finfo: () => finfo,
 	flip: () => flip,
 	fliplr: () => fliplr,
 	flipud: () => flipud,
@@ -4808,6 +5532,7 @@ __export(numpy_exports, {
 	float32: () => float32,
 	float64: () => float64,
 	floor: () => floor,
+	floorDivide: () => floorDivide,
 	fmod: () => fmod,
 	frexp: () => frexp,
 	full: () => full,
@@ -4820,6 +5545,7 @@ __export(numpy_exports, {
 	hstack: () => hstack,
 	hypot: () => hypot,
 	identity: () => identity$1,
+	iinfo: () => iinfo,
 	inf: () => inf,
 	inner: () => inner,
 	int32: () => int32,
@@ -4831,12 +5557,15 @@ __export(numpy_exports, {
 	ldexp: () => ldexp,
 	less: () => less,
 	lessEqual: () => lessEqual,
+	linalg: () => numpy_linalg_exports,
 	linspace: () => linspace,
 	log: () => log,
 	log10: () => log10,
 	log1p: () => log1p,
 	log2: () => log2,
+	logspace: () => logspace,
 	matmul: () => matmul,
+	matrixTranspose: () => matrixTranspose,
 	max: () => max,
 	maximum: () => maximum,
 	mean: () => mean,
@@ -4853,10 +5582,10 @@ __export(numpy_exports, {
 	onesLike: () => onesLike,
 	outer: () => outer,
 	pad: () => pad,
-	permuteDims: () => permuteDims,
+	permuteDims: () => transpose,
 	pi: () => pi,
 	positive: () => positive,
-	pow: () => pow,
+	pow: () => power,
 	power: () => power,
 	prod: () => prod$1,
 	promoteTypes: () => require_backend.promoteTypes,
@@ -4871,8 +5600,11 @@ __export(numpy_exports, {
 	shape: () => shape,
 	sign: () => sign,
 	sin: () => sin,
+	sinc: () => sinc,
 	sinh: () => sinh,
 	size: () => size,
+	sort: () => sort,
+	split: () => split$1,
 	sqrt: () => sqrt,
 	square: () => square,
 	squeeze: () => squeeze,
@@ -4880,6 +5612,7 @@ __export(numpy_exports, {
 	std: () => std,
 	subtract: () => subtract,
 	sum: () => sum,
+	take: () => take,
 	tan: () => tan,
 	tanh: () => tanh,
 	tensordot: () => tensordot,
@@ -5037,6 +5770,26 @@ function min(a, axis = null, opts) {
 function max(a, axis = null, opts) {
 	return reduce(a, require_backend.AluOp.Max, axis, opts);
 }
+/**
+* Test whether all array elements along a given axis evaluate to True.
+*
+* Returns a boolean array with the same shape as `a` with the specified axis
+* removed. If axis is None, returns a scalar.
+*/
+function all(a, axis = null, opts) {
+	a = fudgeArray(a).astype(require_backend.DType.Bool);
+	return min(a, axis, opts);
+}
+/**
+* Test whether any array element along a given axis evaluates to True.
+*
+* Returns a boolean array with the same shape as `a` with the specified axis
+* removed. If axis is None, returns a scalar.
+*/
+function any(a, axis = null, opts) {
+	a = fudgeArray(a).astype(require_backend.DType.Bool);
+	return max(a, axis, opts);
+}
 /** Return the peak-to-peak range along a given axis (`max - min`). */
 function ptp(a, axis = null, opts) {
 	a = fudgeArray(a);
@@ -5111,8 +5864,6 @@ function cumsum(a, axis) {
 	a = broadcast(a, a.shape.concat(n), [-2]);
 	return moveaxis$1(tril(a).sum(-1), -1, axis);
 }
-/** @function Alternative name for `jax.numpy.cumsum()`. */
-const cumulativeSum = cumsum;
 /** Reverse the elements in an array along the given axes. */
 function flip(x, axis = null) {
 	const nd = ndim(x);
@@ -5120,6 +5871,45 @@ function flip(x, axis = null) {
 	return flip$1(x, axis);
 }
 /**
+* Split an array into multiple sub-arrays along an axis.
+*
+* @param a - The input array to split.
+* @param indicesOrSections - If an integer, it indicates the number of equal
+* sections to create along the specified axis. If a list of integers, it
+* specifies the indices at which to split the array.
+* @param axis - The axis along which to split the array. Default is 0.
+*/
+function split$1(a, indicesOrSections, axis = 0) {
+	a = fudgeArray(a);
+	axis = require_backend.checkAxis(axis, a.ndim);
+	const size$1 = a.shape[axis];
+	let sizes;
+	if (typeof indicesOrSections === "number") {
+		if (size$1 % indicesOrSections !== 0) throw new Error(`Array of size ${size$1} cannot be split into ${indicesOrSections} equal parts`);
+		const partSize = size$1 / indicesOrSections;
+		sizes = require_backend.rep(indicesOrSections, partSize);
+	} else {
+		const indices = indicesOrSections;
+		sizes = [indices[0]];
+		for (let i = 1; i < indices.length; i++) sizes.push(indices[i] - indices[i - 1]);
+		sizes.push(size$1 - indices[indices.length - 1]);
+	}
+	const results = [];
+	for (let i = 0; i < sizes.length; i += 7) if (i === sizes.length) {
+		results.push(a);
+		break;
+	} else if (i + 8 >= sizes.length) {
+		results.push(...split$2(a, axis, sizes.slice(i)));
+		break;
+	} else {
+		const groupSizes = [...sizes.slice(i, i + 7), sizes.slice(i + 7).reduce((x, y) => x + y, 0)];
+		const outs = split$2(a, axis, groupSizes);
+		results.push(...outs.slice(0, -1));
+		a = outs[outs.length - 1];
+	}
+	return results;
+}
+/**
 * Join a sequence of arrays along an existing axis.
 *
 * The arrays must have the same shape, except in the dimension corresponding to
@@ -5131,13 +5921,11 @@ function concatenate(xs, axis = 0) {
 	if (xs.length === 0) throw new Error("Need at least one array to concatenate");
 	const shapes = xs.map(shape);
 	axis = require_backend.checkAxis(axis, shapes[0].length);
-	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays with shapes ${JSON.stringify(shapes)} along axis ${axis}`);
-	const makePadAxis = (start, end) => shapes[0].map((_, i) => i === axis ? [start, end] : [0, 0]);
+	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays ${xs[0].aval} and ${xs[i].aval} along axis ${axis}`);
 	let result = xs[0];
-	for (let i = 1; i < xs.length; i++) {
-		const len1 = result.shape[axis];
-		const len2 = shapes[i][axis];
-		result = pad(result, makePadAxis(0, len2)).add(pad(xs[i], makePadAxis(len1, 0)));
+	for (let i = 1; i < xs.length; i += 7) {
+		const group = xs.slice(i, i + 7);
+		result = concatenate$1([result, ...group], axis);
 	}
 	return result;
 }
@@ -5222,8 +6010,11 @@ function flipud(x) {
 function fliplr(x) {
 	return flip(x, 1);
 }
-/** @function Alternative name for `numpy.transpose()`. */
-const permuteDims = transpose;
+/** Transpose the last two dimensions of an array. */
+function matrixTranspose(a) {
+	if (ndim(a) < 2) throw new Error(`matrixTranspose: input array must be at least 2D`);
+	return moveaxis$1(a, -1, -2);
+}
 /** Return a 1-D flattened array containing the elements of the input. */
 function ravel(a) {
 	return fudgeArray(a).ravel();
@@ -5239,6 +6030,32 @@ function squeeze(a, axis = null) {
 	return reshape(a, newShape);
 }
 /**
+* Expand the shape of an array by inserting new axes of length 1.
+*
+* @param a - Input array.
+* @param axis - Position(s) in the expanded axes where the new axis (or axes)
+*   is placed. Can be a single integer or an array of integers.
+* @returns Array with the number of dimensions increased.
+*
+* @example
+* ```ts
+* const x = np.array([1, 2]);
+* np.expandDims(x, 0); // Shape [1, 2]
+* np.expandDims(x, 1); // Shape [2, 1]
+* np.expandDims(x, [0, 2]); // Shape [1, 2, 1]
+* ```
+*/
+function expandDims(a, axis) {
+	const as = shape(a);
+	axis = typeof axis === "number" ? [axis] : axis;
+	axis = require_backend.normalizeAxis(axis, as.length + axis.length);
+	const newShape = [];
+	let srcIdx = 0;
+	for (let i = 0; i < as.length + axis.length; i++) if (axis.includes(i)) newShape.push(1);
+	else newShape.push(as[srcIdx++]);
+	return reshape(a, newShape);
+}
+/**
 * Repeat each element of an array after themselves.
 *
 * If no axis is provided, use the flattened input array, and return a flat
@@ -5326,7 +6143,7 @@ function diagonal(a, offset, axis1, axis2) {
 */
 function diag(v, k = 0) {
 	const a = fudgeArray(v);
-	if (!Number.isInteger(k)) throw new TypeError(`k must be an integer, got ${k}`);
+	if (!Number.isInteger(k)) throw new Error(`k must be an integer, got ${k}`);
 	if (a.ndim === 1) {
 		const n = a.shape[0];
 		const ret = where(eye(n).equal(1), a.ref, zerosLike(a));
@@ -5334,12 +6151,46 @@ function diag(v, k = 0) {
 		else if (k < 0) return pad(ret, [[-k, 0], [0, -k]]);
 		else return ret;
 	} else if (a.ndim === 2) return diagonal(a, k);
-	else throw new TypeError("numpy.diag only supports 1D and 2D arrays");
+	else throw new Error("numpy.diag only supports 1D and 2D arrays");
 }
 /** Calculate the sum of the diagonal of an array along the given axes. */
 function trace(a, offset = 0, axis1 = 0, axis2 = 1) {
 	return diagonal(a, offset, axis1, axis2).sum(-1);
 }
+/**
+* Return a sorted copy of an array.
+*
+* The array is sorted along a specified axis (the last by default). This may be
+* an unstable sort, and it dispatches to device-specific implementation.
+*/
+function sort(a, axis = -1) {
+	return fudgeArray(a).sort(axis);
+}
+/**
+* Return indices that would sort an array. This may be an unstable sorting
+* algorithm; it need not preserve order of indices in ties.
+*
+* Returns an array of `int32` indices.
+*
+* The array is sorted along a specified axis (the last by default).
+*/
+function argsort(a, axis = -1) {
+	return fudgeArray(a).argsort(axis);
+}
+/**
+* Take elements from an array along an axis.
+*
+* This is equivalent to advanced indexing with integer indices over that
+* numbered axis. By default, the flattened array is used.
+*/
+function take(a, indices, axis = null) {
+	if (axis === null) {
+		a = ravel(a);
+		axis = 0;
+	}
+	axis = require_backend.checkAxis(axis, ndim(a));
+	return gather(a, [indices], [axis], axis);
+}
 /** Return if two arrays are element-wise equal within a tolerance. */
 function allclose(actual, expected, options) {
 	const { rtol = 1e-5, atol = 1e-7 } = options ?? {};
@@ -5356,11 +6207,11 @@ function allclose(actual, expected, options) {
 }
 /** Matrix product of two arrays. */
 function matmul(x, y) {
-	if (ndim(x) === 0 || ndim(y) === 0) throw new TypeError("matmul: x and y must be at least 1D");
+	if (ndim(x) === 0 || ndim(y) === 0) throw new Error("matmul: x and y must be at least 1D");
 	x = x, y = y;
 	if (y.ndim === 1) return dot$2(x, y);
 	const numBatchDims = Math.min(Math.max(x.ndim, 2), y.ndim) - 2;
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-2],
 		lhsBatchDims: require_backend.range(-2 - numBatchDims, -2),
@@ -5368,11 +6219,11 @@ function matmul(x, y) {
 	});
 }
 /** Dot product of two arrays. */
-function dot(x, y) {
+function dot$1(x, y) {
 	if (ndim(x) === 0 || ndim(y) === 0) return multiply(x, y);
 	x = x, y = y;
 	if (y.ndim === 1) return dot$2(x, y);
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-2]
 	});
@@ -5388,7 +6239,7 @@ function tensordot(x, y, axes = 2) {
 	x = fudgeArray(x);
 	y = fudgeArray(y);
 	if (typeof axes === "number") axes = [require_backend.range(-axes, 0), require_backend.range(axes)];
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: axes[0],
 		rhsContractingDims: axes[1]
 	});
@@ -5481,7 +6332,7 @@ function einsum(...args) {
 		const [b, bidx] = processSingleTensor(operands[j], indices[j], indices[i]);
 		indexReduced = indexReduced.filter((idx) => aidx.includes(idx));
 		const indexBatch = aidx.filter((idx) => bidx.includes(idx) && !indexReduced.includes(idx));
-		const result = dot$1(a, b, {
+		const result = dot(a, b, {
 			lhsContractingDims: indexReduced.map((idx) => aidx.indexOf(idx)),
 			rhsContractingDims: indexReduced.map((idx) => bidx.indexOf(idx)),
 			lhsBatchDims: indexBatch.map((idx) => aidx.indexOf(idx)),
@@ -5509,7 +6360,7 @@ function einsum(...args) {
 * Returned array has shape `[...x.shape[:-1], ...y.shape[:-1]]`.
 */
 function inner(x, y) {
-	return dot$1(fudgeArray(x), fudgeArray(y), {
+	return dot(fudgeArray(x), fudgeArray(y), {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-1]
 	});
@@ -5542,6 +6393,30 @@ function vecdot(x, y, { axis } = {}) {
 function vdot(x, y) {
 	return dot$2(ravel(x), ravel(y));
 }
+function _convImpl(name, x, y, mode) {
+	if (x.ndim !== 1 || y.ndim !== 1) throw new Error(`${name}: both inputs must be 1D arrays, got ${x.ndim}D and ${y.ndim}D`);
+	let flipOutput = false;
+	if (x.shape[0] < y.shape[0]) {
+		[x, y] = [y, x];
+		if (name === "correlate") flipOutput = true;
+	}
+	if (name === "convolve") y = flip(y);
+	let padding;
+	if (mode === "valid") padding = "VALID";
+	else if (mode === "same") padding = "SAME_LOWER";
+	else if (mode === "full") padding = [[y.shape[0] - 1, y.shape[0] - 1]];
+	else throw new Error(`${name}: invalid mode ${mode}, expected "full", "same", or "valid"`);
+	const z = conv(x.slice(null, null), y.slice(null, null), [1], padding).slice(0, 0);
+	return flipOutput ? flip(z) : z;
+}
+/** Convolution of two one-dimensional arrays. */
+function convolve(x, y, mode = "full") {
+	return _convImpl("convolve", x, y, mode);
+}
+/** Correlation of two one dimensional arrays. */
+function correlate(x, y, mode = "valid") {
+	return _convImpl("correlate", x, y, mode);
+}
 /**
 * Return a tuple of coordinate matrices from coordinate vectors.
 *
@@ -5550,7 +6425,7 @@ function vdot(x, y) {
 */
 function meshgrid(xs, { indexing } = {}) {
 	indexing ??= "xy";
-	for (const x of xs) if (x.ndim !== 1) throw new TypeError(`meshgrid: all inputs must be 1D arrays, got ${x.ndim}D array`);
+	for (const x of xs) if (x.ndim !== 1) throw new Error(`meshgrid: all inputs must be 1D arrays, got ${x.ndim}D array`);
 	if (xs.length <= 1) return xs;
 	if (indexing === "xy") {
 		const [a, b, ...rest] = xs;
@@ -5569,43 +6444,6 @@ function meshgrid(xs, { indexing } = {}) {
 	return xs.map((x, i) => broadcast(x, shape$1, [...require_backend.range(i), ...require_backend.range(i + 1, xs.length)]));
 }
 /**
-* Return an array with ones on and below the diagonal and zeros elsewhere.
-*
-* If `k` is provided, it specifies the sub-diagonal on and below which the
-* array is filled with ones. `k=0` is the main diagonal, `k<0` is below it, and
-* `k>0` is above it.
-*/
-function tri(n, m, k = 0, { dtype, device } = {}) {
-	m ??= n;
-	dtype ??= require_backend.DType.Float32;
-	if (!Number.isInteger(n) || n < 0) throw new TypeError(`tri: n must be a non-negative integer, got ${n}`);
-	if (!Number.isInteger(m) || m < 0) throw new TypeError(`tri: m must be a non-negative integer, got ${m}`);
-	if (!Number.isInteger(k)) throw new TypeError(`tri: k must be an integer, got ${k}`);
-	const rows = arange(k, n + k, 1, {
-		dtype: require_backend.DType.Int32,
-		device
-	});
-	const cols = arange(0, m, 1, {
-		dtype: require_backend.DType.Int32,
-		device
-	});
-	return rows.reshape([n, 1]).greaterEqual(cols).astype(dtype);
-}
-/** Return the lower triangle of an array. Must be of dimension >= 2. */
-function tril(a, k = 0) {
-	if (ndim(a) < 2) throw new TypeError(`tril: input array must be at least 2D, got ${ndim(a)}D`);
-	a = fudgeArray(a);
-	const [n, m] = a.shape.slice(-2);
-	return where(tri(n, m, k, { dtype: bool }), a.ref, zerosLike(a));
-}
-/** Return the upper triangle of an array. Must be of dimension >= 2. */
-function triu(a, k = 0) {
-	if (ndim(a) < 2) throw new TypeError(`tril: input array must be at least 2D, got ${ndim(a)}D`);
-	a = fudgeArray(a);
-	const [n, m] = a.shape.slice(-2);
-	return where(tri(n, m, k - 1, { dtype: bool }), zerosLike(a.ref), a);
-}
-/**
 * Clip (limit) the values in an array.
 *
 * Given an interval, values outside the interval are clipped to the interval
@@ -5629,8 +6467,6 @@ function absolute(x) {
 	x = fudgeArray(x);
 	return where(less(x.ref, 0), x.ref.mul(-1), x);
 }
-/** @function Alias of `jax.numpy.absolute()`. */
-const abs = absolute;
 /** Return an element-wise indication of sign of the input. */
 function sign(x) {
 	x = fudgeArray(x);
@@ -5674,6 +6510,20 @@ function tan(x) {
 	x = fudgeArray(x);
 	return sin(x.ref).div(cos(x));
 }
+/**
+* @function
+* Return the normalized sinc function.
+*
+* The sinc function is defined as `sin(πx) / (πx)` for `x != 0`, and `1` for `x = 0`.
+* This is the normalized sinc function commonly used in signal processing.
+*
+* **Note:** JVP is not supported at x=0 due to discontinuous derivative. This
+* requires a custom JVP rule to handle properly (see JAX implementation).
+*/
+const sinc = jit$1(function sinc$1(x) {
+	const pix = x.ref.mul(Math.PI);
+	return where(equal(x, 0), 1, sin(pix.ref).div(pix));
+});
 /** Element-wise inverse cosine function (inverse of cos). */
 function acos(x) {
 	return subtract(pi / 2, asin(x));
@@ -5709,12 +6559,6 @@ const atan2 = jit$1(function atan2$1(y, x) {
 	const denom = where(xNeg, y, r.add(x));
 	return atan(numer.div(denom)).mul(2);
 });
-/** @function Alias of `jax.numpy.acos()`. */
-const arccos = acos;
-/** @function Alias of `jax.numpy.atan()`. */
-const arctan = atan;
-/** @function Alias of `jax.numpy.atan2()`. */
-const arctan2 = atan2;
 /** Element-wise subtraction, with broadcasting. */
 function subtract(x, y) {
 	x = fudgeArray(x);
@@ -5732,6 +6576,25 @@ function trueDivide(x, y) {
 	return x.div(y);
 }
 /**
+* Return the largest integer smaller than or equal to the division of the inputs.
+*
+* The result is always rounded towards negative infinity.
+*
+* For floating-point inputs, this is equivalent to `floor(x / y)`.
+* For integer inputs, we use `(x - remainder(x, y)) / y` to handle
+* negative values correctly (note: may overflow near int32 boundaries).
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Element-wise floor division of x by y.
+*/
+function floorDivide(x, y) {
+	x = fudgeArray(x);
+	y = fudgeArray(y);
+	if (require_backend.isFloatDtype(x.dtype) || require_backend.isFloatDtype(y.dtype)) return floor(trueDivide(x, y));
+	return subtract(x, remainder(x.ref, y.ref)).div(y);
+}
+/**
 * @function
 * Calculate element-wise floating-point modulo operation.
 */
@@ -5745,8 +6608,20 @@ const fmod = jit$1(function fmod$1(x, y) {
 const remainder = jit$1(function remainder$1(x, y) {
 	return mod(mod(x, y.ref).add(y.ref), y);
 });
-/** @function Alias of `jax.numpy.trueDivide()`. */
-const divide = trueDivide;
+/**
+* Return element-wise quotient and remainder simultaneously.
+*
+* Equivalent to `[floorDivide(x, y), remainder(x, y)]`.
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Tuple of [quotient, remainder].
+*/
+function divmod(x, y) {
+	const xArr = fudgeArray(x);
+	const yArr = fudgeArray(y);
+	return [floorDivide(xArr.ref, yArr.ref), remainder(xArr, yArr)];
+}
 /** Round input to the nearest integer towards zero. */
 function trunc(x) {
 	return idiv(x, 1);
@@ -5768,9 +6643,9 @@ function ldexp(x1, x2) {
 */
 function frexp(x) {
 	x = fudgeArray(x);
-	const absx = abs(x.ref);
+	const absx = absolute(x.ref);
 	const exponent = where(equal(x.ref, 0), 0, floor(log2(absx)).add(1).astype(require_backend.DType.Int32));
-	const mantissa = divide(x, exp2(exponent.ref.astype(x.dtype)));
+	const mantissa = x.div(exp2(exponent.ref.astype(x.dtype)));
 	return [mantissa, exponent];
 }
 /** Calculate `2**p` for all p in the input array. */
@@ -5813,10 +6688,8 @@ const power = jit$1(function power$1(x1, x2) {
 	const x2i = trunc(x2.ref);
 	const shouldBeNaN = multiply(x2.ref.notEqual(x2i.ref), x1.ref.less(0));
 	const resultSign = where(mod(x2i, 2).notEqual(0), where(x1.ref.less(0), -1, 1), 1);
-	return where(shouldBeNaN, nan, exp(log(abs(x1)).mul(x2)).mul(resultSign));
+	return where(shouldBeNaN, nan, exp(log(absolute(x1)).mul(x2)).mul(resultSign));
 });
-/** @function Alias of `jax.numpy.power()`. */
-const pow = power;
 /** @function Calculate the element-wise cube root of the input array. */
 const cbrt = jit$1(function cbrt$1(x) {
 	const sgn = where(less(x.ref, 0), -1, 1);
@@ -5882,69 +6755,360 @@ const arccosh = jit$1(function arccosh$1(x) {
 const arctanh = jit$1(function arctanh$1(x) {
 	return log(add(1, x.ref).div(subtract(1, x))).mul(.5);
 });
-/** @function Alias of `jax.numpy.arcsinh()`. */
-const asinh = arcsinh;
-/** @function Alias of `jax.numpy.arccosh()`. */
-const acosh = arccosh;
-/** @function Alias of `jax.numpy.arctanh()`. */
-const atanh = arctanh;
 /**
-* Compute the variance of an array.
-*
-* The variance is computed for the flattened array by default, otherwise over
-* the specified axis.
+* Compute the variance of an array.
+*
+* The variance is computed for the flattened array by default, otherwise over
+* the specified axis.
+*
+* If `correction` is provided, the divisor in calculation is `N - correction`,
+* where `N` represents the number of elements (e.g., for Bessel's correction).
+*/
+function var_(x, axis = null, opts) {
+	x = fudgeArray(x);
+	axis = require_backend.normalizeAxis(axis, x.ndim);
+	const n = axis.reduce((acc, a) => acc * x.shape[a], 1);
+	if (n === 0) throw new Error("var: cannot compute variance over zero-length axis");
+	const mu = opts?.mean !== void 0 ? opts.mean : mean(x.ref, axis, { keepdims: true });
+	return square(x.sub(mu)).sum(axis, { keepdims: opts?.keepdims }).mul(1 / (n - (opts?.correction ?? 0)));
+}
+/**
+* Compute the standard deviation of an array.
+*
+* The standard deviation is computed for the flattened array by default,
+* otherwise over the specified axis.
+*
+* If `correction` is provided, the divisor in calculation is `N - correction`,
+* where `N` represents the number of elements (e.g., for Bessel's correction).
+*/
+function std(x, axis = null, opts) {
+	return sqrt(var_(x, axis, opts));
+}
+/** Estimate the sample covariance of a set of variables. */
+function cov(x, y = null, { rowvar = true } = {}) {
+	x = fudgeArray(x);
+	if (x.ndim === 1) x = x.reshape([1, x.shape[0]]);
+	if (y !== null) {
+		y = fudgeArray(y);
+		if (y.ndim === 1) y = y.reshape([1, y.shape[0]]);
+		x = vstack([x, y]);
+	}
+	if (!rowvar) x = x.transpose();
+	const [_M, N] = x.shape;
+	x = x.ref.sub(x.mean(1, { keepdims: true }));
+	return dot$1(x.ref, x.transpose()).div(N - 1);
+}
+/** Compute the Pearson correlation coefficients (in range `[-1, 1]`). */
+function corrcoef(x, y) {
+	const c = cov(x, y);
+	const variances = diag(c.ref);
+	const norm = sqrt(outer(variances.ref, variances));
+	return c.div(norm);
+}
+/** Test element-wise for positive or negative infinity, return bool array. */
+function isinf(x) {
+	x = fudgeArray(x);
+	return require_backend.isFloatDtype(x.dtype) ? x.ref.equal(Infinity).add(x.equal(-Infinity)) : fullLike$1(x, false);
+}
+/** Test element-wise for NaN (Not a Number). */
+function isnan(x) {
+	x = fudgeArray(x);
+	return require_backend.isFloatDtype(x.dtype) ? x.ref.notEqual(x) : fullLike$1(x, false);
+}
+/** Test element-wise for negative infinity, return bool array. */
+function isneginf(x) {
+	x = fudgeArray(x);
+	return require_backend.isFloatDtype(x.dtype) ? x.equal(-Infinity) : fullLike$1(x, false);
+}
+/** Test element-wise for positive infinity, return bool array. */
+function isposinf(x) {
+	x = fudgeArray(x);
+	return require_backend.isFloatDtype(x.dtype) ? x.equal(Infinity) : fullLike$1(x, false);
+}
+/**
+* @function
+* Test element-wise for finite values (not infinity or NaN).
+*/
+const isfinite = jit$1(function isfinite$1(x) {
+	if (!require_backend.isFloatDtype(x.dtype)) return fullLike$1(x, true);
+	return isnan(x.ref).add(isinf(x)).notEqual(true);
+});
+//#endregion
+//#region src/library/lax-linalg.ts
+var lax_linalg_exports = {};
+__export(lax_linalg_exports, {
+	cholesky: () => cholesky$1,
+	lu: () => lu,
+	triangularSolve: () => triangularSolve
+});
+/**
+* Compute the Cholesky decomposition of a symmetric positive-definite matrix.
+*
+* The Cholesky decomposition of a matrix `A` is:
+*
+* - A = L @ L^T  (for upper=false, default)
+* - A = U^T @ U  (for upper=true)
+*
+* where `L` is a lower-triangular matrix and `U` is an upper-triangular matrix.
+* The input matrix must be symmetric and positive-definite.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const x = np.array([[2., 1.], [1., 2.]]);
+*
+* // Lower Cholesky factorization (default):
+* const L = lax.linalg.cholesky(x);
+* // L ≈ [[1.4142135, 0], [0.70710677, 1.2247449]]
+*
+* // Upper Cholesky factorization:
+* const U = lax.linalg.cholesky(x, { upper: true });
+* // U ≈ [[1.4142135, 0.70710677], [0, 1.2247449]]
+* ```
+*/
+function cholesky$1(a, { upper = false } = {}) {
+	const L = cholesky$2(a);
+	return upper ? moveaxis$1(L, -2, -1) : L;
+}
+/**
+* LU decomposition with partial pivoting.
+*
+* Computes the matrix decomposition: `P @ A = L @ U`, where `P` is a
+* permutation of the rows of `A`, `L` is lower-triangular with unit diagonal,
+* and `U` is upper-triangular.
+*
+* @param x - A batch of matrices with shape `[..., m, n]`.
+*
+* @returns A tuple `(lu, pivots, permutation)` where:
+* - `lu`: combined lower and upper triangular matrices.
+* - `pivots`: an array of pivot indices with shape `[..., min(m, n)]`.
+* - `permutation`: the permutation generated by pivots with shape `[..., m]`.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const A = np.array([[4., 3.], [6., 3.]]);
+* const [lu, pivots, permutation] = lax.linalg.lu(A);
+* // lu ≈ [[6., 3.], [0.6666667, 1.0]]
+* // pivots = [1, 1]
+* // permutation = [1, 0]
+* ```
+*/
+function lu(x) {
+	return lu$1(x);
+}
+/**
+* Solve a triangular linear system.
+*
+* Solves `a @ x = b` (if leftSide=true) or `x @ a = b` (if leftSide=false)
+* where `a` is a triangular matrix.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const L = np.array([[2., 0.], [1., 3.]]);
+* const b = np.array([4., 7.]).reshape([2, 1]);
+*
+* // Solve L @ x = b
+* const x = lax.linalg.triangularSolve(L, b, { leftSide: true, lower: true });
+* // x = [[2.], [5./3.]]
+* ```
+*/
+function triangularSolve(a, b, { leftSide = false, lower = false, transposeA = false, unitDiagonal = false } = {}) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	if (!leftSide) transposeA = !transposeA;
+	else b = moveaxis$1(b, -2, -1);
+	if (transposeA) a = moveaxis$1(a, -2, -1);
+	let x = triangularSolve$1(a, b, {
+		lower,
+		unitDiagonal
+	});
+	if (leftSide) x = moveaxis$1(x, -2, -1);
+	return x;
+}
+//#endregion
+//#region src/library/lax.ts
+var lax_exports = {};
+__export(lax_exports, {
+	conv: () => conv,
+	convGeneralDilated: () => convGeneralDilated,
+	convWithGeneralPadding: () => convWithGeneralPadding,
+	dot: () => dot,
+	erf: () => erf,
+	erfc: () => erfc,
+	linalg: () => lax_linalg_exports,
+	reduceWindow: () => reduceWindow,
+	stopGradient: () => stopGradient$1
+});
+/**
+* General dot product/contraction operator.
 *
-* If `correction` is provided, the divisor in calculation is `N - correction`,
-* where `N` represents the number of elements (e.g., for Bessel's correction).
+* Prefer higher-level functions like `jax.numpy.dot()`, `jax.numpy.matmul()`,
+* `jax.numpy.tensordot()`, and `jax.numpy.einsum()` where possible.
 */
-function var_(x, axis = null, opts) {
-	x = fudgeArray(x);
-	axis = require_backend.normalizeAxis(axis, x.ndim);
-	const n = axis.reduce((acc, a) => acc * x.shape[a], 1);
-	if (n === 0) throw new Error("var: cannot compute variance over zero-length axis");
-	const mu = opts?.mean !== void 0 ? opts.mean : mean(x.ref, axis, { keepdims: true });
-	return square(x.sub(mu)).sum(axis, { keepdims: opts?.keepdims }).mul(1 / (n - (opts?.correction ?? 0)));
+function dot(lhs, rhs, { lhsContractingDims: lc = [], rhsContractingDims: rc = [], lhsBatchDims: lb = [], rhsBatchDims: rb = [] } = {}) {
+	if (lc.length !== rc.length) throw new Error(`dot: contracting dims lengths mismatch, got ${JSON.stringify(lc)} and ${JSON.stringify(rc)}`);
+	else if (lb.length !== rb.length) throw new Error(`dot: batch dims lengths mismatch, got ${JSON.stringify(lb)} and ${JSON.stringify(rb)}`);
+	lc = lc.map((a) => require_backend.checkAxis(a, lhs.ndim));
+	rc = rc.map((a) => require_backend.checkAxis(a, rhs.ndim));
+	lb = lb.map((a) => require_backend.checkAxis(a, lhs.ndim));
+	rb = rb.map((a) => require_backend.checkAxis(a, rhs.ndim));
+	if (lc.some((a) => lb.includes(a))) throw new Error(`dot: lhs contracting dims ${JSON.stringify(lc)} overlap with batch dims ${JSON.stringify(lb)}`);
+	else if (rc.some((a) => rb.includes(a))) throw new Error(`dot: rhs contracting dims ${JSON.stringify(rc)} overlap with batch dims ${JSON.stringify(rb)}`);
+	const lf = require_backend.range(lhs.ndim).filter((a) => !lc.includes(a) && !lb.includes(a));
+	const rf = require_backend.range(rhs.ndim).filter((a) => !rc.includes(a) && !rb.includes(a));
+	const lhs2 = lhs.transpose([
+		...lb,
+		...lf,
+		...lc
+	]);
+	const rhs2 = rhs.transpose([
+		...rb,
+		...rf,
+		...rc
+	]);
+	if (lc.length === 0) return mul(lhs2.reshape([
+		...lb.map((a) => lhs.shape[a]),
+		...lf.map((a) => lhs.shape[a]),
+		...require_backend.rep(rf.length, 1)
+	]), rhs2.reshape([
+		...rb.map((a) => rhs.shape[a]),
+		...require_backend.rep(lf.length, 1),
+		...rf.map((a) => rhs.shape[a])
+	]));
+	const dotShapeX = lc.map((a) => lhs.shape[a]);
+	const dotShapeY = rc.map((a) => rhs.shape[a]);
+	if (!require_backend.deepEqual(dotShapeX, dotShapeY)) throw new Error(`dot: shapes not aligned along contracting dims: ${JSON.stringify(dotShapeX)} != ${JSON.stringify(dotShapeY)}`);
+	return dot$2(lhs2.reshape([
+		...lb.map((a) => lhs.shape[a]),
+		...lf.map((a) => lhs.shape[a]),
+		...require_backend.rep(rf.length, 1),
+		require_backend.prod(dotShapeX)
+	]), rhs2.reshape([
+		...rb.map((a) => rhs.shape[a]),
+		...require_backend.rep(lf.length, 1),
+		...rf.map((a) => rhs.shape[a]),
+		require_backend.prod(dotShapeY)
+	]));
+}
+function padtypeToPads(inShape, filterShape, strides, dilation, padding) {
+	const padType = padding.toUpperCase();
+	switch (padType) {
+		case "VALID": return require_backend.rep(inShape.length, [0, 0]);
+		case "SAME":
+		case "SAME_LOWER": {
+			const outShape = inShape.map((size$1, i) => Math.ceil(size$1 / strides[i]));
+			const padSizes = require_backend.zipn(outShape, strides, filterShape, dilation, inShape).map(([o, s, k, d, i]) => Math.max(0, (o - 1) * s + 1 + (k - 1) * d - i));
+			if (padType === "SAME") return padSizes.map((size$1) => [size$1 >> 1, size$1 - (size$1 >> 1)]);
+			else return padSizes.map((size$1) => [size$1 - (size$1 >> 1), size$1 >> 1]);
+		}
+		default: throw new Error(`Unknown padding type: ${padType}`);
+	}
 }
 /**
-* Compute the standard deviation of an array.
+* General n-dimensional convolution operator, with optional dilation.
 *
-* The standard deviation is computed for the flattened array by default,
-* otherwise over the specified axis.
+* The semantics of this operation mimic the `jax.lax.conv_general_dilated`
+* function in JAX, which wraps XLA's general convolution operator.
 *
-* If `correction` is provided, the divisor in calculation is `N - correction`,
-* where `N` represents the number of elements (e.g., for Bessel's correction).
+* Grouped convolutions are not supported right now.
 */
-function std(x, axis = null, opts) {
-	return sqrt(var_(x, axis, opts));
+function convGeneralDilated(lhs, rhs, windowStrides, padding, { lhsDilation, rhsDilation, featureGroupCount = 1 } = {}) {
+	if (lhs.ndim < 2) throw new Error("lhs must have at least 2 dimensions");
+	if (rhs.ndim < 2) throw new Error("rhs must have at least 2 dimensions");
+	if (typeof padding === "string") {
+		if (lhsDilation?.some((d) => d !== 1)) throw new Error("String padding is not supported for transposed convolutions");
+		padding = padtypeToPads(lhs.shape.slice(2), rhs.shape.slice(2), windowStrides, rhsDilation ?? require_backend.rep(rhs.ndim - 2, 1), padding);
+	}
+	if (featureGroupCount !== 1) {
+		const G = featureGroupCount;
+		const [N, C_in, ...xs] = lhs.shape;
+		const [C_out, C_in_per_group, ...ks] = rhs.shape;
+		if (C_in % G !== 0) throw new Error(`featureGroupCount=${G} must divide input channels=${C_in}`);
+		if (C_out % G !== 0) throw new Error(`featureGroupCount=${G} must divide output channels=${C_out}`);
+		if (C_in / G !== C_in_per_group) throw new Error(`rhs input channels=${C_in_per_group} must equal lhs input channels / groups=${C_in / G}`);
+		const lhsGrouped = moveaxis(lhs.reshape([
+			N,
+			G,
+			C_in / G,
+			...xs
+		]), 1, 0);
+		const rhsGrouped = rhs.reshape([
+			G,
+			C_out / G,
+			C_in_per_group,
+			...ks
+		]);
+		const result = conv$1(lhsGrouped, rhsGrouped, {
+			vmapDims: 1,
+			strides: windowStrides,
+			padding,
+			lhsDilation,
+			rhsDilation
+		});
+		const ys = result.shape.slice(3);
+		return moveaxis(result, 0, 1).reshape([
+			N,
+			C_out,
+			...ys
+		]);
+	}
+	return conv$1(lhs, rhs, {
+		strides: windowStrides,
+		padding,
+		lhsDilation,
+		rhsDilation
+	});
 }
-/** Test element-wise for positive or negative infinity, return bool array. */
-function isinf(x) {
-	x = fudgeArray(x);
-	return require_backend.isFloatDtype(x.dtype) ? x.ref.equal(Infinity).add(x.equal(-Infinity)) : fullLike$1(x, false);
+/** Convenience wrapper around `convGeneralDilated`. */
+function convWithGeneralPadding(lhs, rhs, windowStrides, padding, lhsDilation, rhsDilation) {
+	return convGeneralDilated(lhs, rhs, windowStrides, padding, {
+		lhsDilation,
+		rhsDilation
+	});
 }
-/** Test element-wise for NaN (Not a Number). */
-function isnan(x) {
-	x = fudgeArray(x);
-	return require_backend.isFloatDtype(x.dtype) ? x.ref.notEqual(x) : fullLike$1(x, false);
+/** Convenience wrapper around `convGeneralDilated`. */
+function conv(lhs, rhs, windowStrides, padding) {
+	return convGeneralDilated(lhs, rhs, windowStrides, padding);
 }
-/** Test element-wise for negative infinity, return bool array. */
-function isneginf(x) {
-	x = fudgeArray(x);
-	return require_backend.isFloatDtype(x.dtype) ? x.equal(-Infinity) : fullLike$1(x, false);
+/** Reduce a computation over padded windows. */
+function reduceWindow(operand, computation, windowDimensions, windowStrides) {
+	if (operand.ndim < windowDimensions.length) throw new Error(`Operand dimensions ${operand.ndim} < window ${windowDimensions.length}`);
+	if (!windowStrides) windowStrides = require_backend.rep(windowDimensions.length, 1);
+	for (let i = 0; i < operand.ndim; i++) computation = vmap$1(computation, 0);
+	return computation(bind1(Primitive.Pool, [operand], {
+		window: windowDimensions,
+		strides: windowStrides
+	}));
 }
-/** Test element-wise for positive infinity, return bool array. */
-function isposinf(x) {
-	x = fudgeArray(x);
-	return require_backend.isFloatDtype(x.dtype) ? x.equal(Infinity) : fullLike$1(x, false);
+/** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
+function erf(x) {
+	return erf$1(x);
 }
 /**
-* @function
-* Test element-wise for finite values (not infinity or NaN).
+* The complementary error function: `erfc(x) = 1 - erf(x)`.
+*
+* This function is more accurate than `1 - erf(x)` for large values of `x`,
+* where `erf(x)` is very close to 1.
 */
-const isfinite = jit$1(function isfinite$1(x) {
-	if (!require_backend.isFloatDtype(x.dtype)) return fullLike$1(x, true);
-	return isnan(x.ref).add(isinf(x)).notEqual(true);
-});
+function erfc(x) {
+	return erfc$1(x);
+}
+/**
+* Stops gradient computation.
+*
+* Behaves as the identity function but prevents the flow of gradients during
+* forward or reverse-mode automatic differentiation.
+*/
+function stopGradient$1(x) {
+	return stopGradient(x);
+}
 //#endregion
 //#region src/library/nn.ts
@@ -5954,6 +7118,10 @@ __export(nn_exports, {
 	elu: () => elu,
 	gelu: () => gelu,
 	glu: () => glu,
+	hardSigmoid: () => hardSigmoid,
+	hardSilu: () => hardSilu,
+	hardSwish: () => hardSilu,
+	hardTanh: () => hardTanh,
 	identity: () => identity,
 	leakyRelu: () => leakyRelu,
 	logSigmoid: () => logSigmoid,
@@ -5964,14 +7132,17 @@ __export(nn_exports, {
 	oneHot: () => oneHot,
 	relu: () => relu,
 	relu6: () => relu6,
+	selu: () => selu,
 	sigmoid: () => sigmoid,
 	silu: () => silu,
 	softSign: () => softSign,
 	softmax: () => softmax,
 	softplus: () => softplus,
+	sparsePlus: () => sparsePlus,
+	sparseSigmoid: () => sparseSigmoid,
 	squareplus: () => squareplus,
 	standardize: () => standardize,
-	swish: () => swish
+	swish: () => silu
 });
 /**
 * Rectified Linear Unit (ReLU) activation function:
@@ -6006,6 +7177,28 @@ function softplus(x) {
 	return log(exp(x).add(1));
 }
 /**
+* @function
+* Sparse plus function:
+*
+* - When `x <= -1`: `0`
+* - When `-1 < x < 1`: `(x+1)**2 / 4`
+* - When `x >= 1`: `x`
+*/
+const sparsePlus = jit$1((x) => {
+	return where(x.ref.lessEqual(-1), 0, where(x.ref.less(1), square(x.ref.add(1)).mul(.25), x));
+});
+/**
+* @function
+* Sparse sigmoid activation function.
+*
+* - When `x <= -1`: `0`
+* - When `-1 < x < 1`: `(x + 1) / 2`
+* - When `x >= 1`: `1`
+*/
+const sparseSigmoid = jit$1((x) => {
+	return clip(x.add(1).mul(.5), 0, 1);
+});
+/**
 * Soft-sign activation function, computed element-wise:
 * `softsign(x) = x / (|x| + 1)`.
 */
@@ -6027,17 +7220,6 @@ const silu = jit$1(function silu$1(x) {
 	return x.ref.mul(sigmoid(x));
 });
 /**
-* @function
-* Sigmoid-weighted Linear Unit (SiLU) activation function, also known as
-* Swish, computed element-wise:
-* `silu(x) = x * sigmoid(x) = x / (1 + exp(-x))`.
-*
-* `swish()` and `silu()` are both aliases for the same function.
-*
-* Reference: https://en.wikipedia.org/wiki/Swish_function
-*/
-const swish = silu;
-/**
 * Log-sigmoid activation function, computed element-wise:
 * `log_sigmoid(x) = log(sigmoid(x)) = -log(1 + exp(-x))`.
 */
@@ -6054,6 +7236,19 @@ function leakyRelu(x, negativeSlope = .01) {
 	x = fudgeArray(x);
 	return where(less(x.ref, 0), x.ref.mul(negativeSlope), x);
 }
+/** Hard sigmoid activation function: `relu6(x+3)/6`. */
+function hardSigmoid(x) {
+	return relu6(add(x, 3)).mul(1 / 6);
+}
+/** Hard SiLU (swish) activation function: `x * hardSigmoid(x)`. */
+function hardSilu(x) {
+	x = fudgeArray(x);
+	return x.ref.mul(hardSigmoid(x));
+}
+/** Hard tanh activation function: `clip(x, -1, 1)`. */
+function hardTanh(x) {
+	return clip(x, -1, 1);
+}
 /**
 * Exponential linear unit activation function.
 *
@@ -6076,6 +7271,20 @@ function celu(x, alpha = 1) {
 }
 /**
 * @function
+* Scaled exponential linear unit activation.
+*
+* Computes the element-wise function:
+* `selu(x) = lambda * (x > 0 ? x : alpha * (exp(x) - 1))`
+*
+* Where `alpha = 1.6732632423543772` and `lambda = 1.0507009873554805`.
+*/
+const selu = jit$1(function selu$1(x) {
+	const alpha = 1.6732632423543772;
+	const lambda = 1.0507009873554805;
+	return where(x.ref.less(0), expm1(x.ref).mul(alpha), x).mul(lambda);
+});
+/**
+* @function
 * Gaussian error linear unit (GELU) activation function.
 *
 * This is computed element-wise. There are two variants depending on whether
@@ -6229,35 +7438,46 @@ var random_exports = {};
 __export(random_exports, {
 	bernoulli: () => bernoulli,
 	bits: () => bits,
+	cauchy: () => cauchy,
 	exponential: () => exponential,
+	gumbel: () => gumbel,
 	key: () => key,
+	laplace: () => laplace,
+	multivariateNormal: () => multivariateNormal,
 	normal: () => normal,
 	split: () => split,
 	uniform: () => uniform
 });
-function validateKeyShape(key$1) {
+function validateKeyShape(key$1, scalar = false) {
 	if (key$1.ndim === 0) throw new Error("Key must have at least one dimension.");
 	if (key$1.shape[key$1.shape.length - 1] !== 2) throw new Error(`Invalid key shape: ${key$1.shape}. Expected last dimension to be 2.`);
+	if (scalar && key$1.shape.length > 1) throw new Error(`Expected a single PRNG key, but got a batch of keys with shape ${JSON.stringify(key$1.shape)} - use jax.vmap for batching.`);
 	return key$1.shape.slice(0, -1);
 }
+function getK01(key$1) {
+	const keyShape = validateKeyShape(key$1, true);
+	let [k0, k1] = split$2(key$1, -1, [1, 1]);
+	k0 = k0.reshape(keyShape);
+	k1 = k1.reshape(keyShape);
+	return [k0, k1];
+}
 /** Create a pseudo-random number generator (PRNG) key from a 32-bit integer seed. */
 function key(seed) {
-	seed = seed >>> 0;
-	return array([0, seed], { dtype: require_backend.DType.Uint32 });
+	seed = array(seed, { dtype: require_backend.DType.Uint32 });
+	if (seed.ndim !== 0) throw new Error(`key: seed must be a scalar integer, but got shape ${seed.shape} - use jax.vmap for batching.`);
+	return stack([0, seed]);
 }
 /** Splits a PRNG key into `num` new keys by adding a leading axis. */
 function split(key$1, num = 2) {
 	const shape$1 = typeof num === "number" ? [num] : num;
 	for (const len of shape$1) if (len <= 0 || !Number.isInteger(len)) throw new Error(`Invalid split length: ${len}. Must be a positive integer.`);
-	const keyShape = validateKeyShape(key$1);
-	const k0 = key$1.ref.slice(...keyShape.map(() => null), 0);
-	const k1 = key$1.slice(...keyShape.map(() => null), 1);
+	const [k0, k1] = getK01(key$1);
 	return stack([randomBits(k0.ref, k1.ref, shape$1, 0), randomBits(k0, k1, shape$1, 1)], -1);
 }
 /** Sample uniform bits in the form of unsigned integers. */
 function bits(key$1, shape$1 = []) {
-	const keyShape = validateKeyShape(key$1);
-	return randomBits(key$1.ref.slice(...keyShape.map(() => null), 0), key$1.slice(...keyShape.map(() => null), 1), shape$1);
+	const [k0, k1] = getK01(key$1);
+	return randomBits(k0, k1, shape$1);
 }
 /**
 * @function
@@ -6289,6 +7509,16 @@ function bernoulli(key$1, p = .5, shape$1 = []) {
 }
 /**
 * @function
+* Sample from a Cauchy distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: `x = tan(π * (u - 0.5))` where u ~ Uniform(0, 1).
+*/
+const cauchy = jit$1(function cauchy$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	return tan(u.sub(.5).mul(Math.PI));
+}, { staticArgnums: [1] });
+/**
+* @function
 * Sample exponential random values according to `p(x) = exp(-x)`.
 */
 const exponential = jit$1(function exponential$1(key$1, shape$1 = []) {
@@ -6297,6 +7527,56 @@ const exponential = jit$1(function exponential$1(key$1, shape$1 = []) {
 }, { staticArgnums: [1] });
 /**
 * @function
+* Sample from a Gumbel distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: `x = -log(-log(1 - u))` where u ~ Uniform(0, 1).
+*/
+const gumbel = jit$1(function gumbel$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	return negative(log(negative(log1p(negative(u)))));
+}, { staticArgnums: [1] });
+/**
+* @function
+* Sample from a Laplace distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: the CDF is `F(x) = 0.5 + 0.5 * sign(x) * (1 - exp(-|x|))`.
+* Inverting: `x = -sign(u - 0.5) * log(1 - 2 * |u - 0.5|)`.
+*/
+const laplace = jit$1(function laplace$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	const centered = u.sub(.5);
+	const s = sign(centered.ref);
+	const absVal = absolute(centered);
+	return s.mul(log1p(absVal.mul(-2)).mul(-1));
+}, { staticArgnums: [1] });
+/**
+* @function
+* Sample multivariate normal random values with given mean and covariance.
+*
+* The values are returned with the given shape, along with the final dimension
+* used to represent the n-dimensional multivariate normal factors.
+*
+* This uses Cholesky decomposition on the covariance matrix.
+*
+* - `key` - PRNG key
+* - `mean` - Mean vector of shape `[..., n]`
+* - `cov` - Covariance of shape `[..., n, n]`, must be positive-definite
+* - `shape` - Result batch shape, must be broadcastable with
+*            `mean.shape[:-1]` and `cov.shape[:-2]`
+* @returns Random samples of shape `[...shape, n]`
+*/
+const multivariateNormal = jit$1(function multivariateNormal$1(key$1, mean$1, cov$1, shape$1 = []) {
+	mean$1 = fudgeArray(mean$1);
+	cov$1 = fudgeArray(cov$1);
+	const n = mean$1.shape[mean$1.ndim - 1];
+	if (cov$1.shape[cov$1.ndim - 1] !== n || cov$1.shape[cov$1.ndim - 2] !== n) throw new Error(`Invalid covariance shape: ${cov$1.shape}. Expected last two dimensions to be [${n}, ${n}].`);
+	const outputShape = broadcastShapes(shape$1, mean$1.shape.slice(0, -1), cov$1.shape.slice(0, -2)).concat(n);
+	const L = cholesky(cov$1);
+	const z = normal(key$1, outputShape);
+	return einsum("...ij,...j->...i", L, z).add(mean$1);
+}, { staticArgnums: [3] });
+/**
+* @function
 * Sample random values according to `p(x) = 1/sqrt(2pi) * exp(-x^2/2)`.
 *
 * Unlike JAX, this uses the Box-Muller transform. JAX uses the erf_inv primitive instead and
@@ -6405,11 +7685,6 @@ const valueAndGrad = valueAndGrad$1;
 */
 const jacrev = jacrev$1;
 /**
-* @function
-* Compute the Jacobian with reverse-mode AD. Alias for `jacrev()`.
-*/
-const jacobian = jacrev;
-/**
 * Wait until all `Array` leaves are ready by calling `Array.blockUntilReady()`.
 *
 * This can be used to wait for the results of an intermediate computation to
@@ -6445,6 +7720,7 @@ async function devicePut(x, device) {
 //#endregion
 exports.Array = Array$1;
+exports.ClosedJaxpr = ClosedJaxpr;
 exports.DType = require_backend.DType;
 exports.Jaxpr = Jaxpr;
 exports.blockUntilReady = blockUntilReady;
@@ -6454,7 +7730,7 @@ exports.devices = require_backend.devices;
 exports.grad = grad;
 exports.init = require_backend.init;
 exports.jacfwd = jacfwd;
-exports.jacobian = jacobian;
+exports.jacobian = jacrev;
 exports.jacrev = jacrev;
 exports.jit = jit;
 exports.jvp = jvp;
@@ -6499,5 +7775,4 @@ Object.defineProperty(exports, 'tree', {
 });
 exports.valueAndGrad = valueAndGrad;
 exports.vjp = vjp;
-exports.vmap = vmap;
-//# sourceMappingURL=index.cjs.map
+exports.vmap = vmap;