npm - @jax-js/jax - Versions diffs - 0.1.2 → 0.1.4 - Mend

@jax-js/jax 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +16 -34
package/dist/{backend-DeVfWEFS.cjs → backend-Bu9GY6sK.cjs} +222 -36
package/dist/{backend-BqymqzuU.js → backend-tngXtWe4.js} +204 -36
package/dist/index.cjs +1798 -955
package/dist/index.d.cts +383 -97
package/dist/index.d.ts +383 -97
package/dist/index.js +1791 -949
package/dist/{webgpu-BGuG58KZ.js → webgpu-ChVgx3b6.js} +410 -97
package/dist/{webgpu-CcGP160M.cjs → webgpu-Oj3Kd-kd.cjs} +410 -97
package/package.json +1 -1

package/dist/index.js (changed)

@@ -1,28 +1,36 @@
 import { __export } from "./chunk-Cl8Af3a2.js";
-import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-BqymqzuU.js";
+import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-tngXtWe4.js";
 //#region src/frontend/convolution.ts
 /**
 * Check that the shapes and parameters passed to convolution are valid.
+* Expected shapes of the lhs and rhs of the convolution are:
+*
+* - `lhsShape = [*vmapDims, batchSize, inChannels, spatialDims...]`
+* - `rhsShape = [*vmapDims, outChannels, inChannels, kernelSize...]`
 *
 * If the check succeeds, returns the output shape.
 */
-function checkConvShape(lhsShape, rhsShape, { strides, padding, lhsDilation, rhsDilation }) {
+function checkConvShape(lhsShape, rhsShape, { vmapDims, strides, padding, lhsDilation, rhsDilation }) {
 	if (lhsShape.length !== rhsShape.length) throw new Error(`conv() requires inputs with the same number of dimensions, got ${lhsShape.length} and ${rhsShape.length}`);
-	const n = lhsShape.length - 2;
+	const n = lhsShape.length - 2 - vmapDims;
 	if (n < 0) throw new Error("conv() requires at least 2D inputs");
 	if (strides.length !== n) throw new Error("conv() strides != spatial dims");
 	if (padding.length !== n) throw new Error("conv() padding != spatial dims");
 	if (lhsDilation.length !== n) throw new Error("conv() lhsDilation != spatial dimensions");
 	if (rhsDilation.length !== n) throw new Error("conv() rhsDilation != spatial dimensions");
-	if (lhsShape[1] !== rhsShape[1]) throw new Error(`conv() input channels: ${lhsShape[1]} != ${rhsShape[1]}`);
-	const outShape = [lhsShape[0], rhsShape[0]];
+	if (lhsShape[vmapDims + 1] !== rhsShape[vmapDims + 1]) throw new Error(`conv() input channels: ${lhsShape[1]} != ${rhsShape[1]}`);
+	const outShape = [
+		...generalBroadcast(lhsShape.slice(0, vmapDims), rhsShape.slice(0, vmapDims)),
+		lhsShape[vmapDims],
+		rhsShape[vmapDims]
+	];
 	for (let i = 0; i < n; i++) {
 		if (strides[i] <= 0 || !Number.isInteger(strides[i])) throw new Error(`conv() strides[${i}] must be a positive integer`);
 		if (padding[i].length !== 2 || !padding[i].every(Number.isInteger)) throw new Error(`conv() padding[${i}] must be a 2-tuple of integers`);
 		if (lhsDilation[i] <= 0 || !Number.isInteger(lhsDilation[i])) throw new Error(`conv() lhsDilation[${i}] must be a positive integer`);
 		if (rhsDilation[i] <= 0 || !Number.isInteger(rhsDilation[i])) throw new Error(`conv() rhsDilation[${i}] must be a positive integer`);
-		const [x, k] = [lhsShape[i + 2], rhsShape[i + 2]];
+		const [x, k] = [lhsShape[i + vmapDims + 2], rhsShape[i + vmapDims + 2]];
 		if (k <= 0) throw new Error("conv() kernel size must be positive");
 		const [pl, pr] = padding[i];
 		if (pl < -x || pr < -x || pl + pr < -x) throw new Error(`conv() padding[${i}]=(${pl},${pr}) is too negative for input size ${x}`);
@@ -147,27 +155,13 @@ function poolTranspose(st, inShape, ks, strides = 1, dilation = 1) {
 function applyDilation(st, dilation) {
 	if (dilation.every((s) => s === 1)) return st;
 	const s_ = dilation;
-	const [a, b, ...k_] = st.shape;
-	st = st.reshape([
-		a,
-		b,
-		...k_.flatMap((k) => [k, 1])
-	]);
-	st = st.pad([
-		[0, 0],
-		[0, 0],
-		...s_.flatMap((s) => [[0, 0], [0, s - 1]])
-	]);
-	st = st.reshape([
-		a,
-		b,
-		...k_.map((k, i) => k * s_[i])
-	]);
-	st = st.shrink([
-		[0, a],
-		[0, b],
-		...k_.map((k, i) => [0, (k - 1) * s_[i] + 1])
-	]);
+	const n = s_.length;
+	const prefix = st.shape.slice(0, -n);
+	const k_ = st.shape.slice(-n);
+	st = st.reshape([...prefix, ...k_.flatMap((k) => [k, 1])]);
+	st = st.pad([...prefix.map(() => [0, 0]), ...s_.flatMap((s) => [[0, 0], [0, s - 1]])]);
+	st = st.reshape([...prefix, ...k_.map((k, i) => k * s_[i])]);
+	st = st.shrink([...prefix.map((p) => [0, p]), ...k_.map((k, i) => [0, (k - 1) * s_[i] + 1])]);
 	return st;
 }
 /**
@@ -177,25 +171,26 @@ function applyDilation(st, dilation) {
 * beforehand using `checkConvShape()`.
 */
 function prepareConv(stX, stY, params) {
-	const n = stX.shape.length - 2;
+	const v = params.vmapDims;
+	const n = stX.shape.length - 2 - v;
+	const vmapShape = stX.shape.slice(0, v);
 	stX = applyDilation(stX, params.lhsDilation);
-	const ks = stY.shape.slice(2);
-	stX = stX.padOrShrink([
-		[0, 0],
-		[0, 0],
-		...params.padding
-	]);
+	const ks = stY.shape.slice(v + 2);
+	stX = stX.padOrShrink([...rep(v + 2, [0, 0]), ...params.padding]);
 	stX = pool(stX, ks, params.strides, params.rhsDilation);
-	stX = stX.moveaxis(1, n + 1).reshape([
-		stX.shape[0],
+	stX = stX.moveaxis(v + 1, v + n + 1).reshape([
+		...vmapShape,
+		stX.shape[v],
 		1,
-		...stX.shape.slice(2, n + 2),
-		stX.shape[1] * prod(ks)
+		...stX.shape.slice(v + 2, v + n + 2),
+		stX.shape[v + 1] * prod(ks)
 	]);
 	stY = stY.reshape([
-		stY.shape[0],
+		...vmapShape,
+		1,
+		stY.shape[v],
 		...rep(n, 1),
-		stY.shape[1] * prod(ks)
+		stY.shape[v + 1] * prod(ks)
 	]);
 	return [stX, stY];
 }
@@ -336,6 +331,8 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Mul"] = "mul";
 	Primitive$1["Idiv"] = "idiv";
 	Primitive$1["Mod"] = "mod";
+	Primitive$1["Min"] = "min";
+	Primitive$1["Max"] = "max";
 	Primitive$1["Neg"] = "neg";
 	Primitive$1["Reciprocal"] = "reciprocal";
 	Primitive$1["Floor"] = "floor";
@@ -343,7 +340,6 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["StopGradient"] = "stop_gradient";
 	Primitive$1["Cast"] = "cast";
 	Primitive$1["Bitcast"] = "bitcast";
-	Primitive$1["RandomBits"] = "random_bits";
 	Primitive$1["Sin"] = "sin";
 	Primitive$1["Cos"] = "cos";
 	Primitive$1["Asin"] = "asin";
@@ -353,8 +349,6 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Erf"] = "erf";
 	Primitive$1["Erfc"] = "erfc";
 	Primitive$1["Sqrt"] = "sqrt";
-	Primitive$1["Min"] = "min";
-	Primitive$1["Max"] = "max";
 	Primitive$1["Reduce"] = "reduce";
 	Primitive$1["Dot"] = "dot";
 	Primitive$1["Conv"] = "conv";
@@ -362,14 +356,19 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["PoolTranspose"] = "pool_transpose";
 	Primitive$1["Compare"] = "compare";
 	Primitive$1["Where"] = "where";
+	Primitive$1["RandomBits"] = "random_bits";
+	Primitive$1["Gather"] = "gather";
 	Primitive$1["Transpose"] = "transpose";
 	Primitive$1["Broadcast"] = "broadcast";
 	Primitive$1["Reshape"] = "reshape";
 	Primitive$1["Flip"] = "flip";
 	Primitive$1["Shrink"] = "shrink";
 	Primitive$1["Pad"] = "pad";
-	Primitive$1["Gather"] = "gather";
-	Primitive$1["JitCall"] = "jit_call";
+	Primitive$1["Sort"] = "sort";
+	Primitive$1["Argsort"] = "argsort";
+	Primitive$1["TriangularSolve"] = "triangular_solve";
+	Primitive$1["Cholesky"] = "cholesky";
+	Primitive$1["Jit"] = "jit";
 	return Primitive$1;
 }({});
 let CompareOp = /* @__PURE__ */ function(CompareOp$1) {
@@ -391,6 +390,12 @@ function idiv(x, y) {
 function mod(x, y) {
 	return bind1(Primitive.Mod, [x, y]);
 }
+function min$1(x, y) {
+	return bind1(Primitive.Min, [x, y]);
+}
+function max$1(x, y) {
+	return bind1(Primitive.Max, [x, y]);
+}
 function neg(x) {
 	return bind1(Primitive.Neg, [x]);
 }
@@ -412,12 +417,6 @@ function cast(x, dtype) {
 function bitcast(x, dtype) {
 	return bind1(Primitive.Bitcast, [x], { dtype });
 }
-function randomBits(k0, k1, shape$1, mode = "xor") {
-	return bind1(Primitive.RandomBits, [k0, k1], {
-		shape: shape$1,
-		mode
-	});
-}
 function sin$1(x) {
 	return bind1(Primitive.Sin, [x]);
 }
@@ -445,12 +444,6 @@ function erfc$1(x) {
 function sqrt$1(x) {
 	return bind1(Primitive.Sqrt, [x]);
 }
-function min$1(x, y) {
-	return bind1(Primitive.Min, [x, y]);
-}
-function max$1(x, y) {
-	return bind1(Primitive.Max, [x, y]);
-}
 function reduce(x, op, axis = null, opts) {
 	if (!AluGroup.Reduce.has(op)) throw new TypeError(`Invalid reduce operation: ${op}`);
 	axis = normalizeAxis(axis, ndim$1(x));
@@ -467,9 +460,11 @@ function dot$2(x, y) {
 }
 function conv$1(x, y, params = {}) {
 	if (x.ndim !== y.ndim) throw new Error(`conv() requires inputs with the same number of dimensions, got ${x.ndim} and ${y.ndim}`);
-	const n = x.ndim - 2;
+	const vmapDims = params.vmapDims ?? 0;
+	const n = x.ndim - 2 - vmapDims;
 	if (n < 0) throw new Error("conv() requires at least 2D inputs");
 	return bind1(Primitive.Conv, [x, y], {
+		vmapDims,
 		strides: params.strides ?? rep(n, 1),
 		padding: params.padding ?? rep(n, [0, 0]),
 		lhsDilation: params.lhsDilation ?? rep(n, 1),
@@ -504,6 +499,23 @@ function where$1(cond, x, y) {
 		y
 	]);
 }
+function randomBits(k0, k1, shape$1, mode = "xor") {
+	return bind1(Primitive.RandomBits, [k0, k1], {
+		shape: shape$1,
+		mode
+	});
+}
+function gather(x, indices, axis, outDim) {
+	if (indices.length === 0) throw new Error("gather() requires at least one index");
+	if (!Array.isArray(axis) || axis.length !== indices.length) throw new Error(`Invalid gather() axis: expected ${indices.length} axes, got ${JSON.stringify(axis)}`);
+	axis = axis.map((a) => checkAxis(a, ndim$1(x)));
+	if (new Set(axis).size !== axis.length) throw new Error(`Invalid gather() axis: duplicate axes ${JSON.stringify(axis)}`);
+	outDim = checkAxis(outDim, ndim$1(x) - axis.length + 1);
+	return bind1(Primitive.Gather, [x, ...indices], {
+		axis,
+		outDim
+	});
+}
 function transpose$1(x, perm) {
 	perm = perm ? perm.map((a) => checkAxis(a, ndim$1(x))) : range(ndim$1(x)).reverse();
 	if (!isPermutation(perm, ndim$1(x))) throw new Error(`Invalid transpose permutation for ${ndim$1(x)} axes: ${JSON.stringify(perm)}`);
@@ -553,16 +565,27 @@ function pad$1(x, width) {
 	} else if (width.length !== nd) throw new Error(`Invalid pad(): expected ${nd} axes, got ${width.length}`);
 	return bind1(Primitive.Pad, [x], { width });
 }
-function gather(x, indices, axis, outDim) {
-	if (indices.length === 0) throw new Error("gather() requires at least one index");
-	if (!Array.isArray(axis) || axis.length !== indices.length) throw new Error(`Invalid gather() axis: expected ${indices.length} axes, got ${JSON.stringify(axis)}`);
-	axis = axis.map((a) => checkAxis(a, ndim$1(x)));
-	if (new Set(axis).size !== axis.length) throw new Error(`Invalid gather() axis: duplicate axes ${JSON.stringify(axis)}`);
-	outDim = checkAxis(outDim, ndim$1(x) - axis.length + 1);
-	return bind1(Primitive.Gather, [x, ...indices], {
-		axis,
-		outDim
-	});
+function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
+	if (lower) {
+		a = flip$1(a, [-2, -1]);
+		b = flip$1(b, [-1]);
+	}
+	let x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+	if (lower) x = flip$1(x, [-1]);
+	return x;
+}
+function cholesky$2(x) {
+	return bind1(Primitive.Cholesky, [x]);
+}
+function sort$1(x) {
+	const nd = ndim$1(x);
+	if (nd === 0) throw new Error("sort: requires at least 1D input");
+	return bind1(Primitive.Sort, [x]);
+}
+function argsort$1(x) {
+	const nd = ndim$1(x);
+	if (nd === 0) throw new Error("argsort: requires at least 1D input");
+	return bind(Primitive.Argsort, [x]);
 }
 function bind1(prim, args, params = {}) {
 	const [results] = bind(prim, args, params);
@@ -693,8 +716,10 @@ var Tracer = class Tracer {
 		axis = normalizeAxis(axis, this.ndim);
 		const n = axis.reduce((acc, a) => acc * this.shape[a], 1);
 		if (n === 0) throw new Error("mean: cannot compute mean over zero-length axis");
-		const result = reduce(this, AluOp.Add, axis, opts);
-		return result.mul(1 / n);
+		const originalDtype = this.dtype;
+		const castDtype = promoteTypes(originalDtype, DType.Float32);
+		const result = reduce(this.astype(castDtype), AluOp.Add, axis, opts);
+		return result.mul(1 / n).astype(originalDtype);
 	}
 	/** Permute the dimensions of an array. Defaults to reversing the axis order. */
 	transpose(perm) {
@@ -723,7 +748,7 @@ var Tracer = class Tracer {
 		if (isFloatDtype(this.dtype)) return this.mul(reciprocal$1(other));
 		return idiv(this, other);
 	}
-	/** Return specified diagonals. See `numpy.diagonal` for full docs. */
+	/** Return specified diagonals. See `jax.numpy.diagonal` for full docs. */
 	diagonal(offset = 0, axis1 = 0, axis2 = 1) {
 		if (!Number.isInteger(offset)) throw new TypeError(`offset must be an integer, got ${offset}`);
 		if (offset < 0) return this.diagonal(-offset, axis2, axis1);
@@ -776,6 +801,34 @@ var Tracer = class Tracer {
 		this.dispose();
 	}
 	/**
+	* Return a sorted copy of an array in ascending order.
+	*
+	* See `jax.numpy.sort` for full docs.
+	*/
+	sort(axis = -1) {
+		axis = checkAxis(axis, this.ndim);
+		if (this.shape[axis] <= 1) return this;
+		if (axis === this.ndim - 1) return sort$1(this);
+		const perm = range(this.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		return sort$1(this.transpose(perm)).transpose(invertPermutation(perm));
+	}
+	/**
+	* Return the indices that would sort an array. This may not be a stable
+	* sorting algorithm; it need not preserve order of indices in ties.
+	*
+	* See `jax.numpy.argsort` for full docs.
+	*/
+	argsort(axis = -1) {
+		axis = checkAxis(axis, this.ndim);
+		if (axis === this.ndim - 1) return argsort$1(this)[1];
+		const perm = range(this.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		return argsort$1(this.transpose(perm))[1].transpose(invertPermutation(perm));
+	}
+	/**
 	* Slice an array along one or more axes.
 	*
 	* This is the equivalent of slicing in Python, e.g. `x[1:3, 2, :, None]`. To
@@ -892,6 +945,9 @@ var ShapedArray = class ShapedArray {
 	get ndim() {
 		return this.shape.length;
 	}
+	get size() {
+		return prod(this.shape);
+	}
 	toString() {
 		return `${this.dtype}[${this.shape.join(",")}]`;
 	}
@@ -1170,7 +1226,7 @@ var Jaxpr = class Jaxpr {
 			} else if (eqn.primitive === Primitive.Idiv) {
 				const [a, b] = inputs;
 				const c = eqn.outBinders[0];
-				if (atomIsLit(b, 1)) context.set(c, a);
+				if (atomIsLit(b, 1) && !isFloatDtype(a.aval.dtype)) context.set(c, a);
 				else newEqns.push(eqn);
 			} else if ((eqn.primitive === Primitive.Broadcast || eqn.primitive === Primitive.Reshape) && deepEqual(eqn.params.shape, eqn.inputs[0].aval.shape) || eqn.primitive === Primitive.Transpose && eqn.params.perm.every((p, i) => p === i) || eqn.primitive === Primitive.Flip && eqn.params.axis.length === 0 || eqn.primitive === Primitive.Shrink && eqn.params.slice.every(([s, e$2], i) => s === 0 && e$2 === eqn.inputs[0].aval.shape[i]) || eqn.primitive === Primitive.Pad && eqn.params.width.every(([w0, w1]) => w0 === 0 && w1 === 0)) context.set(eqn.outBinders[0], eqn.inputs[0]);
 			else newEqns.push(eqn);
@@ -1187,13 +1243,13 @@ var Jaxpr = class Jaxpr {
 		}
 		return new Jaxpr(this.inBinders, liveEqns.reverse(), outs);
 	}
-	/** Flattens nested JitCall in a Jaxpr. Useful for handling jit-of-jit. */
+	/** Flattens nested Jit in a Jaxpr. Useful for handling jit-of-jit. */
 	flatten() {
-		if (!this.eqns.some((eqn) => eqn.primitive === Primitive.JitCall)) return this;
+		if (!this.eqns.some((eqn) => eqn.primitive === Primitive.Jit)) return this;
 		const newEqns = [];
 		const varMap = /* @__PURE__ */ new Map();
 		const varMapF = (x) => x instanceof Var ? varMap.get(x) ?? x : x;
-		for (const eqn of this.eqns) if (eqn.primitive === Primitive.JitCall) {
+		for (const eqn of this.eqns) if (eqn.primitive === Primitive.Jit) {
 			const jaxpr = eqn.params.jaxpr.flatten();
 			const translation = /* @__PURE__ */ new Map();
 			const translationF = (x) => x instanceof Var ? translation.get(x) : x;
@@ -1294,19 +1350,48 @@ function evalJaxpr(jaxpr, args) {
 function jaxprAsFun(jaxpr) {
 	return (...args) => evalJaxpr(jaxpr, args);
 }
+/** Jaxpr with a collection of associated, traced constants. */
+var ClosedJaxpr = class ClosedJaxpr {
+	constructor(jaxpr, consts) {
+		this.jaxpr = jaxpr;
+		this.consts = consts;
+	}
+	/** String representation of this Jaxpr. */
+	toString() {
+		return this.jaxpr.toString();
+	}
+	/** Apply a function to the underlying Jaxpr. */
+	mapJaxpr(f) {
+		return new ClosedJaxpr(f(this.jaxpr), this.consts);
+	}
+	/** Dispose of the constants in this Jaxpr. */
+	dispose() {
+		for (const c of this.consts) c.dispose();
+	}
+};
 /** Tracer that records its operations to dynamically construct a Jaxpr. */
 var JaxprTracer = class extends Tracer {
+	#rc;
 	constructor(trace$1, aval) {
 		super(trace$1);
 		this.aval = aval;
+		this.#rc = 1;
 	}
 	toString() {
 		return `JaxprTracer(${this.aval.toString()})`;
 	}
 	get ref() {
+		if (this.#rc <= 0) throw new UseAfterFreeError(this);
+		this.#rc++;
 		return this;
 	}
-	dispose() {}
+	dispose() {
+		if (this.#rc <= 0) throw new UseAfterFreeError(this);
+		this.#rc--;
+	}
+	trackLiftedConstant() {
+		this.#rc++;
+	}
 };
 /** Analogous to the 'DynamicJaxprTrace' class in JAX. */
 var JaxprTrace = class extends Trace {
@@ -1319,17 +1404,24 @@ var JaxprTrace = class extends Trace {
 	}
 	/** Register a constant / literal in this Jaxpr. */
 	getOrMakeConstTracer(val) {
+		if (!(val instanceof Tracer)) val = pureArray(val);
 		let tracer = this.builder.constTracers.get(val);
 		if (tracer === void 0) {
 			tracer = this.builder.newTracer(this, ShapedArray.fromAval(getAval(val)));
-			this.builder.addConst(tracer, val instanceof Tracer ? val.ref : array(val));
+			this.builder.addConst(tracer, val);
+		} else {
+			val.dispose();
+			tracer.trackLiftedConstant();
 		}
 		return tracer;
 	}
 	pure = this.getOrMakeConstTracer;
 	lift = this.getOrMakeConstTracer;
 	processPrimitive(primitive, tracers, params) {
-		const avalsIn = tracers.map((t) => t.aval);
+		const avalsIn = tracers.map((t) => {
+			t.dispose();
+			return t.aval;
+		});
 		const avalsOut = abstractEvalRules[primitive](avalsIn, params);
 		const outTracers = avalsOut.map((aval) => this.builder.newTracer(this, aval));
 		this.builder.addEqn(new JaxprEqn(primitive, tracers.map((t) => this.builder.getVar(t)), params, outTracers.map((t) => this.builder.addVar(t))));
@@ -1372,20 +1464,17 @@ var JaxprBuilder = class {
 		return v;
 	}
 	build(inTracers, outTracers) {
-		let [constVars, consts] = unzip2(this.constVals.entries());
+		const [constVars, consts] = unzip2(this.constVals.entries());
 		const t2v = this.getVar.bind(this);
 		const inBinders = [...constVars, ...inTracers.map(t2v)];
 		const outVars = outTracers.map(t2v);
-		let jaxpr = new Jaxpr(inBinders, this.eqns, outVars);
+		const jaxpr = new Jaxpr(inBinders, this.eqns, outVars);
 		typecheckJaxpr(jaxpr);
-		[jaxpr, consts] = _inlineLiterals(jaxpr, consts);
-		return {
-			jaxpr,
-			consts
-		};
+		const cjaxpr = new ClosedJaxpr(jaxpr, consts);
+		return _inlineLiterals(cjaxpr);
 	}
 };
-function _inlineLiterals(jaxpr, consts) {
+function _inlineLiterals({ jaxpr, consts }) {
 	const literals = /* @__PURE__ */ new Map();
 	const constBinders = [];
 	const newConsts = [];
@@ -1400,7 +1489,7 @@ function _inlineLiterals(jaxpr, consts) {
 	const newOuts = jaxpr.outs.map((x) => literals.get(x) ?? x);
 	const newJaxpr = new Jaxpr([...constBinders, ...jaxpr.inBinders.slice(consts.length)], newEqns, newOuts);
 	typecheckJaxpr(newJaxpr);
-	return [newJaxpr, newConsts];
+	return new ClosedJaxpr(newJaxpr, newConsts);
 }
 function binopAbstractEval([x, y]) {
 	if (!(x instanceof ShapedArray) || !(y instanceof ShapedArray)) throw new TypeError("binopAbstractEval expects ShapedArray inputs");
@@ -1419,6 +1508,8 @@ const abstractEvalRules = {
 	[Primitive.Mul]: binopAbstractEval,
 	[Primitive.Idiv]: binopAbstractEval,
 	[Primitive.Mod]: binopAbstractEval,
+	[Primitive.Min]: binopAbstractEval,
+	[Primitive.Max]: binopAbstractEval,
 	[Primitive.Neg]: vectorizedUnopAbstractEval,
 	[Primitive.Reciprocal]: vectorizedUnopAbstractEval,
 	[Primitive.Floor]: vectorizedUnopAbstractEval,
@@ -1432,12 +1523,6 @@ const abstractEvalRules = {
 		if (byteWidth(x.dtype) !== byteWidth(dtype)) throw new TypeError(`Bitcast from ${x.dtype} to ${dtype} with different byte width`);
 		return [new ShapedArray(x.shape, dtype, false)];
 	},
-	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
-		if (k0.dtype !== DType.Uint32 || k1.dtype !== DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
-		const keyShape = generalBroadcast(k0.shape, k1.shape);
-		if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-		return [new ShapedArray(shape$1, DType.Uint32, false)];
-	},
 	[Primitive.Sin]: vectorizedUnopAbstractEval,
 	[Primitive.Cos]: vectorizedUnopAbstractEval,
 	[Primitive.Asin]: vectorizedUnopAbstractEval,
@@ -1447,8 +1532,6 @@ const abstractEvalRules = {
 	[Primitive.Erf]: vectorizedUnopAbstractEval,
 	[Primitive.Erfc]: vectorizedUnopAbstractEval,
 	[Primitive.Sqrt]: vectorizedUnopAbstractEval,
-	[Primitive.Min]: binopAbstractEval,
-	[Primitive.Max]: binopAbstractEval,
 	[Primitive.Reduce]([x], { axis }) {
 		const axisSet = new Set(axis);
 		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
@@ -1481,6 +1564,25 @@ const abstractEvalRules = {
 		const shape$1 = generalBroadcast(cond.shape, xy.shape);
 		return [new ShapedArray(shape$1, xy.dtype, xy.weakType)];
 	},
+	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
+		if (k0.dtype !== DType.Uint32 || k1.dtype !== DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
+		const keyShape = generalBroadcast(k0.shape, k1.shape);
+		if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
+		return [new ShapedArray(shape$1, DType.Uint32, false)];
+	},
+	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
+		for (const a of indices) if (a.dtype !== DType.Int32 && a.dtype !== DType.Uint32) throw new TypeError(`Gather indices must be Int32 or Uint32, got ${a.dtype}`);
+		if (axis.length !== indices.length) throw new TypeError(`Gather: ${axis} axes but ${indices.length} indices`);
+		if (indices.length === 0) throw new TypeError("Gather must have 1+ indices with same shape");
+		if (axis.some((a) => a < 0 || a >= x.shape.length)) throw new TypeError("Gather axis out of bounds");
+		if (outDim < 0 || outDim > x.shape.length - axis.length) throw new TypeError("Gather outDim out of bounds");
+		const axisSet = new Set(axis);
+		if (axisSet.size !== axis.length) throw new TypeError("Gather axes are not unique");
+		const gatherShape = indices.reduce((shape$1, a) => generalBroadcast(shape$1, a.shape), []);
+		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
+		newShape.splice(outDim, 0, ...gatherShape);
+		return [new ShapedArray(newShape, x.dtype, x.weakType)];
+	},
 	[Primitive.Transpose]([x], { perm }) {
 		return [new ShapedArray(perm.map((i) => x.shape[i]), x.dtype, x.weakType)];
 	},
@@ -1501,23 +1603,31 @@ const abstractEvalRules = {
 		const newShape = x.shape.map((dim, i) => dim + width[i][0] + width[i][1]);
 		return [new ShapedArray(newShape, x.dtype, x.weakType)];
 	},
-	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
-		for (const a of indices) if (a.dtype !== DType.Int32 && a.dtype !== DType.Uint32) throw new TypeError(`Gather indices must be Int32 or Uint32, got ${a.dtype}`);
-		if (axis.length !== indices.length) throw new TypeError(`Gather: ${axis} axes but ${indices.length} indices`);
-		if (indices.length === 0) throw new TypeError("Gather must have 1+ indices with same shape");
-		if (axis.some((a) => a < 0 || a >= x.shape.length)) throw new TypeError("Gather axis out of bounds");
-		if (outDim < 0 || outDim > x.shape.length - axis.length) throw new TypeError("Gather outDim out of bounds");
-		const axisSet = new Set(axis);
-		if (axisSet.size !== axis.length) throw new TypeError("Gather axes are not unique");
-		const gatherShape = indices.reduce((shape$1, a) => generalBroadcast(shape$1, a.shape), []);
-		const newShape = x.shape.filter((_, i) => !axisSet.has(i));
-		newShape.splice(outDim, 0, ...gatherShape);
-		return [new ShapedArray(newShape, x.dtype, x.weakType)];
+	[Primitive.Sort]([x]) {
+		if (x.ndim === 0) throw new TypeError("sort: requires at least 1D input");
+		return [ShapedArray.fromAval(x)];
+	},
+	[Primitive.Argsort]([x]) {
+		if (x.ndim === 0) throw new TypeError("argsort: requires at least 1D input");
+		return [ShapedArray.fromAval(x), new ShapedArray(x.shape, DType.Int32, false)];
+	},
+	[Primitive.TriangularSolve]([a, b]) {
+		if (a.ndim < 2) throw new TypeError(`triangular_solve: a must be at least 2D, got ${a}`);
+		if (b.ndim < 2) throw new TypeError(`triangular_solve: b must be at least 2D, got ${b}`);
+		const [m, n] = a.shape.slice(-2);
+		const [_batch, q] = b.shape.slice(-2);
+		if (!deepEqual(a.shape.slice(0, -2), b.shape.slice(0, -2)) || a.dtype !== b.dtype || m !== n || n !== q) throw new TypeError(`triangular_solve: mismatch ${a} vs ${b}`);
+		return [new ShapedArray(b.shape, b.dtype, a.weakType && b.weakType)];
+	},
+	[Primitive.Cholesky]([a]) {
+		if (a.ndim < 2) throw new TypeError(`cholesky: requires at least 2D input, got ${a}`);
+		if (a.shape[a.ndim - 2] !== a.shape[a.ndim - 1]) throw new TypeError(`cholesky: must be square, got ${a}`);
+		return [ShapedArray.fromAval(a)];
 	},
-	[Primitive.JitCall](args, { jaxpr }) {
+	[Primitive.Jit](args, { jaxpr }) {
 		const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
-		if (args.length !== inTypes.length) throw new TypeError(`jit_call expected ${inTypes.length} arguments, got ${args.length}`);
-		for (let i = 0; i < inTypes.length; i++) if (!args[i].equals(inTypes[i])) throw new TypeError(`jit_call argument ${i} has type ${args[i]}, expected ${inTypes[i]}`);
+		if (args.length !== inTypes.length) throw new TypeError(`jit expected ${inTypes.length} arguments, got ${args.length}`);
+		for (let i = 0; i < inTypes.length; i++) if (!args[i].equals(inTypes[i])) throw new TypeError(`jit argument ${i} has type ${args[i]}, expected ${inTypes[i]}`);
 		return outTypes;
 	}
 };
@@ -1553,11 +1663,10 @@ function makeJaxpr$1(f, opts) {
 			const tracersIn = avalsIn.map((aval) => trace$1.newArg(typeof aval === "object" ? aval : pureArray(aval)));
 			const outs = fFlat(...tracersIn);
 			const tracersOut = outs.map((out) => fullRaise(trace$1, out));
-			const { jaxpr, consts } = builder.build(tracersIn, tracersOut);
+			const jaxpr = builder.build(tracersIn, tracersOut);
 			if (outTree.value === void 0) throw new Error("outTree was not set in makeJaxpr");
 			return {
-				jaxpr: jaxpr.simplify(),
-				consts,
+				jaxpr: jaxpr.mapJaxpr((j) => j.simplify()),
 				treedef: outTree.value
 			};
 		} catch (_) {
@@ -1576,22 +1685,28 @@ function jit$1(f, opts) {
 		const avalsInFlat = argsFlat.map((x) => ShapedArray.fromAval(getAval(x)));
 		const avalsIn = unflatten(inTree, avalsInFlat);
 		const jaxprArgs = joinIdx(args.length, staticArgs, avalsIn, staticArgnums);
-		const { jaxpr, consts, treedef: outTree } = runWithCache(cache, jaxprArgs, () => makeJaxpr$1(f, opts)(...jaxprArgs));
-		const outs = bind(Primitive.JitCall, [...consts.map((c) => c.ref), ...argsFlat], {
+		const { jaxpr, treedef: outTree } = runWithCache(cache, jaxprArgs, () => makeJaxpr$1(f, opts)(...jaxprArgs));
+		const outs = bind(Primitive.Jit, [...jaxpr.consts.map((c) => c.ref), ...argsFlat], {
 			name: f.name || "closure",
-			jaxpr,
-			numConsts: consts.length
+			jaxpr: jaxpr.jaxpr,
+			numConsts: jaxpr.consts.length
 		});
 		return unflatten(outTree, outs);
 	});
 	result.dispose = () => {
-		for (const { consts } of cache.values()) for (const c of consts) c.dispose();
+		for (const { jaxpr } of cache.values()) jaxpr.dispose();
 	};
 	return result;
 }
 //#endregion
 //#region src/frontend/jit.ts
+const routinePrimitives = new Map([
+	[Primitive.Sort, Routines.Sort],
+	[Primitive.Argsort, Routines.Argsort],
+	[Primitive.TriangularSolve, Routines.TriangularSolve],
+	[Primitive.Cholesky, Routines.Cholesky]
+]);
 /** Result of compiling a Jaxpr. Can be evaluated on a series of inputs. */
 var JitProgram = class {
 	constructor(backend, steps, inputs, outputs) {
@@ -1606,9 +1721,14 @@ var JitProgram = class {
 				case "execute": {
 					const inputsNice = step.inputs.map((id, i) => `${i}: %${id}`).join(", ");
 					const outputsNice = step.outputs.map((id) => `%${id}`).join(", ");
-					return PPrint.pp(`execute (${inputsNice}) -> ${outputsNice}, kernel`).concat(step.kernel.pprint().indent(2));
+					const executeText = `execute (${inputsNice}) -> ${outputsNice}`;
+					if (step.source instanceof Kernel) return PPrint.pp(`${executeText}, kernel`).concat(step.source.pprint().indent(2));
+					else if (step.source instanceof Routine) return PPrint.pp(`${executeText}, routine ${step.source.name}`);
+					else {
+						step.source;
+						return PPrint.pp(executeText);
+					}
 				}
-				case "const": return PPrint.pp(`%${step.output} = const <Slot ${step.slot}>`);
 				case "malloc": return PPrint.pp(`%${step.output} = malloc <${step.size} bytes>`);
 				case "incref": return PPrint.pp(`incref ${step.input}`);
 				case "free": return PPrint.pp(`free ${step.input}`);
@@ -1631,12 +1751,9 @@ var JitProgram = class {
 				const inputs$1 = step.inputs.map((id) => scope.get(id));
 				const outputs = step.outputs.map((id) => scope.get(id));
 				if (inputs$1.some((s) => s === void 0) || outputs.some((s) => s === void 0)) throw new Error(`internal: JitProgram scope undefined`);
-				pending.push(new PendingExecute(this.backend, step.kernel, inputs$1, outputs));
+				pending.push(new PendingExecute(this.backend, step.source, inputs$1, outputs));
 				break;
 			}
-			case "const":
-				scope.set(step.output, step.slot);
-				break;
 			case "malloc": {
 				const slot = this.backend.malloc(step.size);
 				scope.set(step.output, slot);
@@ -1670,34 +1787,37 @@ var JitProgramBuilder = class {
 		this.#nextId = nargs;
 		this.steps = [];
 	}
-	pushConst(slot) {
-		const id = this.#nextId++;
-		this.steps.push({
-			type: "const",
-			slot,
-			output: id
-		});
-		return id;
-	}
 	/**
 	 * Materialize a literal as a buffer: builds a zero-argument `Kernel` whose
 	 * expression is the constant `lit.value` (dtype `lit.dtype`), pushes it with
 	 * `pushKernel` using an empty input list, and returns the resulting id.
 	 */
 	pushLit(lit) {
-		const kernel = new Kernel(0, prod(lit.aval.shape), AluExp.const(lit.dtype, lit.value));
+		const kernel = new Kernel(0, lit.aval.size, AluExp.const(lit.dtype, lit.value));
 		return this.pushKernel(kernel, []);
 	}
-	pushKernel(kernel, inputs) {
 	/**
 	 * Append a "malloc" step for a buffer of `size$1` bytes and return the
 	 * fresh id that names that buffer in later steps.
 	 */
+	pushBuffer(size$1) {
 		const id = this.#nextId++;
 		this.steps.push({
 			type: "malloc",
-			size: kernel.bytes,
+			size: size$1,
 			output: id
 		});
+		return id;
+	}
 	/**
 	 * Allocate an output buffer of `kernel.bytes` via `pushBuffer`, append an
 	 * "execute" step that runs `kernel` on `inputs` with that buffer as its
 	 * only output, and return the buffer's id.
 	 */
+	pushKernel(kernel, inputs) {
+		const id = this.pushBuffer(kernel.bytes);
 		this.steps.push({
 			type: "execute",
-			kernel,
+			source: kernel,
 			inputs,
 			outputs: [id]
 		});
 		return id;
 	}
+	pushRoutine(routine, inputs, outputs) {
+		this.steps.push({
+			type: "execute",
+			source: routine,
+			inputs,
+			outputs
+		});
+	}
 	pushIncref(id) {
 		this.steps.push({
 			type: "incref",
@@ -1723,28 +1843,18 @@ var JitProgramBuilder = class {
 	}
 };
 const jitCompileCache = /* @__PURE__ */ new Map();
-function jitCompile(backend, jaxpr, consts) {
-	if (jaxpr.inBinders.length < consts.length) throw new TypeError(`Jaxpr has ${jaxpr.inBinders.length} inputs, but ${consts.length} consts were provided`);
-	for (let i = 0; i < consts.length; i++) if (consts[i].device !== backend.type) throw new TypeError(`Const ${i} has device ${consts[i].device}, but expected ${backend.type}`);
-	const cacheKey = backend.type + FpHash.hash(jaxpr, ...consts.map((c) => c.id));
+function jitCompile(backend, jaxpr) {
+	const cacheKey = backend.type + "," + FpHash.hash(jaxpr);
 	const cached = jitCompileCache.get(cacheKey);
 	if (cached) return cached;
 	if (DEBUG >= 1) console.info("=========== JIT Compile ===========\n" + jaxpr.toString());
 	jaxpr = jaxpr.flatten().simplify();
-	const nargs = jaxpr.inBinders.length - consts.length;
+	const nargs = jaxpr.inBinders.length;
 	const builder = new JitProgramBuilder(backend, nargs);
 	const blackNodes = splitGraphDataflow(backend, jaxpr);
 	const ctx = /* @__PURE__ */ new Map();
-	for (let i = 0; i < consts.length; i++) {
-		const v = jaxpr.inBinders[i];
-		const slot = consts[i]._realizeSource();
-		ctx.set(v, {
-			type: "imm",
-			arg: builder.pushConst(slot)
-		});
-	}
 	for (let i = 0; i < nargs; i++) {
-		const v = jaxpr.inBinders[consts.length + i];
+		const v = jaxpr.inBinders[i];
 		ctx.set(v, {
 			type: "imm",
 			arg: i
@@ -1752,51 +1862,101 @@ function jitCompile(backend, jaxpr, consts) {
 	}
 	for (let i = 0; i < jaxpr.eqns.length; i++) {
 		const eqn = jaxpr.eqns[i];
+		if (routinePrimitives.has(eqn.primitive)) {
+			const routine = new Routine(routinePrimitives.get(eqn.primitive), {
+				inputShapes: eqn.inputs.map((x) => x.aval.shape),
+				inputDtypes: eqn.inputs.map((x) => x.aval.dtype),
+				outputShapes: eqn.outBinders.map((x) => x.aval.shape),
+				outputDtypes: eqn.outBinders.map((x) => x.aval.dtype)
+			}, eqn.params);
+			const inputs = [];
+			for (const input of eqn.inputs) if (input instanceof Var) {
+				const jv = ctx.get(input);
+				if (jv.type !== "imm") throw new Error(`jit: routine primitive ${eqn.primitive} input is not imm`);
+				inputs.push(jv.arg);
+			} else if (input instanceof Lit) inputs.push(builder.pushLit(input));
+			const outputs = [];
+			for (const outVar$1 of eqn.outBinders) {
+				const outId = builder.pushBuffer(outVar$1.aval.size * byteWidth(outVar$1.aval.dtype));
+				outputs.push(outId);
+				ctx.set(outVar$1, {
+					type: "imm",
+					arg: outId
+				});
+			}
+			builder.pushRoutine(routine, inputs, outputs);
+			continue;
+		}
 		const inputExps = [];
 		const inputAvals = [];
 		const inputArgs = [];
-		for (const input of eqn.inputs) if (input instanceof Var) {
-			const jitValue = ctx.get(input);
-			if (jitValue.type === "exp") {
-				const gidMap = /* @__PURE__ */ new Map();
-				for (const [gid, jitId] of jitValue.args.entries()) {
-					let newGid = inputArgs.indexOf(jitId);
-					if (newGid === -1) {
-						newGid = inputArgs.length;
-						inputArgs.push(jitId);
-					}
-					gidMap.set(gid, newGid);
-				}
-				inputExps.push(jitValue.exp.reindexGids(gidMap));
-			} else if (jitValue.type === "imm") {
-				let gid = inputArgs.indexOf(jitValue.arg);
-				if (gid === -1) {
-					gid = inputArgs.length;
-					inputArgs.push(jitValue.arg);
+		let inputReduction = null;
+		const addArgs = (args) => {
+			const newGids = [];
+			for (const jitId of args) {
+				let newGid = inputArgs.indexOf(jitId);
+				if (newGid === -1) {
+					newGid = inputArgs.length;
+					inputArgs.push(jitId);
 				}
+				newGids.push(newGid);
+			}
+			return newGids;
+		};
+		for (const input of eqn.inputs) if (input instanceof Var) {
+			const jv = ctx.get(input);
+			if (jv.type === "exp") {
+				const newGids = addArgs(jv.args);
+				inputExps.push(jv.exp.reindexGids(newGids));
+			} else if (jv.type === "imm") {
+				const [gid] = addArgs([jv.arg]);
 				const st = ShapeTracker.fromShape(input.aval.shape);
 				const indices = unravelAlu(st.shape, AluVar.gidx);
 				inputExps.push(AluExp.globalView(input.aval.dtype, gid, st, indices));
+			} else if (jv.type === "red") {
+				if (inputReduction) throw new Error("jit: unexpected, multiple red inputs");
+				const newGids = addArgs(jv.args);
+				inputExps.push(jv.reduction.epilogue.reindexGids(newGids));
+				inputReduction = jv;
 			}
 			inputAvals.push(input.aval);
 		} else if (input instanceof Lit) {
 			inputExps.push(AluExp.const(input.dtype, input.value));
 			inputAvals.push(input.aval);
 		} else throw new TypeError(`Unexpected input in Jaxpr: ${input}`);
-		const nargs$1 = inputArgs.length;
 		const rule = jitRules[eqn.primitive];
 		if (!rule) throw new TypeError(`JIT not implemented for primitive ${eqn.primitive}`);
-		const kernel = rule(nargs$1, inputExps, inputAvals, eqn.params);
+		let exp$2;
+		let reduction;
+		if (inputReduction) {
+			const jv = inputReduction;
+			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp;
+			exp$2 = jv.exp.reindexGids(addArgs(jv.args));
+			reduction = new Reduction(jv.reduction.dtype, jv.reduction.op, jv.reduction.size, newEpilogue);
+		} else {
+			const ruleOutput = rule(inputExps, inputAvals, eqn.params);
+			exp$2 = ruleOutput.exp;
+			reduction = ruleOutput.reduction;
+		}
 		const outVar = eqn.outBinders[0];
-		if (kernel.reduction || blackNodes.has(outVar)) {
+		if (blackNodes.has(outVar)) {
+			const nargs$1 = inputArgs.length;
+			const size$1 = outVar.aval.size;
+			const kernel = new Kernel(nargs$1, size$1, exp$2, reduction);
 			const outId = builder.pushKernel(kernel, inputArgs);
 			ctx.set(outVar, {
 				type: "imm",
 				arg: outId
 			});
-		} else ctx.set(outVar, {
+		} else if (reduction) ctx.set(outVar, {
+			type: "red",
+			exp: exp$2,
+			reduction,
+			args: inputArgs
+		});
+		else ctx.set(outVar, {
 			type: "exp",
-			exp: kernel.exp,
+			exp: exp$2,
 			args: inputArgs
 		});
 	}
@@ -1806,7 +1966,7 @@ function jitCompile(backend, jaxpr, consts) {
 		if (jitValue.type !== "imm") throw new Error("internal: Expected imm, since outs are black nodes");
 		outputIds.push(jitValue.arg);
 	} else if (out instanceof Lit) outputIds.push(builder.pushLit(out));
-	const outputNeedsRef = new Set([...range(nargs), ...builder.steps.filter((s) => s.type === "const").map((s) => s.output)]);
+	const outputNeedsRef = new Set(range(nargs));
 	for (const outputId of outputIds) if (outputNeedsRef.has(outputId)) builder.pushIncref(outputId);
 	else outputNeedsRef.add(outputId);
 	builder.insertFreeSteps(outputIds);
@@ -1828,31 +1988,33 @@ function reshapeViews(exp$2, mapping, reduceAxis = false) {
 	});
 }
 /**
  * Build a JIT rule for an elementwise primitive whose operands broadcast.
  *
  * The returned rule reduces the input avals with `promoteAvals` to get a
  * common shape and dtype, rewrites each input expression with `reshapeViews`
  * so that any view whose shape differs from the common shape is broadcast to
  * it, casts inputs whose dtype differs from the promoted dtype, and finally
  * applies `fn(exps, params)`.
  *
  * Input positions listed in `opts.skipCastIdx` are left out of the dtype
  * promotion and are never cast.
  */
 function broadcastedJit(fn, opts) {
-	return (nargs, exps, avals, params) => {
+	return (exps, avals, params) => {
 		let { shape: newShape, dtype: newDtype } = avals.reduce(promoteAvals);
 		const skipCastIdx = opts?.skipCastIdx ?? [];
 		if (skipCastIdx.length) newDtype = avals.filter((_, i) => !skipCastIdx.includes(i)).reduce(promoteAvals).dtype;
-		exps = exps.map((exp$3, i) => {
-			exp$3 = reshapeViews(exp$3, (st) => {
+		exps = exps.map((exp$2, i) => {
+			exp$2 = reshapeViews(exp$2, (st) => {
 				if (!deepEqual(st.shape, newShape)) return st.broadcast(newShape, range(newShape.length - st.shape.length));
 			});
-			if (exp$3.dtype !== newDtype && !skipCastIdx.includes(i)) exp$3 = AluExp.cast(newDtype, exp$3);
-			return exp$3;
+			if (exp$2.dtype !== newDtype && !skipCastIdx.includes(i)) exp$2 = AluExp.cast(newDtype, exp$2);
+			return exp$2;
 		});
-		const exp$2 = fn(exps, params);
-		return new Kernel(nargs, prod(newShape), exp$2);
+		return { exp: fn(exps, params) };
 	};
 }
 /**
  * Build a JIT rule for a single-input primitive: the rule applies
  * `fn(a, params)` to its only input expression.
  */
 function unopJit(fn) {
-	return (nargs, [a], [as], params) => {
-		return new Kernel(nargs, prod(as.shape), fn(a, params));
+	return ([a], [_as], params) => {
+		return { exp: fn(a, params) };
 	};
 }
 /**
  * Build a JIT rule for a single-input primitive that only changes views:
  * the input expression is rewritten by `reshapeViews`, passing each view
  * through `fn(st, params)`.
  */
 function reshapeJit(fn) {
-	return (nargs, [a], [as], params) => {
-		a = reshapeViews(a, (st) => fn(st, params));
-		const newShape = fn(ShapeTracker.fromShape(as.shape), params).shape;
-		return new Kernel(nargs, prod(newShape), a);
+	return ([a], [_as], params) => {
+		return { exp: reshapeViews(a, (st) => fn(st, params)) };
+	};
+}
 /**
  * Placeholder JIT rule for routine primitives: the returned function always
  * throws. `jitCompile` handles primitives found in `routinePrimitives`
  * before it looks up a rule, so reaching this function indicates an error.
  */
+function routineNoJit() {
+	return () => {
+		throw new Error("jit: rule is not implemented for routines");
 	};
 }
 const jitRules = {
@@ -1860,6 +2022,8 @@ const jitRules = {
 	[Primitive.Mul]: broadcastedJit(([a, b]) => AluExp.mul(a, b)),
 	[Primitive.Idiv]: broadcastedJit(([a, b]) => AluExp.idiv(a, b)),
 	[Primitive.Mod]: broadcastedJit(([a, b]) => AluExp.mod(a, b)),
+	[Primitive.Min]: broadcastedJit(([a, b]) => AluExp.min(a, b)),
+	[Primitive.Max]: broadcastedJit(([a, b]) => AluExp.max(a, b)),
 	[Primitive.Neg]: unopJit((a) => AluExp.sub(AluExp.const(a.dtype, 0), a)),
 	[Primitive.Reciprocal]: unopJit(AluExp.reciprocal),
 	[Primitive.Floor]: unopJit(AluExp.floor),
@@ -1867,17 +2031,6 @@ const jitRules = {
 	[Primitive.StopGradient]: unopJit((a) => a),
 	[Primitive.Cast]: unopJit((a, { dtype }) => AluExp.cast(dtype, a)),
 	[Primitive.Bitcast]: unopJit((a, { dtype }) => AluExp.bitcast(dtype, a)),
-	[Primitive.RandomBits]: (nargs, keys, keyShapes, { shape: shape$1, mode }) => {
-		const mapping = (st) => {
-			if (!deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, range(shape$1.length - st.shape.length));
-		};
-		const k0 = reshapeViews(keys[0], mapping);
-		const k1 = reshapeViews(keys[1], mapping);
-		const c0 = AluExp.u32(0);
-		const c1 = AluExp.cast(DType.Uint32, AluVar.gidx);
-		const exp$2 = AluExp.threefry2x32(k0, k1, c0, c1, mode);
-		return new Kernel(nargs, prod(shape$1), exp$2);
-	},
 	[Primitive.Sin]: unopJit(AluExp.sin),
 	[Primitive.Cos]: unopJit(AluExp.cos),
 	[Primitive.Asin]: unopJit(AluExp.asin),
@@ -1887,9 +2040,7 @@ const jitRules = {
 	[Primitive.Erf]: unopJit(AluExp.erf),
 	[Primitive.Erfc]: unopJit(AluExp.erfc),
 	[Primitive.Sqrt]: unopJit(AluExp.sqrt),
-	[Primitive.Min]: broadcastedJit(([a, b]) => AluExp.min(a, b)),
-	[Primitive.Max]: broadcastedJit(([a, b]) => AluExp.max(a, b)),
-	[Primitive.Reduce](nargs, [a], [as], { op, axis }) {
+	[Primitive.Reduce]([a], [as], { op, axis }) {
 		const keptAxes = [];
 		const shiftedAxes = [];
 		const newShape = [];
@@ -1898,53 +2049,58 @@ const jitRules = {
 			keptAxes.push(i);
 			newShape.push(as.shape[i]);
 		}
-		const size$1 = prod(newShape);
 		const reductionSize = prod(shiftedAxes.map((ax) => as.shape[ax]));
 		newShape.push(reductionSize);
 		const perm = keptAxes.concat(shiftedAxes);
 		a = reshapeViews(a, (st) => st.permute(perm).reshape(newShape), true);
 		const reduction = new Reduction(a.dtype, op, reductionSize);
-		return new Kernel(nargs, size$1, a, reduction);
+		return {
+			exp: a,
+			reduction
+		};
 	},
 	[Primitive.Pool]: reshapeJit((st, { window, strides }) => pool(st, window, strides)),
-	[Primitive.PoolTranspose](nargs, [a], [as], { inShape, window, strides }) {
+	[Primitive.PoolTranspose]([a], [as], { inShape, window, strides }) {
 		let stX = poolTranspose(ShapeTracker.fromShape(as.shape), inShape, window, strides);
-		const size$1 = prod(inShape);
 		stX = stX.reshape([...inShape, prod(stX.shape.slice(inShape.length))]);
 		a = reshapeViews(a, (st) => st.compose(stX), true);
 		const reduction = new Reduction(a.dtype, AluOp.Add, stX.shape[stX.shape.length - 1]);
-		return new Kernel(nargs, size$1, a, reduction);
+		return {
+			exp: a,
+			reduction
+		};
 	},
-	[Primitive.Dot](nargs, [a, b], [as, bs]) {
-		const k1 = jitRules[Primitive.Mul](nargs, [a, b], [as, bs], {});
+	[Primitive.Dot]([a, b], [as, bs]) {
+		const k1 = jitRules[Primitive.Mul]([a, b], [as, bs], {});
 		const c = k1.exp;
 		const cs = promoteAvals(as, bs);
-		return jitRules[Primitive.Reduce](nargs, [c], [cs], {
+		return jitRules[Primitive.Reduce]([c], [cs], {
 			op: AluOp.Add,
 			axis: [cs.ndim - 1]
 		});
 	},
-	[Primitive.Conv](nargs, [a, b], [as, bs], params) {
+	[Primitive.Conv]([a, b], [as, bs], params) {
 		const [stX, stY] = prepareConv(ShapeTracker.fromShape(as.shape), ShapeTracker.fromShape(bs.shape), params);
 		a = reshapeViews(a, (st) => st.compose(stX));
 		b = reshapeViews(b, (st) => st.compose(stY));
 		as = new ShapedArray(stX.shape, as.dtype, as.weakType);
 		bs = new ShapedArray(stY.shape, bs.dtype, bs.weakType);
-		return jitRules[Primitive.Dot](nargs, [a, b], [as, bs], {});
+		return jitRules[Primitive.Dot]([a, b], [as, bs], {});
 	},
 	[Primitive.Compare]: broadcastedJit(([a, b], { op }) => aluCompare(a, b, op)),
 	[Primitive.Where]: broadcastedJit(([cond, a, b]) => AluExp.where(cond, a, b), { skipCastIdx: [0] }),
-	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
-	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
-	[Primitive.Reshape]: reshapeJit((st, { shape: shape$1 }) => st.reshape(shape$1)),
-	[Primitive.Flip]: reshapeJit((st, { axis }) => {
-		const arg = rep(st.shape.length, false);
-		for (const ax of axis) arg[ax] = true;
-		return st.flip(arg);
-	}),
-	[Primitive.Shrink]: reshapeJit((st, { slice }) => st.shrink(slice)),
-	[Primitive.Pad]: reshapeJit((st, { width }) => st.pad(width)),
-	[Primitive.Gather](nargs, [x, ...indices], [xs, ...indicesShapes], { axis, outDim }) {
+	[Primitive.RandomBits]: (keys, keyShapes, { shape: shape$1, mode }) => {
+		const mapping = (st) => {
+			if (!deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, range(shape$1.length - st.shape.length));
+		};
+		const k0 = reshapeViews(keys[0], mapping);
+		const k1 = reshapeViews(keys[1], mapping);
+		const c0 = AluExp.u32(0);
+		const c1 = AluExp.cast(DType.Uint32, AluVar.gidx);
+		const exp$2 = AluExp.threefry2x32(k0, k1, c0, c1, mode);
+		return { exp: exp$2 };
+	},
+	[Primitive.Gather]([x, ...indices], [xs, ...indicesShapes], { axis, outDim }) {
 		const axisSet = new Set(axis);
 		const indexShape = indicesShapes.map((c) => c.shape).reduce(generalBroadcast);
 		const finalShape = xs.shape.filter((_, i) => !axisSet.has(i));
@@ -1957,24 +2113,38 @@ const jitRules = {
 		for (const [i, iexp] of indices.entries()) src[axis[i]] = AluExp.cast(DType.Int32, reshapeViews(iexp, (st) => st.broadcast(finalShape, [...range(outDim + indexShape.length - st.shape.length), ...range(outDim + indexShape.length, finalShape.length)])));
 		const [index, valid] = ShapeTracker.fromShape(xs.shape).toAluExp(src);
 		if (!valid.resolve()) throw new Error("internal: expected full validity mask in Gather");
-		return new Kernel(nargs, prod(finalShape), x.substitute({ gidx: index }));
+		return { exp: x.substitute({ gidx: index }) };
 	},
-	[Primitive.JitCall]() {
-		throw new Error("internal: JitCall should have been flattened before JIT compilation");
+	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
+	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
+	[Primitive.Reshape]: reshapeJit((st, { shape: shape$1 }) => st.reshape(shape$1)),
+	[Primitive.Flip]: reshapeJit((st, { axis }) => {
+		const arg = rep(st.shape.length, false);
+		for (const ax of axis) arg[ax] = true;
+		return st.flip(arg);
+	}),
+	[Primitive.Shrink]: reshapeJit((st, { slice }) => st.shrink(slice)),
+	[Primitive.Pad]: reshapeJit((st, { width }) => st.pad(width)),
+	[Primitive.Sort]: routineNoJit(),
+	[Primitive.Argsort]: routineNoJit(),
+	[Primitive.TriangularSolve]: routineNoJit(),
+	[Primitive.Cholesky]: routineNoJit(),
+	[Primitive.Jit]() {
+		throw new Error("internal: Jit should have been flattened before JIT compilation");
 	}
 };
 /** Determines how to split the Jaxpr into kernels via dataflow analysis. */
 function splitGraphDataflow(backend, jaxpr) {
-	const varToEqn = /* @__PURE__ */ new Map();
+	const varToDefn = /* @__PURE__ */ new Map();
+	const varToUsages = /* @__PURE__ */ new Map();
 	for (let i = 0; i < jaxpr.eqns.length; i++) {
 		const eqn = jaxpr.eqns[i];
-		for (const v of eqn.outBinders) if (v instanceof Var) varToEqn.set(v, i);
-	}
-	const blackNodes = /* @__PURE__ */ new Set();
-	const p1NextBlack = /* @__PURE__ */ new Map();
-	for (const v of jaxpr.outs) if (v instanceof Var) {
-		blackNodes.add(v);
-		p1NextBlack.set(v, v);
+		for (const v of eqn.outBinders) if (v instanceof Var) varToDefn.set(v, i);
+		for (const input of eqn.inputs) if (input instanceof Var) {
+			const usages = varToUsages.get(input);
+			if (usages) usages.push(i);
+			else varToUsages.set(input, [i]);
+		}
 	}
 	const reducePrimitives = [
 		Primitive.Reduce,
@@ -1982,28 +2152,94 @@ function splitGraphDataflow(backend, jaxpr) {
 		Primitive.Conv,
 		Primitive.PoolTranspose
 	];
-	const heterogeneousViewPrimitives = [Primitive.Gather, Primitive.RandomBits];
-	for (let i = jaxpr.eqns.length - 1; i >= 0; i--) {
+	const reductionEpilogueEqns = /* @__PURE__ */ new Set();
+	const reductionEndpointEqns = /* @__PURE__ */ new Set();
+	for (let i = 0; i < jaxpr.eqns.length; i++) {
 		const eqn = jaxpr.eqns[i];
-		if (reducePrimitives.includes(eqn.primitive) || heterogeneousViewPrimitives.includes(eqn.primitive) || eqn.outBinders.some((v) => blackNodes.has(v))) {
-			for (const v of eqn.outBinders) {
-				blackNodes.add(v);
-				p1NextBlack.set(v, v);
+		if (reducePrimitives.includes(eqn.primitive)) {
+			let head = i;
+			while (true) {
+				reductionEpilogueEqns.add(head);
+				const outVar = jaxpr.eqns[head].outBinders[0];
+				const usages = varToUsages.get(outVar) ?? [];
+				if (jaxpr.outs.includes(outVar) || usages.length !== 1) break;
+				if (reductionEpilogueEqns.has(usages[0])) break;
+				const nextEqn = jaxpr.eqns[usages[0]];
+				switch (nextEqn.primitive) {
+					case Primitive.Neg:
+					case Primitive.Reciprocal:
+					case Primitive.Floor:
+					case Primitive.Ceil:
+					case Primitive.StopGradient:
+					case Primitive.Cast:
+					case Primitive.Bitcast:
+					case Primitive.Sin:
+					case Primitive.Cos:
+					case Primitive.Asin:
+					case Primitive.Atan:
+					case Primitive.Exp:
+					case Primitive.Log:
+					case Primitive.Erf:
+					case Primitive.Erfc:
+					case Primitive.Sqrt:
+						head = usages[0];
+						continue;
+					case Primitive.Add:
+					case Primitive.Mul:
+					case Primitive.Idiv:
+					case Primitive.Mod:
+					case Primitive.Min:
+					case Primitive.Max: {
+						const otherInput = nextEqn.inputs.find((v) => v !== outVar);
+						if (otherInput instanceof Lit || deepEqual(generalBroadcast(otherInput.aval.shape, outVar.aval.shape), outVar.aval.shape)) {
+							head = usages[0];
+							continue;
+						}
+						break;
+					}
+				}
+				break;
 			}
-			continue;
+			reductionEndpointEqns.add(head);
 		}
-		const reach = /* @__PURE__ */ new Set();
-		for (let j = i + 1; j < jaxpr.eqns.length; j++) for (const v of jaxpr.eqns[j].inputs) if (v instanceof Var && eqn.outBinders.includes(v)) for (const o of jaxpr.eqns[j].outBinders) {
-			const u = p1NextBlack.get(o);
-			if (u) reach.add(u);
-		}
-		if (reach.size === 1) {
-			const b = reach.values().next().value;
-			for (const v of eqn.outBinders) p1NextBlack.set(v, b);
-		} else if (reach.size > 1) for (const v of eqn.outBinders) {
+	}
+	const blackNodes = /* @__PURE__ */ new Set();
+	const p1NextBlack = /* @__PURE__ */ new Map();
+	for (const v of jaxpr.outs) if (v instanceof Var) {
+		blackNodes.add(v);
+		p1NextBlack.set(v, v);
+	}
+	const heterogeneousViewPrimitives = [Primitive.RandomBits, Primitive.Gather];
+	const needsCleanShapePrimitives = [Primitive.Pad];
+	for (let i = jaxpr.eqns.length - 1; i >= 0; i--) {
+		const eqn = jaxpr.eqns[i];
+		if (reductionEndpointEqns.has(i) || heterogeneousViewPrimitives.includes(eqn.primitive) || routinePrimitives.has(eqn.primitive) || eqn.outBinders.some((v) => blackNodes.has(v))) {
+			for (const v of eqn.outBinders) {
+				blackNodes.add(v);
+				p1NextBlack.set(v, v);
+			}
+			continue;
+		}
+		const reach = /* @__PURE__ */ new Set();
+		let needsCleanOutput = false;
+		outer: for (const v of eqn.outBinders) for (const j of varToUsages.get(v) ?? []) {
+			if (needsCleanShapePrimitives.includes(jaxpr.eqns[j].primitive) || routinePrimitives.has(jaxpr.eqns[j].primitive)) {
+				needsCleanOutput = true;
+				break outer;
+			}
+			for (const o of jaxpr.eqns[j].outBinders) {
+				const u = p1NextBlack.get(o);
+				if (u) reach.add(u);
+			}
+		}
+		if (reach.size > 1 || needsCleanOutput) for (const v of eqn.outBinders) {
 			blackNodes.add(v);
 			p1NextBlack.set(v, v);
 		}
+		else if (reach.size === 1) {
+			const b = reach.values().next().value;
+			for (const v of eqn.outBinders) p1NextBlack.set(v, b);
+		}
 	}
 	const p2Deps = /* @__PURE__ */ new Map();
 	for (const v of jaxpr.inBinders) p2Deps.set(v, new Set([v]));
@@ -2011,7 +2247,6 @@ function splitGraphDataflow(backend, jaxpr) {
 	while (p2idx < jaxpr.eqns.length) {
 		const eqn = jaxpr.eqns[p2idx++];
 		const deps = [];
-		if (eqn.outBinders.some((v) => blackNodes.has(v))) continue;
 		for (const input of eqn.inputs) if (input instanceof Var) if (blackNodes.has(input)) deps.push(new Set([input]));
 		else deps.push(p2Deps.get(input));
 		else deps.push(/* @__PURE__ */ new Set());
@@ -2022,7 +2257,7 @@ function splitGraphDataflow(backend, jaxpr) {
 			let assocInput = -1;
 			for (let i = 0; i < eqn.inputs.length; i++) {
 				const input = eqn.inputs[i];
-				if (input instanceof Var && varToEqn.has(input)) {
+				if (input instanceof Var && varToDefn.has(input)) {
 					let uniqueDeps = 0;
 					for (const dep of deps[i]) if (depCounter.get(dep) === 1) uniqueDeps++;
 					if (uniqueDeps > maxUniqueDeps) {
@@ -2033,8 +2268,8 @@ function splitGraphDataflow(backend, jaxpr) {
 			}
 			if (assocInput === -1) throw new Error(`internal: maxArgs, no input found to mark as black in Jaxpr equation ${eqn}`);
 			const assocVar = eqn.inputs[assocInput];
-			p2idx = varToEqn.get(assocVar);
-			for (const out of jaxpr.eqns[p2idx].outBinders) blackNodes.add(out);
+			p2idx = varToDefn.get(assocVar);
+			for (const out of jaxpr.eqns[p2idx++].outBinders) blackNodes.add(out);
 		} else {
 			const s = new Set(depCounter.keys());
 			for (const out of eqn.outBinders) p2Deps.set(out, s);
@@ -2060,9 +2295,9 @@ var PendingExecute = class {
 	submitted = false;
 	#promise = null;
 	#rc = 1;
-	constructor(backend, kernel, inputs, outputs) {
+	constructor(backend, source, inputs, outputs) {
 		this.backend = backend;
-		this.kernel = kernel;
+		this.source = source;
 		this.inputs = inputs;
 		this.outputs = outputs;
 		for (const slot of inputs) this.backend.incRef(slot);
@@ -2083,13 +2318,15 @@ var PendingExecute = class {
 			return;
 		}
 		this.#promise = (async () => {
-			this.prepared = await this.backend.prepare(this.kernel);
+			if (this.source instanceof Kernel) this.prepared = await this.backend.prepareKernel(this.source);
+			else this.prepared = await this.backend.prepareRoutine(this.source);
 		})();
 		await this.#promise;
 	}
 	prepareSync() {
 		if (this.prepared) return;
-		this.prepared = this.backend.prepareSync(this.kernel);
+		if (this.source instanceof Kernel) this.prepared = this.backend.prepareKernelSync(this.source);
+		else this.prepared = this.backend.prepareRoutineSync(this.source);
 	}
 	submit() {
 		if (this.submitted) return;
@@ -2112,8 +2349,6 @@ var PendingExecute = class {
 * "Array" type by name.
 */
 var Array$1 = class Array$1 extends Tracer {
-	static #nextId = 1001;
-	id;
 	#dtype;
 	#weakType;
 	#source;
@@ -2130,7 +2365,6 @@ var Array$1 = class Array$1 extends Tracer {
 	*/
 	constructor(args) {
 		super(baseArrayTrace);
-		this.id = Array$1.#nextId++;
 		this.#dtype = args.dtype;
 		this.#weakType = args.weakType;
 		this.#source = args.source;
@@ -2439,6 +2673,27 @@ var Array$1 = class Array$1 extends Tracer {
 			pending
 		});
 	}
+	/**
+	 * Apply an operation with custom lowering (a `Routine`) to `arrays`.
+	 *
+	 * Steps, in order:
+	 * - pick the backend with `#computeBackend`, realize every input, and use
+	 *   each input's `#source` as an input of the routine;
+	 * - `malloc` one output buffer per entry of `routine.type.outputDtypes`,
+	 *   sized `byteWidth(dtype) * prod(routine.type.outputShapes[i])`;
+	 * - collect the inputs' pending executes, append a new `PendingExecute`
+	 *   for the routine, and adjust reference counts with `updateRc`;
+	 * - dispose the inputs and wrap each output buffer in a new array.
+	 *
+	 * Every returned array shares the same `pending` list, and
+	 * `outputWeakType[i]` supplies the `weakType` of output `i`.
+	 *
+	 * NOTE(review): the `updateRc` amounts look chosen so that each of the
+	 * `outputs.length` results holds one reference on every pending execute
+	 * (the new one is bumped by `outputs.length - 1`, presumably because it
+	 * starts with a count of 1) - confirm against `PendingExecute`.
+	 */
+	static #routine(routine, arrays, outputWeakType) {
+		const { backend, committed } = Array$1.#computeBackend(routine.name, arrays);
+		for (const ar of arrays) ar.#realize();
+		const inputs = arrays.map((ar) => ar.#source);
+		const outputs = routine.type.outputDtypes.map((dtype, i) => backend.malloc(byteWidth(dtype) * prod(routine.type.outputShapes[i])));
+		const pending = arrays.flatMap((ar) => ar.#pending);
+		for (const exe of pending) exe.updateRc(+outputs.length);
+		pending.push(new PendingExecute(backend, routine, inputs, outputs));
+		pending[pending.length - 1].updateRc(+outputs.length - 1);
+		arrays.forEach((ar) => ar.dispose());
+		return outputs.map((output, i) => new Array$1({
+			source: output,
+			st: ShapeTracker.fromShape(routine.type.outputShapes[i]),
+			dtype: routine.type.outputDtypes[i],
+			weakType: outputWeakType[i],
+			backend,
+			committed,
+			pending
+		}));
+	}
 	/**
 	* Normalizes this array into one backed by a `Slot`.
 	*
@@ -2599,6 +2854,12 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Mod]([x, y]) {
 				return [x.#binary(AluOp.Mod, y)];
 			},
+			[Primitive.Min]([x, y]) {
+				return [x.#binary(AluOp.Min, y)];
+			},
+			[Primitive.Max]([x, y]) {
+				return [x.#binary(AluOp.Max, y)];
+			},
 			[Primitive.Neg]([x]) {
 				return [zerosLike$1(x.ref).#binary(AluOp.Sub, x)];
 			},
@@ -2635,25 +2896,6 @@ var Array$1 = class Array$1 extends Tracer {
 					return [y];
 				}
 			},
-			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
-				const keyShape = generalBroadcast(k0.shape, k1.shape);
-				if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-				const c0 = zeros(shape$1, {
-					dtype: DType.Uint32,
-					device: k0.device
-				});
-				const c1 = arange(0, prod(shape$1), 1, {
-					dtype: DType.Uint32,
-					device: k0.device
-				}).reshape(shape$1);
-				const custom = ([k0$1, k1$1, c0$1, c1$1]) => AluExp.threefry2x32(k0$1, k1$1, c0$1, c1$1, mode);
-				return [Array$1.#naryCustom("random_bits", custom, [
-					k0,
-					k1,
-					c0,
-					c1
-				])];
-			},
 			[Primitive.Sin]([x]) {
 				return [x.#unary(AluOp.Sin)];
 			},
@@ -2681,12 +2923,6 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Sqrt]([x]) {
 				return [x.#unary(AluOp.Sqrt)];
 			},
-			[Primitive.Min]([x, y]) {
-				return [x.#binary(AluOp.Min, y)];
-			},
-			[Primitive.Max]([x, y]) {
-				return [x.#binary(AluOp.Max, y)];
-			},
 			[Primitive.Reduce]([x], { op, axis }) {
 				if (axis.length === 0) return [x];
 				return [x.#moveAxesDown(axis).#reduce(op)];
@@ -2721,6 +2957,28 @@ var Array$1 = class Array$1 extends Tracer {
 					y
 				], { dtypeOverride: [DType.Bool] })];
 			},
+			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
+				const keyShape = generalBroadcast(k0.shape, k1.shape);
+				if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
+				const c0 = zeros(shape$1, {
+					dtype: DType.Uint32,
+					device: k0.device
+				});
+				const c1 = arange(0, prod(shape$1), 1, {
+					dtype: DType.Uint32,
+					device: k0.device
+				}).reshape(shape$1);
+				const custom = ([k0$1, k1$1, c0$1, c1$1]) => AluExp.threefry2x32(k0$1, k1$1, c0$1, c1$1, mode);
+				return [Array$1.#naryCustom("random_bits", custom, [
+					k0,
+					k1,
+					c0,
+					c1
+				])];
+			},
+			[Primitive.Gather]([x, ...indices], { axis, outDim }) {
+				return [x.#gather(indices, axis, outDim)];
+			},
 			[Primitive.Transpose]([x], { perm }) {
 				return [x.#transpose(perm)];
 			},
@@ -2741,17 +2999,48 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Pad]([x], { width }) {
 				return [x.#reshape(x.#st.pad(width))];
 			},
-			[Primitive.Gather]([x, ...indices], { axis, outDim }) {
-				return [x.#gather(indices, axis, outDim)];
+			[Primitive.Sort]([x]) {
+				const routine = new Routine(Routines.Sort, {
+					inputShapes: [x.aval.shape],
+					inputDtypes: [x.aval.dtype],
+					outputShapes: [x.aval.shape],
+					outputDtypes: [x.aval.dtype]
+				});
+				return Array$1.#routine(routine, [x], [x.#weakType]);
+			},
+			[Primitive.Argsort]([x]) {
+				const routine = new Routine(Routines.Argsort, {
+					inputShapes: [x.aval.shape],
+					inputDtypes: [x.aval.dtype],
+					outputShapes: [x.aval.shape, x.aval.shape],
+					outputDtypes: [x.aval.dtype, DType.Int32]
+				});
+				return Array$1.#routine(routine, [x], [x.#weakType, false]);
+			},
+			[Primitive.TriangularSolve]([a, b], { unitDiagonal }) {
+				const routine = new Routine(Routines.TriangularSolve, {
+					inputShapes: [a.aval.shape, b.aval.shape],
+					inputDtypes: [a.aval.dtype, b.aval.dtype],
+					outputShapes: [b.aval.shape],
+					outputDtypes: [b.aval.dtype]
+				}, { unitDiagonal });
+				return Array$1.#routine(routine, [a, b], [a.#weakType && b.#weakType]);
 			},
-			[Primitive.JitCall](args, { jaxpr, numConsts }) {
-				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit_call expects ${jaxpr.inBinders.length} args, got ${args.length}`);
-				const { backend, committed } = Array$1.#computeBackend("jit_call", args);
+			[Primitive.Cholesky]([a]) {
+				const routine = new Routine(Routines.Cholesky, {
+					inputShapes: [a.aval.shape],
+					inputDtypes: [a.aval.dtype],
+					outputShapes: [a.aval.shape],
+					outputDtypes: [a.aval.dtype]
+				});
+				return Array$1.#routine(routine, [a], [a.#weakType]);
+			},
+			[Primitive.Jit](args, { jaxpr }) {
+				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit expects ${jaxpr.inBinders.length} args, got ${args.length}`);
+				const { backend, committed } = Array$1.#computeBackend("jit", args);
 				args = args.map((ar) => ar._putSync(backend));
-				const consts = args.slice(0, numConsts);
-				const tracers = args.slice(numConsts);
-				const jp = jitCompile(backend, jaxpr, consts);
-				const { outputs, pending } = jp.execute(tracers.map((x) => x._realizeSource()));
+				const jp = jitCompile(backend, jaxpr);
+				const { outputs, pending } = jp.execute(args.map((x) => x._realizeSource()));
 				for (const exe of pending) exe.updateRc(+outputs.length - 1);
 				const prevPending = [...new Set(args.flatMap((x) => x.#pending))];
 				for (const exe of prevPending) exe.updateRc(+outputs.length);
@@ -3050,6 +3339,43 @@ function arange(start, stop, step = 1, { dtype, device } = {}) {
 	});
 }
 /**
+* Return an array with ones on and below the diagonal and zeros elsewhere.
+*
+* If `k` is provided, it specifies the sub-diagonal on and below which the
+* array is filled with ones. `k=0` is the main diagonal, `k<0` is below it, and
+* `k>0` is above it.
+*/
+function tri(n, m, k = 0, { dtype, device } = {}) {
+	m ??= n;
+	dtype ??= DType.Float32;
+	if (!Number.isInteger(n) || n < 0) throw new Error(`tri: n must be a non-negative integer, got ${n}`);
+	if (!Number.isInteger(m) || m < 0) throw new Error(`tri: m must be a non-negative integer, got ${m}`);
+	if (!Number.isInteger(k)) throw new Error(`tri: k must be an integer, got ${k}`);
+	const rows = arange(k, n + k, 1, {
+		dtype: DType.Int32,
+		device
+	});
+	const cols = arange(0, m, 1, {
+		dtype: DType.Int32,
+		device
+	});
+	return rows.reshape([n, 1]).greaterEqual(cols).astype(dtype);
+}
+/** Return the lower triangle of an array. Must be of dimension >= 2. */
+function tril(a, k = 0) {
+	if (ndim$1(a) < 2) throw new Error(`tril: input array must be at least 2D, got ${ndim$1(a)}D`);
+	a = fudgeArray(a);
+	const [n, m] = a.shape.slice(-2);
+	return where$1(tri(n, m, k, { dtype: DType.Bool }), a.ref, zerosLike$1(a));
+}
+/** Return the upper triangle of an array. Must be of dimension >= 2. */
+function triu(a, k = 0) {
+	if (ndim$1(a) < 2) throw new Error(`tril: input array must be at least 2D, got ${ndim$1(a)}D`);
+	a = fudgeArray(a);
+	const [n, m] = a.shape.slice(-2);
+	return where$1(tri(n, m, k - 1, { dtype: DType.Bool }), zerosLike$1(a.ref), a);
+}
+/**
 * Return evenly spaced numbers over a specified interval.
 *
 * Returns _num_ evenly spaced samples, calculated over the interval
@@ -3096,333 +3422,106 @@ function aluCompare(a, b, op) {
 }
 //#endregion
-//#region src/frontend/jvp.ts
-var JVPTracer = class extends Tracer {
-	constructor(trace$1, primal, tangent) {
+//#region src/frontend/vmap.ts
+function mappedAval(batchDim, aval) {
+	const shape$1 = [...aval.shape];
+	shape$1.splice(batchDim, 1);
+	return new ShapedArray(shape$1, aval.dtype, aval.weakType);
+}
+/** Move one axis to a different index. */
+function moveaxis(x, src, dst) {
+	const t = pureArray(x);
+	src = checkAxis(src, t.ndim);
+	dst = checkAxis(dst, t.ndim);
+	if (src === dst) return t;
+	const perm = range(t.ndim);
+	perm.splice(src, 1);
+	perm.splice(dst, 0, src);
+	return transpose$1(t, perm);
+}
+function moveBatchAxis(axisSize, src, dst, x) {
+	if (src === null) {
+		const targetShape = [...x.shape];
+		targetShape.splice(dst, 0, axisSize);
+		return broadcast(x, targetShape, [dst]);
+	} else if (src === dst) return x;
+	else return moveaxis(x, src, dst);
+}
+var BatchTracer = class extends Tracer {
+	constructor(trace$1, val, batchDim) {
 		super(trace$1);
-		this.primal = primal;
-		this.tangent = tangent;
+		this.val = val;
+		this.batchDim = batchDim;
 	}
 	get aval() {
-		return this.primal.aval;
+		if (this.batchDim === null) return this.val.aval;
+		else return mappedAval(this.batchDim, this.val.aval);
 	}
 	toString() {
-		return `JVPTracer(${this.primal.toString()}, ${this.tangent.toString()})`;
+		return `BatchTracer(${this.val.toString()}, ${this.batchDim})`;
 	}
 	get ref() {
-		this.primal.ref, this.tangent.ref;
+		this.val.ref;
 		return this;
 	}
 	dispose() {
-		this.primal.dispose();
-		this.tangent.dispose();
+		this.val.dispose();
+	}
+	fullLower() {
+		if (this.batchDim === null) return this.val.fullLower();
+		else return this;
 	}
 };
-var JVPTrace = class extends Trace {
+var BatchTrace = class extends Trace {
 	pure(val) {
 		return this.lift(pureArray(val));
 	}
 	lift(val) {
-		return new JVPTracer(this, val, zerosLike$1(val.ref));
+		return new BatchTracer(this, val, null);
 	}
 	processPrimitive(primitive, tracers, params) {
-		const [primalsIn, tangentsIn] = unzip2(tracers.map((x) => [x.primal, x.tangent]));
-		const jvpRule = jvpRules[primitive];
-		if (jvpRule === void 0) throw new Error(`No JVP rule for: ${primitive}`);
-		const [primalsOut, tangentsOut] = jvpRule(primalsIn, tangentsIn, params);
-		return zip(primalsOut, tangentsOut).map(([x, t]) => new JVPTracer(this, x, t));
+		const [valsIn, bdimsIn] = unzip2(tracers.map((t) => [t.val, t.batchDim]));
+		const vmapRule = vmapRules[primitive];
+		if (vmapRule === void 0) throw new Error(`No vmap rule for: ${primitive}`);
+		if (bdimsIn.every((d) => d === null)) {
+			const valOuts$1 = bind(primitive, valsIn, params);
+			return valOuts$1.map((x) => new BatchTracer(this, x, null));
+		}
+		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
+		return zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
+	}
+	get axisSize() {
+		return this.main.globalData;
 	}
 };
-/** Rule that applies the same operation to primals and tangents. */
-function linearTangentsJvp(primitive) {
-	return (primals, tangents, params) => {
-		const ys = bind(primitive, primals, params);
-		const dys = bind(primitive, tangents, params);
-		return [ys, dys];
-	};
-}
-/** Rule for product of gradients in bilinear operations. */
-function bilinearTangentsJvp(primitive) {
-	return ([x, y], [dx, dy], params) => {
-		const primal = bind1(primitive, [x.ref, y.ref], params);
-		const tangent = bind1(primitive, [x, dy], params).add(bind1(primitive, [dx, y], params));
-		return [[primal], [tangent]];
+/**
+* Process a primitive with built-in broadcasting.
+*
+* Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
+*/
+function broadcastBatcher(op) {
+	return (axisSize, args, dims) => {
+		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
+		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
+		const firstIdx = dims.findIndex((d) => d !== null);
+		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
+		if (zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[op(...args)], [nd + firstBdim]];
+		args = args.map((x, i) => {
+			if (dims[i] === null) return x;
+			x = moveBatchAxis(axisSize, dims[i], 0, x);
+			if (x.ndim < nd) x = x.reshape([
+				x.shape[0],
+				...rep(nd - x.ndim, 1),
+				...x.shape.slice(1)
+			]);
+			return x;
+		});
+		return [[op(...args)], [0]];
 	};
 }
-/** Rule that zeros out any tangents. */
-function zeroTangentsJvp(primitive) {
-	return (primals, tangents, params) => {
-		for (const t of tangents) t.dispose();
-		const ys = bind(primitive, primals, params);
-		return [ys, ys.map((y) => zerosLike$1(y.ref))];
-	};
-}
-const jvpRules = {
-	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
-	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
-	[Primitive.Idiv]: zeroTangentsJvp(Primitive.Idiv),
-	[Primitive.Mod]([x, y], [dx, dy]) {
-		if (!isFloatDtype(x.dtype) && !isFloatDtype(y.dtype)) {
-			dx.dispose();
-			dy.dispose();
-			return [[x.ref, y.ref], [zerosLike$1(x), zerosLike$1(y)]];
-		}
-		const q = idiv(x.ref, y.ref);
-		return [[mod(x, y)], [dx.sub(dy.mul(q))]];
-	},
-	[Primitive.Neg]: linearTangentsJvp(Primitive.Neg),
-	[Primitive.Reciprocal]([x], [dx]) {
-		const xRecip = reciprocal$1(x.ref);
-		return [[xRecip.ref], [neg(xRecip.ref.mul(xRecip)).mul(dx)]];
-	},
-	[Primitive.Floor]: zeroTangentsJvp(Primitive.Floor),
-	[Primitive.Ceil]: zeroTangentsJvp(Primitive.Ceil),
-	[Primitive.StopGradient]: zeroTangentsJvp(Primitive.StopGradient),
-	[Primitive.Cast]([x], [dx], { dtype }) {
-		if (x.dtype === dtype) return [[x], [dx]];
-		if (isFloatDtype(dtype) && isFloatDtype(x.dtype)) return [[cast(x, dtype)], [cast(dx, dtype)]];
-		else {
-			dx.dispose();
-			return [[cast(x.ref, dtype)], [zerosLike$1(x)]];
-		}
-	},
-	[Primitive.Bitcast]([x], [dx], { dtype }) {
-		if (x.dtype === dtype) return [[x], [dx]];
-		dx.dispose();
-		return [[bitcast(x.ref, dtype)], [zerosLike$1(x)]];
-	},
-	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
-	[Primitive.Sin]([x], [dx]) {
-		return [[sin$1(x.ref)], [cos$1(x).mul(dx)]];
-	},
-	[Primitive.Cos]([x], [dx]) {
-		return [[cos$1(x.ref)], [neg(sin$1(x)).mul(dx)]];
-	},
-	[Primitive.Asin]([x], [dx]) {
-		const denom = sqrt$1(reciprocal$1(cast(1, x.dtype).sub(x.ref.mul(x.ref))));
-		return [[asin$1(x)], [denom.mul(dx)]];
-	},
-	[Primitive.Atan]([x], [dx]) {
-		const denom = cast(1, x.dtype).add(x.ref.mul(x.ref));
-		return [[atan$1(x)], [dx.div(denom)]];
-	},
-	[Primitive.Exp]([x], [dx]) {
-		const z = exp$1(x);
-		return [[z.ref], [z.mul(dx)]];
-	},
-	[Primitive.Log]([x], [dx]) {
-		return [[log$1(x.ref)], [reciprocal$1(x).mul(dx)]];
-	},
-	[Primitive.Erf]([x], [dx]) {
-		const coeff = 2 / Math.sqrt(Math.PI);
-		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
-		return [[erf$1(x)], [expTerm.mul(coeff).mul(dx)]];
-	},
-	[Primitive.Erfc]([x], [dx]) {
-		const coeff = -2 / Math.sqrt(Math.PI);
-		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
-		return [[erfc$1(x)], [expTerm.mul(coeff).mul(dx)]];
-	},
-	[Primitive.Sqrt]([x], [dx]) {
-		const z = sqrt$1(x);
-		return [[z.ref], [reciprocal$1(z.mul(2)).mul(dx)]];
-	},
-	[Primitive.Min]([x, y], [dx, dy]) {
-		return [[min$1(x.ref, y.ref)], [where$1(less$1(y, x), dy, dx)]];
-	},
-	[Primitive.Max]([x, y], [dx, dy]) {
-		return [[max$1(x.ref, y.ref)], [where$1(less$1(x, y), dy, dx)]];
-	},
-	[Primitive.Reduce]([x], [dx], { op, axis }) {
-		if (op === AluOp.Add) return [[reduce(x, op, axis)], [reduce(dx, op, axis)]];
-		else if (op === AluOp.Mul) {
-			const primal = reduce(x.ref, op, axis);
-			const tangent = broadcast(primal.ref, x.shape, axis).mul(reciprocal$1(x)).mul(dx).sum(axis);
-			return [[primal], [tangent]];
-		} else if (op === AluOp.Min || op === AluOp.Max) {
-			const primal = reduce(x.ref, op, axis);
-			const notMin = notEqual$1(x, broadcast(primal.ref, x.shape, axis));
-			const minCount = where$1(notMin.ref, 0, 1).sum(axis);
-			const tangent = where$1(notMin, 0, dx).sum(axis).div(minCount);
-			return [[primal], [tangent]];
-		} else throw new Error(`JVP rule not implemented for reduce op: ${op}`);
-	},
-	[Primitive.Pool]: linearTangentsJvp(Primitive.Pool),
-	[Primitive.PoolTranspose]: linearTangentsJvp(Primitive.PoolTranspose),
-	[Primitive.Dot]: bilinearTangentsJvp(Primitive.Dot),
-	[Primitive.Conv]: bilinearTangentsJvp(Primitive.Conv),
-	[Primitive.Compare]: zeroTangentsJvp(Primitive.Compare),
-	[Primitive.Where]([cond, x, y], [dcond, dx, dy]) {
-		dcond.dispose();
-		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
-	},
-	[Primitive.Transpose]: linearTangentsJvp(Primitive.Transpose),
-	[Primitive.Broadcast]: linearTangentsJvp(Primitive.Broadcast),
-	[Primitive.Reshape]: linearTangentsJvp(Primitive.Reshape),
-	[Primitive.Flip]: linearTangentsJvp(Primitive.Flip),
-	[Primitive.Shrink]: linearTangentsJvp(Primitive.Shrink),
-	[Primitive.Pad]: linearTangentsJvp(Primitive.Pad),
-	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
-		const indicesRef = indices.map((t) => t.ref);
-		return [[gather(x, indices, axis, outDim)], [gather(dx, indicesRef, axis, outDim)]];
-	},
-	[Primitive.JitCall](primals, tangents, { name, jaxpr }) {
-		const { newJaxpr, newConsts } = jvpJaxpr(jaxpr);
-		const outs = bind(Primitive.JitCall, [
-			...newConsts.map((c) => c.ref),
-			...primals,
-			...tangents
-		], {
-			name: `${name}_jvp`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
-		});
-		const n = outs.length / 2;
-		if (!Number.isInteger(n)) throw new Error("internal: JVP Jaxpr output length is not even");
-		const [primalsOut, tangentsOut] = [outs.slice(0, n), outs.slice(n)];
-		return [primalsOut, tangentsOut];
-	}
-};
-const jvpJaxprCache = /* @__PURE__ */ new Map();
-function jvpJaxpr(jaxpr) {
-	if (jvpJaxprCache.has(jaxpr)) return jvpJaxprCache.get(jaxpr);
-	const inAvals = jaxpr.inBinders.map((v) => v.aval);
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((primals, tangents) => jvpFlat(jaxprAsFun(jaxpr), primals, tangents))(inAvals, inAvals);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
-	jvpJaxprCache.set(jaxpr, result);
-	return result;
-}
-function jvpFlat(f, primals, tangents) {
-	try {
-		var _usingCtx$1 = _usingCtx();
-		const main = _usingCtx$1.u(newMain(JVPTrace));
-		const trace$1 = new JVPTrace(main);
-		const tracersIn = zip(primals, tangents).map(([x, t]) => new JVPTracer(trace$1, pureArray(x), pureArray(t)));
-		const outs = f(...tracersIn);
-		const tracersOut = outs.map((out) => fullRaise(trace$1, out));
-		return unzip2(tracersOut.map((t) => [t.primal, t.tangent]));
-	} catch (_) {
-		_usingCtx$1.e = _;
-	} finally {
-		_usingCtx$1.d();
-	}
-}
-function jvp$1(f, primals, tangents) {
-	const [primalsFlat, inTree] = flatten(primals);
-	const [tangentsFlat, inTree2] = flatten(tangents);
-	if (!inTree.equals(inTree2)) throw new TreeMismatchError("jvp", inTree, inTree2);
-	const [flatFun, outTree] = flattenFun(f, inTree);
-	const [primalsOutFlat, tangentsOutFlat] = jvpFlat(flatFun, primalsFlat, tangentsFlat);
-	if (outTree.value === void 0) throw new Error("outTree was not set in jvp");
-	const primalsOut = unflatten(outTree.value, primalsOutFlat);
-	const tangentsOut = unflatten(outTree.value, tangentsOutFlat);
-	return [primalsOut, tangentsOut];
-}
-//#endregion
-//#region src/frontend/vmap.ts
-function mappedAval(batchDim, aval) {
-	const shape$1 = [...aval.shape];
-	shape$1.splice(batchDim, 1);
-	return new ShapedArray(shape$1, aval.dtype, aval.weakType);
-}
-/** Move one axis to a different index. */
-function moveaxis(x, src, dst) {
-	const t = pureArray(x);
-	src = checkAxis(src, t.ndim);
-	dst = checkAxis(dst, t.ndim);
-	if (src === dst) return t;
-	const perm = range(t.ndim);
-	perm.splice(src, 1);
-	perm.splice(dst, 0, src);
-	return transpose$1(t, perm);
-}
-function moveBatchAxis(axisSize, src, dst, x) {
-	if (src === null) {
-		const targetShape = [...x.shape];
-		targetShape.splice(dst, 0, axisSize);
-		return broadcast(x, targetShape, [dst]);
-	} else if (src === dst) return x;
-	else return moveaxis(x, src, dst);
-}
-var BatchTracer = class extends Tracer {
-	constructor(trace$1, val, batchDim) {
-		super(trace$1);
-		this.val = val;
-		this.batchDim = batchDim;
-	}
-	get aval() {
-		if (this.batchDim === null) return this.val.aval;
-		else return mappedAval(this.batchDim, this.val.aval);
-	}
-	toString() {
-		return `BatchTracer(${this.val.toString()}, ${this.batchDim})`;
-	}
-	get ref() {
-		this.val.ref;
-		return this;
-	}
-	dispose() {
-		this.val.dispose();
-	}
-	fullLower() {
-		if (this.batchDim === null) return this.val.fullLower();
-		else return this;
-	}
-};
-var BatchTrace = class extends Trace {
-	pure(val) {
-		return this.lift(pureArray(val));
-	}
-	lift(val) {
-		return new BatchTracer(this, val, null);
-	}
-	processPrimitive(primitive, tracers, params) {
-		const [valsIn, bdimsIn] = unzip2(tracers.map((t) => [t.val, t.batchDim]));
-		const vmapRule = vmapRules[primitive];
-		if (vmapRule === void 0) throw new Error(`No vmap rule for: ${primitive}`);
-		if (bdimsIn.every((d) => d === null)) {
-			const valOuts$1 = bind(primitive, valsIn, params);
-			return valOuts$1.map((x) => new BatchTracer(this, x, null));
-		}
-		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
-		return zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
-	}
-	get axisSize() {
-		return this.main.globalData;
-	}
-};
-/**
-* Process a primitive with built-in broadcasting.
-*
-* Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
-*/
-function broadcastBatcher(op) {
-	return (axisSize, args, dims) => {
-		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
-		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
-		const firstIdx = dims.findIndex((d) => d !== null);
-		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
-		if (zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[op(...args)], [nd + firstBdim]];
-		args = args.map((x, i) => {
-			if (dims[i] === null) return x;
-			x = moveBatchAxis(axisSize, dims[i], 0, x);
-			if (x.ndim < nd) x = x.reshape([
-				x.shape[0],
-				...rep(nd - x.ndim, 1),
-				...x.shape.slice(1)
-			]);
-			return x;
-		});
-		return [[op(...args)], [0]];
-	};
-}
-function unopBatcher(op) {
-	return (axisSize, [x], [xBdim], params) => {
-		return [[op(x, params)], [xBdim]];
+function unopBatcher(op) {
+	return (axisSize, [x], [xBdim], params) => {
+		return [[op(x, params)], [xBdim]];
 	};
 }
 const vmapRules = {
@@ -3430,6 +3529,8 @@ const vmapRules = {
 	[Primitive.Mul]: broadcastBatcher(mul),
 	[Primitive.Idiv]: broadcastBatcher(idiv),
 	[Primitive.Mod]: broadcastBatcher(mod),
+	[Primitive.Min]: broadcastBatcher(min$1),
+	[Primitive.Max]: broadcastBatcher(max$1),
 	[Primitive.Neg]: unopBatcher(neg),
 	[Primitive.Reciprocal]: unopBatcher(reciprocal$1),
 	[Primitive.Floor]: unopBatcher(floor$1),
@@ -3446,8 +3547,6 @@ const vmapRules = {
 	[Primitive.Erf]: unopBatcher(erf$1),
 	[Primitive.Erfc]: unopBatcher(erfc$1),
 	[Primitive.Sqrt]: unopBatcher(sqrt$1),
-	[Primitive.Min]: broadcastBatcher(min$1),
-	[Primitive.Max]: broadcastBatcher(max$1),
 	[Primitive.Reduce](axisSize, [x], [xBdim], { op, axis }) {
 		assertNonNull(xBdim);
 		const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
@@ -3460,10 +3559,49 @@ const vmapRules = {
 		const z = dot$2(x, y);
 		return [[z], [z.ndim - 1]];
 	},
+	[Primitive.Conv](axisSize, [x, y], [xBdim, yBdim], params) {
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		y = moveBatchAxis(axisSize, yBdim, 0, y);
+		const z = conv$1(x, y, {
+			...params,
+			vmapDims: params.vmapDims + 1
+		});
+		return [[z], [0]];
+	},
 	[Primitive.Compare](axisSize, args, dims, { op }) {
 		return broadcastBatcher((x, y) => compare(x, y, op))(axisSize, args, dims, {});
 	},
-	[Primitive.Where]: broadcastBatcher(where$1),
+	[Primitive.Where]: broadcastBatcher(where$1),
+	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
+		if (indicesBdim.every((d) => d === null)) {
+			assertNonNull(xBdim);
+			const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
+			let newBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
+			let newOutDim = outDim;
+			if (newOutDim < newBdim) newBdim += axis.length;
+			else newOutDim += 1;
+			return [[gather(x, indices, newAxis, newOutDim)], [newBdim]];
+		}
+		const nd = Math.max(...indices.map((m, i) => ndim$1(m) + (indicesBdim[i] === null ? 1 : 0)));
+		indices = indices.map((m, i) => {
+			if (indicesBdim[i] === null) return m;
+			m = moveBatchAxis(axisSize, indicesBdim[i], 0, m);
+			if (m.ndim < nd) m = m.reshape([
+				m.shape[0],
+				...rep(nd - m.ndim, 1),
+				...m.shape.slice(1)
+			]);
+			return m;
+		});
+		if (xBdim === null) return [[gather(x, indices, axis, outDim)], [outDim]];
+		else {
+			x = moveBatchAxis(axisSize, xBdim, 0, x);
+			const newAxis = [0, ...axis.map((ax) => ax + 1)];
+			const extraBatchIndex = arange(axisSize).reshape([-1, ...rep(nd - 1, 1)]);
+			indices.splice(0, 0, extraBatchIndex);
+			return [[gather(x, indices, newAxis, outDim)], [outDim]];
+		}
+	},
 	[Primitive.Transpose](axisSize, [x], [xBdim], { perm }) {
 		assertNonNull(xBdim);
 		const newPerm = perm.map((p) => p + (xBdim <= p ? 1 : 0));
@@ -3495,42 +3633,53 @@ const vmapRules = {
 		const newWidth = width.toSpliced(xBdim, 0, [0, 0]);
 		return [[pad$1(x, newWidth)], [xBdim]];
 	},
-	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
-		if (indicesBdim.every((d) => d === null)) {
-			assertNonNull(xBdim);
-			const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
-			let newBdim = xBdim - axis.filter((ax) => ax < xBdim).length;
-			let newOutDim = outDim;
-			if (newOutDim < newBdim) newBdim += axis.length;
-			else newOutDim += 1;
-			return [[gather(x, indices, newAxis, newOutDim)], [newBdim]];
-		}
-		const nd = Math.max(...indices.map((m, i) => ndim$1(m) + (indicesBdim[i] === null ? 1 : 0)));
-		indices = indices.map((m, i) => {
-			if (indicesBdim[i] === null) return m;
-			m = moveBatchAxis(axisSize, indicesBdim[i], 0, m);
-			if (m.ndim < nd) m = m.reshape([
-				m.shape[0],
-				...rep(nd - m.ndim, 1),
-				...m.shape.slice(1)
+	[Primitive.Sort](axisSize, [x], [xBdim]) {
+		assertNonNull(xBdim);
+		if (xBdim !== x.ndim - 1) return [[sort$1(x)], [xBdim]];
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		return [[sort$1(x)], [0]];
+	},
+	[Primitive.Argsort](axisSize, [x], [xBdim]) {
+		assertNonNull(xBdim);
+		if (xBdim !== x.ndim - 1) return [argsort$1(x), [xBdim, xBdim]];
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		return [argsort$1(x), [0, 0]];
+	},
+	[Primitive.TriangularSolve](axisSize, [a, b], [aBdim, bBdim], { unitDiagonal }) {
+		if (aBdim === null) {
+			b = moveBatchAxis(axisSize, bBdim, -3, b);
+			const [s, m, n] = b.shape.slice(-3);
+			b = b.reshape([
+				...b.shape.slice(0, -3),
+				s * m,
+				n
 			]);
-			return m;
-		});
-		if (xBdim === null) return [[gather(x, indices, axis, outDim)], [outDim]];
-		else {
-			x = moveBatchAxis(axisSize, xBdim, 0, x);
-			const newAxis = [0, ...axis.map((ax) => ax + 1)];
-			const extraBatchIndex = arange(axisSize).reshape([-1, ...rep(nd - 1, 1)]);
-			indices.splice(0, 0, extraBatchIndex);
-			return [[gather(x, indices, newAxis, outDim)], [outDim]];
+			let x$1 = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+			x$1 = x$1.reshape([
+				...b.shape.slice(0, -2),
+				s,
+				m,
+				n
+			]);
+			return [[x$1], [x$1.ndim - 3]];
 		}
+		a = moveBatchAxis(axisSize, aBdim, 0, a);
+		b = moveBatchAxis(axisSize, bBdim, 0, b);
+		const x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
+		return [[x], [0]];
+	},
+	[Primitive.Cholesky](axisSize, [x], [xBdim]) {
+		assertNonNull(xBdim);
+		if (xBdim < x.ndim - 2) return [[cholesky$2(x)], [xBdim]];
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		return [[cholesky$2(x)], [0]];
 	},
-	[Primitive.JitCall](axisSize, args, dims, { name, jaxpr }) {
-		const { newJaxpr, newConsts } = vmapJaxpr(jaxpr, axisSize, dims);
-		const outs = bind(Primitive.JitCall, [...newConsts.map((c) => c.ref), ...args], {
+	[Primitive.Jit](axisSize, args, dims, { name, jaxpr }) {
+		const newJaxpr = vmapJaxpr(jaxpr, axisSize, dims);
+		const outs = bind(Primitive.Jit, [...newJaxpr.consts.map((c) => c.ref), ...args], {
 			name: `${name}_vmap`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
 		});
 		return [outs, rep(outs.length, 0)];
 	}
@@ -3546,14 +3695,10 @@ function vmapJaxpr(jaxpr, axisSize, dims) {
 		shape$1.splice(dims[i], 0, axisSize);
 		return new ShapedArray(shape$1, v.aval.dtype, v.aval.weakType);
 	});
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((args) => vmapFlat(jaxprAsFun(jaxpr), dims, args))(inAvals);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
+	const { jaxpr: newJaxpr } = makeJaxpr$1((args) => vmapFlat(jaxprAsFun(jaxpr), dims, args))(inAvals);
 	if (!vmapJaxprCache.has(jaxpr)) vmapJaxprCache.set(jaxpr, /* @__PURE__ */ new Map());
-	vmapJaxprCache.get(jaxpr).set(cacheKey, result);
-	return result;
+	vmapJaxprCache.get(jaxpr).set(cacheKey, newJaxpr);
+	return newJaxpr;
 }
 function vmapFlat(f, inAxes, args) {
 	let axisSize = void 0;
@@ -3608,6 +3753,260 @@ function jacfwd$1(f) {
 	};
 }
+//#endregion
+//#region src/frontend/jvp.ts
+var JVPTracer = class extends Tracer {
+	constructor(trace$1, primal, tangent) {
+		super(trace$1);
+		this.primal = primal;
+		this.tangent = tangent;
+	}
+	get aval() {
+		return this.primal.aval;
+	}
+	toString() {
+		return `JVPTracer(${this.primal.toString()}, ${this.tangent.toString()})`;
+	}
+	get ref() {
+		this.primal.ref, this.tangent.ref;
+		return this;
+	}
+	dispose() {
+		this.primal.dispose();
+		this.tangent.dispose();
+	}
+};
+var JVPTrace = class extends Trace {
+	pure(val) {
+		return this.lift(pureArray(val));
+	}
+	lift(val) {
+		return new JVPTracer(this, val, zerosLike$1(val.ref));
+	}
+	processPrimitive(primitive, tracers, params) {
+		const [primalsIn, tangentsIn] = unzip2(tracers.map((x) => [x.primal, x.tangent]));
+		const jvpRule = jvpRules[primitive];
+		if (jvpRule === void 0) throw new Error(`No JVP rule for: ${primitive}`);
+		const [primalsOut, tangentsOut] = jvpRule(primalsIn, tangentsIn, params);
+		return zip(primalsOut, tangentsOut).map(([x, t]) => new JVPTracer(this, x, t));
+	}
+};
+/** Rule that applies the same operation to primals and tangents. */
+function linearTangentsJvp(primitive) {
+	return (primals, tangents, params) => {
+		const ys = bind(primitive, primals, params);
+		const dys = bind(primitive, tangents, params);
+		return [ys, dys];
+	};
+}
+/** Rule for product of gradients in bilinear operations. */
+function bilinearTangentsJvp(primitive) {
+	return ([x, y], [dx, dy], params) => {
+		const primal = bind1(primitive, [x.ref, y.ref], params);
+		const tangent = bind1(primitive, [x, dy], params).add(bind1(primitive, [dx, y], params));
+		return [[primal], [tangent]];
+	};
+}
+/** Rule that zeros out any tangents. */
+function zeroTangentsJvp(primitive) {
+	return (primals, tangents, params) => {
+		for (const t of tangents) t.dispose();
+		const ys = bind(primitive, primals, params);
+		return [ys, ys.map((y) => zerosLike$1(y.ref))];
+	};
+}
+/** Compute `a @ b.T`, batched to last two axes. */
+function batchMatmulT(a, b) {
+	return dot$2(a.reshape(a.shape.toSpliced(-1, 0, 1)), b.reshape(b.shape.toSpliced(-2, 0, 1)));
+}
+/** Batch matrix transpose. */
+function mT(a) {
+	return moveaxis(a, -2, -1);
+}
+const jvpRules = {
+	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
+	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
+	[Primitive.Idiv]: zeroTangentsJvp(Primitive.Idiv),
+	[Primitive.Mod]([x, y], [dx, dy]) {
+		if (!isFloatDtype(x.dtype) && !isFloatDtype(y.dtype)) {
+			dx.dispose();
+			dy.dispose();
+			return [[x.ref, y.ref], [zerosLike$1(x), zerosLike$1(y)]];
+		}
+		const q = idiv(x.ref, y.ref);
+		return [[mod(x, y)], [dx.sub(dy.mul(q))]];
+	},
+	[Primitive.Min]([x, y], [dx, dy]) {
+		return [[min$1(x.ref, y.ref)], [where$1(less$1(y, x), dy, dx)]];
+	},
+	[Primitive.Max]([x, y], [dx, dy]) {
+		return [[max$1(x.ref, y.ref)], [where$1(less$1(x, y), dy, dx)]];
+	},
+	[Primitive.Neg]: linearTangentsJvp(Primitive.Neg),
+	[Primitive.Reciprocal]([x], [dx]) {
+		const xRecip = reciprocal$1(x.ref);
+		return [[xRecip.ref], [neg(xRecip.ref.mul(xRecip)).mul(dx)]];
+	},
+	[Primitive.Floor]: zeroTangentsJvp(Primitive.Floor),
+	[Primitive.Ceil]: zeroTangentsJvp(Primitive.Ceil),
+	[Primitive.StopGradient]: zeroTangentsJvp(Primitive.StopGradient),
+	[Primitive.Cast]([x], [dx], { dtype }) {
+		if (x.dtype === dtype) return [[x], [dx]];
+		if (isFloatDtype(dtype) && isFloatDtype(x.dtype)) return [[cast(x, dtype)], [cast(dx, dtype)]];
+		else {
+			dx.dispose();
+			return [[cast(x.ref, dtype)], [zerosLike$1(x)]];
+		}
+	},
+	[Primitive.Bitcast]([x], [dx], { dtype }) {
+		if (x.dtype === dtype) return [[x], [dx]];
+		dx.dispose();
+		return [[bitcast(x.ref, dtype)], [zerosLike$1(x)]];
+	},
+	[Primitive.Sin]([x], [dx]) {
+		return [[sin$1(x.ref)], [cos$1(x).mul(dx)]];
+	},
+	[Primitive.Cos]([x], [dx]) {
+		return [[cos$1(x.ref)], [neg(sin$1(x)).mul(dx)]];
+	},
+	[Primitive.Asin]([x], [dx]) {
+		const denom = sqrt$1(reciprocal$1(cast(1, x.dtype).sub(x.ref.mul(x.ref))));
+		return [[asin$1(x)], [denom.mul(dx)]];
+	},
+	[Primitive.Atan]([x], [dx]) {
+		const denom = cast(1, x.dtype).add(x.ref.mul(x.ref));
+		return [[atan$1(x)], [dx.div(denom)]];
+	},
+	[Primitive.Exp]([x], [dx]) {
+		const z = exp$1(x);
+		return [[z.ref], [z.mul(dx)]];
+	},
+	[Primitive.Log]([x], [dx]) {
+		return [[log$1(x.ref)], [reciprocal$1(x).mul(dx)]];
+	},
+	[Primitive.Erf]([x], [dx]) {
+		const coeff = 2 / Math.sqrt(Math.PI);
+		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
+		return [[erf$1(x)], [expTerm.mul(coeff).mul(dx)]];
+	},
+	[Primitive.Erfc]([x], [dx]) {
+		const coeff = -2 / Math.sqrt(Math.PI);
+		const expTerm = exp$1(neg(x.ref.mul(x.ref)));
+		return [[erfc$1(x)], [expTerm.mul(coeff).mul(dx)]];
+	},
+	[Primitive.Sqrt]([x], [dx]) {
+		const z = sqrt$1(x);
+		return [[z.ref], [reciprocal$1(z.mul(2)).mul(dx)]];
+	},
+	[Primitive.Reduce]([x], [dx], { op, axis }) {
+		if (op === AluOp.Add) return [[reduce(x, op, axis)], [reduce(dx, op, axis)]];
+		else if (op === AluOp.Mul) {
+			const primal = reduce(x.ref, op, axis);
+			const tangent = broadcast(primal.ref, x.shape, axis).mul(reciprocal$1(x)).mul(dx).sum(axis);
+			return [[primal], [tangent]];
+		} else if (op === AluOp.Min || op === AluOp.Max) {
+			const primal = reduce(x.ref, op, axis);
+			const notMin = notEqual$1(x, broadcast(primal.ref, x.shape, axis));
+			const minCount = where$1(notMin.ref, 0, 1).sum(axis);
+			const tangent = where$1(notMin, 0, dx).sum(axis).div(minCount);
+			return [[primal], [tangent]];
+		} else throw new Error(`JVP rule not implemented for reduce op: ${op}`);
+	},
+	[Primitive.Pool]: linearTangentsJvp(Primitive.Pool),
+	[Primitive.PoolTranspose]: linearTangentsJvp(Primitive.PoolTranspose),
+	[Primitive.Dot]: bilinearTangentsJvp(Primitive.Dot),
+	[Primitive.Conv]: bilinearTangentsJvp(Primitive.Conv),
+	[Primitive.Compare]: zeroTangentsJvp(Primitive.Compare),
+	[Primitive.Where]([cond, x, y], [dcond, dx, dy]) {
+		dcond.dispose();
+		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
+	},
+	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
+	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
+		const indicesRef = indices.map((t) => t.ref);
+		return [[gather(x, indices, axis, outDim)], [gather(dx, indicesRef, axis, outDim)]];
+	},
+	[Primitive.Transpose]: linearTangentsJvp(Primitive.Transpose),
+	[Primitive.Broadcast]: linearTangentsJvp(Primitive.Broadcast),
+	[Primitive.Reshape]: linearTangentsJvp(Primitive.Reshape),
+	[Primitive.Flip]: linearTangentsJvp(Primitive.Flip),
+	[Primitive.Shrink]: linearTangentsJvp(Primitive.Shrink),
+	[Primitive.Pad]: linearTangentsJvp(Primitive.Pad),
+	[Primitive.Sort]([x], [dx]) {
+		const [y, idx] = argsort$1(x);
+		return [[y], [gather(dx, [idx], [-1], -1)]];
+	},
+	[Primitive.Argsort]([x], [dx]) {
+		const [y, idx] = argsort$1(x);
+		return [[y, idx.ref], [gather(dx, [idx.ref], [-1], -1), zerosLike$1(idx)]];
+	},
+	[Primitive.TriangularSolve]([a, b], [da, db], { unitDiagonal }) {
+		const x = triangularSolve$1(a.ref, b, { unitDiagonal });
+		const dax = batchMatmulT(da, x.ref);
+		const rhsT = db.sub(mT(dax));
+		const dx = triangularSolve$1(a, rhsT, { unitDiagonal });
+		return [[x], [dx]];
+	},
+	[Primitive.Cholesky]([a], [da]) {
+		const L = cholesky$2(a.ref);
+		da = da.ref.add(mT(da)).mul(.5);
+		const W = triangularSolve$1(L.ref, da, { lower: true });
+		const ST = triangularSolve$1(L.ref, mT(W), { lower: true });
+		const dL = batchMatmulT(L.ref, triu(ST.ref, 1).add(triu(ST)).mul(.5));
+		return [[L], [dL]];
+	},
+	[Primitive.Jit](primals, tangents, { name, jaxpr }) {
+		const newJaxpr = jvpJaxpr(jaxpr);
+		const outs = bind(Primitive.Jit, [
+			...newJaxpr.consts.map((c) => c.ref),
+			...primals,
+			...tangents
+		], {
+			name: `${name}_jvp`,
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
+		});
+		const n = outs.length / 2;
+		if (!Number.isInteger(n)) throw new Error("internal: JVP Jaxpr output length is not even");
+		const [primalsOut, tangentsOut] = [outs.slice(0, n), outs.slice(n)];
+		return [primalsOut, tangentsOut];
+	}
+};
+const jvpJaxprCache = /* @__PURE__ */ new Map();
+function jvpJaxpr(jaxpr) {
+	if (jvpJaxprCache.has(jaxpr)) return jvpJaxprCache.get(jaxpr);
+	const inAvals = jaxpr.inBinders.map((v) => v.aval);
+	const { jaxpr: newJaxpr } = makeJaxpr$1((primals, tangents) => jvpFlat(jaxprAsFun(jaxpr), primals, tangents))(inAvals, inAvals);
+	jvpJaxprCache.set(jaxpr, newJaxpr);
+	return newJaxpr;
+}
+function jvpFlat(f, primals, tangents) { // Run flat function `f` under a fresh JVP trace; returns [primalsOut, tangentsOut].
+	try {
+		var _usingCtx$1 = _usingCtx(); // `var` so the catch/finally clauses below can reach the context
+		const main = _usingCtx$1.u(newMain(JVPTrace)); // NOTE(review): looks like compiled `using` output, so `main` is presumably released by `.d()` below (confirm)
+		const trace$1 = new JVPTrace(main);
+		const tracersIn = zip(primals, tangents).map(([x, t]) => new JVPTracer(trace$1, pureArray(x), pureArray(t))); // pair each primal with its tangent
+		const outs = f(...tracersIn);
+		const tracersOut = outs.map((out) => fullRaise(trace$1, out)); // raise every output to a tracer of this trace
+		return unzip2(tracersOut.map((t) => [t.primal, t.tangent])); // split the pairs back into two parallel lists
+	} catch (_) {
+		_usingCtx$1.e = _; // record the error on the context; NOTE(review): presumably rethrown by `.d()` (confirm against the helper)
+	} finally {
+		_usingCtx$1.d(); // always runs, on success and on error
+	}
+}
+function jvp$1(f, primals, tangents) {
+	const [primalsFlat, inTree] = flatten(primals);
+	const [tangentsFlat, inTree2] = flatten(tangents);
+	if (!inTree.equals(inTree2)) throw new TreeMismatchError("jvp", inTree, inTree2);
+	const [flatFun, outTree] = flattenFun(f, inTree);
+	const [primalsOutFlat, tangentsOutFlat] = jvpFlat(flatFun, primalsFlat, tangentsFlat);
+	if (outTree.value === void 0) throw new Error("outTree was not set in jvp");
+	const primalsOut = unflatten(outTree.value, primalsOutFlat);
+	const tangentsOut = unflatten(outTree.value, tangentsOutFlat);
+	return [primalsOut, tangentsOut];
+}
 //#endregion
 //#region src/frontend/linearize.ts
 /** Array value that can either be known or unknown. */
@@ -3638,11 +4037,10 @@ function partialEvalFlat(f, pvalsIn) {
 	const tracersOut = outs.map((out) => fullRaise(trace$1, out));
 	const pvalsOut = tracersOut.map((t) => t.pval);
 	const unknownTracersOut = tracersOut.filter((t) => !t.pval.isKnown);
-	const { jaxpr, consts } = partialEvalGraphToJaxpr(unknownTracersIn, unknownTracersOut);
+	const jaxpr = partialEvalGraphToJaxpr(unknownTracersIn, unknownTracersOut);
 	return {
 		jaxpr,
-		pvalsOut,
-		consts
+		pvalsOut
 	};
 }
 /**
@@ -3659,22 +4057,19 @@ function linearizeFlatUtil(f, primalsIn) {
 		const [primalsOut$1, tangentsOut] = jvp$1(f, x.slice(0, k), x.slice(k, 2 * k));
 		return [...primalsOut$1, ...tangentsOut];
 	};
-	const { jaxpr, pvalsOut, consts } = partialEvalFlat(fJvp, pvalsIn);
+	const { jaxpr, pvalsOut } = partialEvalFlat(fJvp, pvalsIn);
 	const primalPvals = pvalsOut.slice(0, pvalsOut.length / 2);
 	if (!primalPvals.every((pval) => pval.isKnown)) throw new Error("Not all primal values are known after partial evaluation");
 	const primalsOut = primalPvals.map((pval) => pval.val);
 	return {
 		primalsOut,
-		jaxpr,
-		consts
+		jaxpr
 	};
 }
 function linearizeFlat(f, primalsIn) {
-	const { primalsOut, jaxpr, consts } = linearizeFlatUtil(f, primalsIn);
-	const fLin = (...tangents) => evalJaxpr(jaxpr, [...consts.map((c) => c.ref), ...tangents]);
-	const dispose$1 = () => {
-		for (const c of consts) c.dispose();
-	};
+	const { primalsOut, jaxpr } = linearizeFlatUtil(f, primalsIn);
+	const fLin = (...tangents) => evalJaxpr(jaxpr.jaxpr, [...jaxpr.consts.map((c) => c.ref), ...tangents]);
+	const dispose$1 = () => jaxpr.dispose();
 	return [
 		primalsOut,
 		fLin,
@@ -3758,7 +4153,7 @@ var PartialEvalTrace = class extends Trace {
 	}
 	processPrimitive(primitive, tracers, params) {
 		if (tracers.every((t) => t.pval.isKnown)) return bind(primitive, tracers.map((t) => t.fullLower()), params);
-		if (primitive === Primitive.JitCall) {
+		if (primitive === Primitive.Jit) {
 			const { name, jaxpr, numConsts } = params;
 			return this.#partialEvalJaxpr(name, jaxpr, numConsts, tracers);
 		}
@@ -3784,14 +4179,14 @@ var PartialEvalTrace = class extends Trace {
 	* Evaluate a Jaxpr on a set of PartialEvalTracers, computing as many known
 	* values as possible (with JIT) and forwarding the unknown ones.
 	*
-	* Used when encountering a JitCall rule during the trace.
+	* Used when encountering a Jit rule during the trace.
 	*/
 	#partialEvalJaxpr(name, jaxpr, numConsts, tracers) {
 		jaxpr = jaxpr.flatten();
 		const inUnknowns = tracers.map((t) => !t.pval.isKnown);
 		const { jaxpr1, jaxpr2, outUnknowns, numRes } = partialEvalJaxpr(jaxpr, inUnknowns);
 		const [knownTracers, unknownTracers] = partitionList(inUnknowns, tracers);
-		const outs1Res = bind(Primitive.JitCall, knownTracers.map((t) => t.ref.fullLower()), {
+		const outs1Res = bind(Primitive.Jit, knownTracers.map((t) => t.ref.fullLower()), {
 			name: `${name}_peval`,
 			jaxpr: jaxpr1,
 			numConsts: 0
@@ -3801,7 +4196,7 @@ var PartialEvalTrace = class extends Trace {
 		const resTracers = res.map((x) => this.instantiateConst(fullRaise(this, x)));
 		const recipe = {
 			type: "JaxprEqn",
-			prim: Primitive.JitCall,
+			prim: Primitive.Jit,
 			tracersIn: resTracers.concat(unknownTracers),
 			params: {
 				name: `${name}_resid`,
@@ -3830,7 +4225,7 @@ function partialEvalJaxpr(jaxpr, inUnknowns, instantiate) {
 	const eqns1 = [];
 	const eqns2 = [];
 	for (const eqn of jaxpr.eqns) {
-		if (eqn.primitive === Primitive.JitCall) throw new TypeError("partialEvalJaxpr requires flattened Jaxpr");
+		if (eqn.primitive === Primitive.Jit) throw new TypeError("partialEvalJaxpr requires flattened Jaxpr");
 		const hasUnknowns = eqn.inputs.some((x) => x instanceof Var && !knownVars.has(x));
 		if (hasUnknowns) {
 			for (const x of eqn.inputs) if (x instanceof Var && knownVars.has(x)) residuals.add(x);
@@ -3904,11 +4299,8 @@ function partialEvalGraphToJaxpr(tracersIn, tracersOut) {
 	for (const t of tracersIn) t.dispose();
 	for (const t of tracersOut) t.dispose();
 	jaxpr = jaxpr.simplify();
-	if (DEBUG >= 5) console.log("jaxpr from partial evaluation:\n" + jaxpr.toString());
-	return {
-		jaxpr,
-		consts
-	};
+	if (DEBUG >= 5) console.info("jaxpr from partial evaluation:\n" + jaxpr.toString());
+	return new ClosedJaxpr(jaxpr, consts);
 }
 /** Marker type for pullback, used by transpose rules. */
 var UndefPrimal = class {
@@ -4038,22 +4430,25 @@ const transposeRules = {
 	},
 	[Primitive.Conv]([ct], [lhs, rhs], params) {
 		if (lhs instanceof UndefPrimal === rhs instanceof UndefPrimal) throw new NonlinearError(Primitive.Conv);
+		const v = params.vmapDims;
 		const rev01 = [
-			1,
-			0,
-			...range(2, ct.ndim)
+			...range(v),
+			v + 1,
+			v,
+			...range(v + 2, ct.ndim)
 		];
 		if (lhs instanceof UndefPrimal) {
 			let kernel = rhs;
 			kernel = transpose$1(kernel, rev01);
-			kernel = flip$1(kernel, range(2, kernel.ndim));
+			kernel = flip$1(kernel, range(v + 2, kernel.ndim));
 			const result = conv$1(ct, kernel, {
+				vmapDims: v,
 				strides: params.lhsDilation,
 				padding: params.padding.map(([pl, _pr], i) => {
-					const dilatedKernel = (kernel.shape[i + 2] - 1) * params.rhsDilation[i] + 1;
-					const dilatedCt = (ct.shape[i + 2] - 1) * params.strides[i] + 1;
+					const dilatedKernel = (kernel.shape[i + v + 2] - 1) * params.rhsDilation[i] + 1;
+					const dilatedCt = (ct.shape[i + v + 2] - 1) * params.strides[i] + 1;
 					const padBefore = dilatedKernel - 1 - pl;
-					const dilatedLhs = (lhs.aval.shape[i + 2] - 1) * params.lhsDilation[i] + 1;
+					const dilatedLhs = (lhs.aval.shape[i + v + 2] - 1) * params.lhsDilation[i] + 1;
 					const padAfter = dilatedLhs + dilatedKernel - 1 - dilatedCt - padBefore;
 					return [padBefore, padAfter];
 				}),
@@ -4065,11 +4460,12 @@ const transposeRules = {
 			const newLhs = transpose$1(lhs, rev01);
 			const newRhs = transpose$1(ct, rev01);
 			let result = conv$1(newLhs, newRhs, {
+				vmapDims: v,
 				strides: params.rhsDilation,
 				padding: params.padding.map(([pl, _pr], i) => {
-					const dilatedLhs = (lhs.aval.shape[i + 2] - 1) * params.lhsDilation[i] + 1;
-					const dilatedKernel = (rhs.aval.shape[i + 2] - 1) * params.rhsDilation[i] + 1;
-					const dilatedCt = (ct.shape[i + 2] - 1) * params.strides[i] + 1;
+					const dilatedLhs = (lhs.aval.shape[i + v + 2] - 1) * params.lhsDilation[i] + 1;
+					const dilatedKernel = (rhs.aval.shape[i + v + 2] - 1) * params.rhsDilation[i] + 1;
+					const dilatedCt = (ct.shape[i + v + 2] - 1) * params.strides[i] + 1;
 					const padFromLhs = dilatedCt - dilatedLhs;
 					const padFromRhs = dilatedKernel - pl - 1;
 					return [pl, padFromLhs + padFromRhs];
@@ -4096,6 +4492,11 @@ const transposeRules = {
 		cond.dispose();
 		return cts;
 	},
+	[Primitive.Gather]([ct], [x, ...indices], { axis, outDim }) {
+		// Only the gathered operand may be the undefined (linear) input; the index arrays must all be known.
+		const nonlinear = !(x instanceof UndefPrimal) || indices.some((idx) => idx instanceof UndefPrimal);
+		if (nonlinear) throw new NonlinearError(Primitive.Gather);
+		// The linear case is recognized but has no implementation yet.
+		throw new Error("Gather transpose rule is not yet implemented, requires complex Scatter sum operation");
+	},
 	[Primitive.Transpose]([ct], [x], { perm }) {
 		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Transpose);
 		return [transpose$1(ct, invertPermutation(perm))];
@@ -4122,23 +4523,26 @@ const transposeRules = {
 		const slice = width.map(([s, _e], i) => [s, s + x.aval.shape[i]]);
 		return [shrink(ct, slice)];
 	},
-	[Primitive.Gather]([ct], [x, ...indices], { axis, outDim }) {
-		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
-		if (indices.some((i) => i instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
-		throw new Error("Gather transpose rule is not yet implemented, requires complex Scatter sum operation");
+	[Primitive.TriangularSolve]([ct], [a, b], { unitDiagonal }) {
+		if (a instanceof UndefPrimal || !(b instanceof UndefPrimal)) throw new NonlinearError(Primitive.TriangularSolve);
+		const ctB = triangularSolve$1(moveaxis(a, -2, -1), ct, {
+			lower: true,
+			unitDiagonal
+		});
+		return [null, ctB];
 	},
-	[Primitive.JitCall](cts, args, { name, jaxpr }) {
+	[Primitive.Jit](cts, args, { name, jaxpr }) {
 		const undefPrimals = args.map((x) => x instanceof UndefPrimal);
-		const { newJaxpr, newConsts } = transposeJaxpr(jaxpr, undefPrimals);
+		const newJaxpr = transposeJaxpr(jaxpr, undefPrimals);
 		const residuals = args.filter((x, i$1) => !undefPrimals[i$1]);
-		const outs = bind(Primitive.JitCall, [
-			...newConsts.map((c) => c.ref),
+		const outs = bind(Primitive.Jit, [
+			...newJaxpr.consts.map((c) => c.ref),
 			...residuals,
 			...cts
 		], {
 			name: `${name}_t`,
-			jaxpr: newJaxpr,
-			numConsts: newConsts.length
+			jaxpr: newJaxpr.jaxpr,
+			numConsts: newJaxpr.consts.length
 		});
 		let i = 0;
 		return undefPrimals.map((isUndef) => isUndef ? outs[i++] : null);
@@ -4151,31 +4555,25 @@ function transposeJaxpr(jaxpr, undefPrimals) {
 	if (prevResult) return prevResult;
 	const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
 	const forwardInTypes = inTypes.filter((_, i) => !undefPrimals[i]);
-	const { jaxpr: newJaxpr, consts: newConsts } = makeJaxpr$1((forwardIn, cotangents) => {
+	const { jaxpr: newJaxpr } = makeJaxpr$1((forwardIn, cotangents) => {
 		const args = [];
 		let forwardInIdx = 0;
 		for (let i = 0; i < undefPrimals.length; i++) if (undefPrimals[i]) args.push(new UndefPrimal(inTypes[i]));
 		else args.push(forwardIn[forwardInIdx++]);
 		return evalJaxprTransposed(jaxpr, args, cotangents);
 	})(forwardInTypes, outTypes);
-	typecheckJaxpr(newJaxpr);
-	const result = {
-		newJaxpr,
-		newConsts
-	};
+	typecheckJaxpr(newJaxpr.jaxpr);
 	if (!transposeJaxprCache.has(jaxpr)) transposeJaxprCache.set(jaxpr, /* @__PURE__ */ new Map());
-	transposeJaxprCache.get(jaxpr).set(cacheKey, result);
-	return result;
+	transposeJaxprCache.get(jaxpr).set(cacheKey, newJaxpr);
+	return newJaxpr;
 }
 function vjpFlat(f, primalsIn) {
-	const { primalsOut, jaxpr, consts } = linearizeFlatUtil(f, primalsIn);
+	const { primalsOut, jaxpr } = linearizeFlatUtil(f, primalsIn);
 	const fVjp = (...cotangents) => {
-		const transposeInputs = [...consts.map((c) => c.ref), ...primalsIn.map((t) => new UndefPrimal(t.aval))];
-		return evalJaxprTransposed(jaxpr, transposeInputs, cotangents);
-	};
-	const dispose$1 = () => {
-		for (const c of consts) c.dispose();
+		const transposeInputs = [...jaxpr.consts.map((c) => c.ref), ...primalsIn.map((t) => new UndefPrimal(t.aval))];
+		return evalJaxprTransposed(jaxpr.jaxpr, transposeInputs, cotangents);
 	};
+	const dispose$1 = () => jaxpr.dispose();
 	return [
 		primalsOut,
 		fVjp,
@@ -4232,150 +4630,6 @@ function jacrev$1(f) {
 	};
 }
-//#endregion
-//#region src/library/lax.ts
-var lax_exports = {};
-__export(lax_exports, {
-	conv: () => conv,
-	convGeneralDilated: () => convGeneralDilated,
-	convWithGeneralPadding: () => convWithGeneralPadding,
-	dot: () => dot$1,
-	erf: () => erf,
-	erfc: () => erfc,
-	reduceWindow: () => reduceWindow,
-	stopGradient: () => stopGradient$1
-});
-/**
-* General dot product/contraction operator.
-*
-* Prefer higher-level functions like `jax.numpy.dot()`, `jax.numpy.matmul()`,
-* `jax.numpy.tensordot(), and `jax.numpy.einsum()` where possible.
-*/
-function dot$1(lhs, rhs, { lhsContractingDims: lc = [], rhsContractingDims: rc = [], lhsBatchDims: lb = [], rhsBatchDims: rb = [] } = {}) {
-	if (lc.length !== rc.length) throw new Error(`dot: contracting dims lengths mismatch, got ${JSON.stringify(lc)} and ${JSON.stringify(rc)}`);
-	else if (lb.length !== rb.length) throw new Error(`dot: batch dims lengths mismatch, got ${JSON.stringify(lb)} and ${JSON.stringify(rb)}`);
-	lc = lc.map((a) => checkAxis(a, lhs.ndim));
-	rc = rc.map((a) => checkAxis(a, rhs.ndim));
-	lb = lb.map((a) => checkAxis(a, lhs.ndim));
-	rb = rb.map((a) => checkAxis(a, rhs.ndim));
-	if (lc.some((a) => lb.includes(a))) throw new Error(`dot: lhs contracting dims ${JSON.stringify(lc)} overlap with batch dims ${JSON.stringify(lb)}`);
-	else if (rc.some((a) => rb.includes(a))) throw new Error(`dot: rhs contracting dims ${JSON.stringify(rc)} overlap with batch dims ${JSON.stringify(rb)}`);
-	const lf = range(lhs.ndim).filter((a) => !lc.includes(a) && !lb.includes(a));
-	const rf = range(rhs.ndim).filter((a) => !rc.includes(a) && !rb.includes(a));
-	const lhs2 = lhs.transpose([
-		...lb,
-		...lf,
-		...lc
-	]);
-	const rhs2 = rhs.transpose([
-		...rb,
-		...rf,
-		...rc
-	]);
-	if (lc.length === 0) return mul(lhs2.reshape([
-		...lb.map((a) => lhs.shape[a]),
-		...lf.map((a) => lhs.shape[a]),
-		...rep(rf.length, 1)
-	]), rhs2.reshape([
-		...rb.map((a) => rhs.shape[a]),
-		...rep(lf.length, 1),
-		...rf.map((a) => rhs.shape[a])
-	]));
-	const dotShapeX = lc.map((a) => lhs.shape[a]);
-	const dotShapeY = rc.map((a) => rhs.shape[a]);
-	if (!deepEqual(dotShapeX, dotShapeY)) throw new Error(`dot: shapes not aligned along contracting dims: ${JSON.stringify(dotShapeX)} != ${JSON.stringify(dotShapeY)}`);
-	return dot$2(lhs2.reshape([
-		...lb.map((a) => lhs.shape[a]),
-		...lf.map((a) => lhs.shape[a]),
-		...rep(rf.length, 1),
-		prod(dotShapeX)
-	]), rhs2.reshape([
-		...rb.map((a) => rhs.shape[a]),
-		...rep(lf.length, 1),
-		...rf.map((a) => rhs.shape[a]),
-		prod(dotShapeY)
-	]));
-}
-function padtypeToPads(inShape, filterShape, strides, dilation, padding) {
-	const padType = padding.toUpperCase();
-	switch (padType) {
-		case "VALID": return rep(inShape.length, [0, 0]);
-		case "SAME":
-		case "SAME_LOWER": {
-			const outShape = inShape.map((size$1, i) => Math.ceil(size$1 / strides[i]));
-			const padSizes = zipn(outShape, strides, filterShape, dilation, inShape).map(([o, s, k, d, i]) => Math.max(0, (o - 1) * s + 1 + (k - 1) * d - i));
-			if (padType === "SAME") return padSizes.map((size$1) => [size$1 >> 1, size$1 - (size$1 >> 1)]);
-			else return padSizes.map((size$1) => [size$1 - (size$1 >> 1), size$1 >> 1]);
-		}
-		default: throw new Error(`Unknown padding type: ${padType}`);
-	}
-}
-/**
-* General n-dimensional convolution operator, with optional dilation.
-*
-* The semantics of this operation mimic the `jax.lax.conv_general_dilated`
-* function in JAX, which wraps XLA's general convolution operator.
-*
-* Grouped convolutions are not supported right now.
-*/
-function convGeneralDilated(lhs, rhs, windowStrides, padding, { lhsDilation, rhsDilation } = {}) {
-	if (lhs.ndim < 2) throw new Error("lhs must have at least 2 dimensions");
-	if (rhs.ndim < 2) throw new Error("rhs must have at least 2 dimensions");
-	if (typeof padding === "string") {
-		if (lhsDilation?.some((d) => d !== 1)) throw new Error("String padding is not supported for transposed convolutions");
-		padding = padtypeToPads(lhs.shape.slice(2), rhs.shape.slice(2), windowStrides, rhsDilation ?? rep(rhs.ndim - 2, 1), padding);
-	}
-	return conv$1(lhs, rhs, {
-		strides: windowStrides,
-		padding,
-		lhsDilation,
-		rhsDilation
-	});
-}
-/** Convenience wrapper around `convGeneralDilated`. */
-function convWithGeneralPadding(lhs, rhs, windowStrides, padding, lhsDilation, rhsDilation) {
-	return convGeneralDilated(lhs, rhs, windowStrides, padding, {
-		lhsDilation,
-		rhsDilation
-	});
-}
-/** Convenience wrapper around `convGeneralDilated`. */
-function conv(lhs, rhs, windowStrides, padding) {
-	return convGeneralDilated(lhs, rhs, windowStrides, padding);
-}
-/** Reduce a computation over padded windows. */
-function reduceWindow(operand, computation, windowDimensions, windowStrides) {
-	if (operand.ndim < windowDimensions.length) throw new Error(`Operand dimensions ${operand.ndim} < window ${windowDimensions.length}`);
-	if (!windowStrides) windowStrides = rep(windowDimensions.length, 1);
-	for (let i = 0; i < operand.ndim; i++) computation = vmap$1(computation, 0);
-	return computation(bind1(Primitive.Pool, [operand], {
-		window: windowDimensions,
-		strides: windowStrides
-	}));
-}
-/** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
-function erf(x) {
-	return erf$1(x);
-}
-/**
-* The complementary error function: `erfc(x) = 1 - erf(x)`.
-*
-* This function is more accurate than `1 - erf(x)` for large values of `x`,
-* where `erf(x)` is very close to 1.
-*/
-function erfc(x) {
-	return erfc$1(x);
-}
-/**
-* Stops gradient computation.
-*
-* Behaves as the identity function but prevents the flow of gradients during
-* forward or reverse-mode automatic differentiation.
-*/
-function stopGradient$1(x) {
-	return stopGradient(x);
-}
 //#endregion
 //#region src/library/numpy/einsum.ts
 const bprod = (...xs) => xs.reduce((acc, x) => acc * BigInt(x), 1n);
@@ -4571,34 +4825,207 @@ function* allPaths(tensors, next) {
 	}
 }
+//#endregion
+//#region src/library/numpy-fft.ts
+// Namespace object for the FFT routines; it is exposed as `fft` on `numpy_exports`.
+// NOTE(review): `__export` comes from the shared chunk and presumably installs the listed accessors on the object — confirm.
+var numpy_fft_exports = {};
+__export(numpy_fft_exports, {
+	fft: () => fft,
+	ifft: () => ifft
+});
+/**
+* Validate a `{ real, imag }` pair passed to `jax.numpy.fft.<name>`.
+*
+* Throws when the two parts differ in shape or dtype, or when the dtype is not
+* a floating-point type.
+*/
+function checkPairInput(name, a) {
+	const { real, imag } = a;
+	const prefix = `jax.numpy.fft.${name}`;
+	const shapesMatch = deepEqual(real.shape, imag.shape);
+	if (!shapesMatch) throw new Error(`${prefix}: real and imaginary parts must have the same shape, got ${JSON.stringify(real.shape)} and ${JSON.stringify(imag.shape)}`);
+	const dtypesMatch = real.dtype === imag.dtype;
+	if (!dtypesMatch) throw new Error(`${prefix}: real and imaginary parts must have the same dtype, got ${real.dtype} and ${imag.dtype}`);
+	if (!isFloatDtype(real.dtype)) throw new Error(`${prefix}: input must have a float dtype, got ${real.dtype}`);
+}
+/**
+* Throw unless `n` is a positive integer power of two (1, 2, 4, ...).
+*
+* @param name - Function name used in the error message (`jax.numpy.fft.<name>`).
+* @param n - Size of the transformed axis.
+*/
+function checkPowerOfTwo(name, n) {
+	// `n & (n - 1)` is zero for powers of two but also for 0, and bitwise operators truncate fractions, so both cases are rejected explicitly.
+	if (!Number.isInteger(n) || n < 1 || (n & n - 1) !== 0) throw new Error(`jax.numpy.fft.${name}: size must be a power of two, got ${n}`);
+}
+/**
+* One radix-2 butterfly stage over a `{ real, imag }` pair.
+*
+* Stage `i` views the data as rows of length `2 * 2**i`, multiplies the second
+* half of each row by the twiddle factors `exp(-i*pi*k / half)` and combines it
+* with the first half as `[u + t, u - t]`. `i` is declared static for `jit$1`.
+*
+* NOTE(review): `.ref` is taken on every use of an array except its last one,
+* presumably because operations consume their operands — confirm before
+* reordering anything here.
+*/
+const fftUpdate = jit$1(function fftUpdate$1(i, { real, imag }) {
+	const half = 2 ** i;
+	real = real.reshape([-1, 2 * half]);
+	imag = imag.reshape([-1, 2 * half]);
+	// Twiddle factors w_k = cos(theta_k) + i*sin(theta_k), with theta_k = -pi * k / half.
+	const k = arange(0, half, 1, { dtype: real.dtype });
+	const theta = k.mul(-Math.PI / half);
+	const wr = cos(theta.ref);
+	const wi = sin(theta);
+	// u: first half of each row, v: second half.
+	const ur = real.ref.slice([], [0, half]);
+	const ui = imag.ref.slice([], [0, half]);
+	const vr = real.slice([], [half, 2 * half]);
+	const vi = imag.slice([], [half, 2 * half]);
+	// t = v * w as a complex product.
+	const tr = vr.ref.mul(wr.ref).sub(vi.ref.mul(wi.ref));
+	const ti = vr.mul(wi).add(vi.mul(wr));
+	return {
+		real: concatenate([ur.ref.add(tr.ref), ur.sub(tr)], -1),
+		imag: concatenate([ui.ref.add(ti.ref), ui.sub(ti)], -1)
+	};
+}, { staticArgnums: [0] });
+/**
+* Compute a one-dimensional discrete Fourier transform.
+*
+* Currently, the size of the axis must be a power of two.
+*
+* @param a - Complex input given as a `{ real, imag }` pair of arrays.
+* @param axis - Axis to transform; defaults to the last one.
+* @returns A `{ real, imag }` pair with the same shape as the input.
+*/
+function fft(a, axis = -1) {
+	checkPairInput("fft", a);
+	let { real, imag } = a;
+	axis = checkAxis(axis, real.ndim);
+	const n = real.shape[axis];
+	checkPowerOfTwo("fft", n);
+	const logN = Math.log2(n);
+	// Move the transformed axis to the end when it is not already last; `perm` is remembered so the move can be undone.
+	let perm = null;
+	if (axis !== real.ndim - 1) {
+		perm = range(real.ndim);
+		perm.splice(axis, 1);
+		perm.push(axis);
+		real = real.transpose(perm);
+		imag = imag.transpose(perm);
+	}
+	const originalShape = real.shape;
+	// Split the last axis into logN axes of size 2 and reverse their order: a bit-reversal permutation of the index (assuming row-major reshape).
+	real = real.reshape([-1, ...rep(logN, 2)]).transpose([0, ...range(1, logN + 1).reverse()]).flatten();
+	imag = imag.reshape([-1, ...rep(logN, 2)]).transpose([0, ...range(1, logN + 1).reverse()]).flatten();
+	// Apply the logN butterfly stages, with block size doubling at each stage.
+	for (let i = 0; i < logN; i++) ({real, imag} = fftUpdate(i, {
+		real,
+		imag
+	}));
+	real = real.reshape(originalShape);
+	imag = imag.reshape(originalShape);
+	// Restore the caller's axis order.
+	if (perm !== null) {
+		real = real.transpose(invertPermutation(perm));
+		imag = imag.transpose(invertPermutation(perm));
+	}
+	return {
+		real,
+		imag
+	};
+}
+/**
+* One-dimensional inverse discrete Fourier transform along `axis`.
+*
+* Evaluated through the forward transform as `conj(fft(conj(a))) / n`, where
+* `n` is the length of the transformed axis. That length must currently be a
+* power of two.
+*/
+function ifft(a, axis = -1) {
+	checkPairInput("ifft", a);
+	const resolvedAxis = checkAxis(axis, a.real.ndim);
+	const n = a.real.shape[resolvedAxis];
+	checkPowerOfTwo("ifft", n);
+	// Conjugate the input, run the forward FFT, then conjugate and scale the output.
+	const conjugated = {
+		real: a.real,
+		imag: a.imag.mul(-1)
+	};
+	const { real: outReal, imag: outImag } = fft(conjugated, resolvedAxis);
+	return {
+		real: outReal.div(n),
+		imag: outImag.mul(-1).div(n)
+	};
+}
+//#endregion
+//#region src/library/numpy-linalg.ts
+// Namespace object for the linear-algebra routines; it is exposed as `linalg` on `numpy_exports`.
+// Note that `cholesky` maps to the local wrapper `cholesky$1`.
+var numpy_linalg_exports = {};
+__export(numpy_linalg_exports, {
+	cholesky: () => cholesky$1,
+	diagonal: () => diagonal,
+	lstsq: () => lstsq,
+	matmul: () => matmul,
+	matrixTranspose: () => matrixTranspose,
+	outer: () => outer,
+	tensordot: () => tensordot,
+	trace: () => trace,
+	vecdot: () => vecdot
+});
+/**
+* Cholesky factorization of a positive-definite matrix, or of a batch of them.
+*
+* Differs from `jax.lax.linalg.cholesky()` in that the input is first replaced
+* by `(a + transpose(a)) / 2`, unless `symmetrizeInput` is set to false.
+*/
+function cholesky$1(a, { upper = false, symmetrizeInput = true } = {}) {
+	let mat = fudgeArray(a);
+	const rank = mat.ndim;
+	const isSquareMatrix = rank >= 2 && mat.shape[rank - 1] === mat.shape[rank - 2];
+	if (!isSquareMatrix) throw new Error(`cholesky: input must be at least 2D square matrix, got ${mat.aval}`);
+	// Keep `mat.ref` ahead of the transpose call inside one expression: the evaluation order matters here.
+	if (symmetrizeInput) mat = mat.ref.add(matrixTranspose(mat)).mul(.5);
+	return cholesky(mat, { upper });
+}
+/**
+* Return the least-squares solution to a linear equation.
+*
+* For overdetermined systems, this finds the `x` that minimizes `norm(ax - b)`.
+* For underdetermined systems, this finds the minimum-norm solution for `x`.
+*
+* This currently uses Cholesky decomposition to solve the normal equations,
+* under the hood. The method is not as robust as QR or SVD.
+*
+* @param a coefficient matrix of shape `(M, N)`
+* @param b right-hand side of shape `(M,)` or `(M, K)`
+* @return least-squares solution of shape `(N,)` or `(N, K)`
+* @throws Error if `a` is not 2D or the leading dimension of `b` is not `M`
+*/
+function lstsq(a, b) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	if (a.ndim !== 2) throw new Error(`lstsq: 'a' must be a 2D array, got ${a.aval}`);
+	const [m, n] = a.shape;
+	if (b.shape[0] !== m) throw new Error(`lstsq: leading dimension of 'b' must match number of rows of 'a', got ${b.aval}`);
+	const at = matrixTranspose(a.ref);
+	if (m <= n) {
+		// Underdetermined or square: x = A^T (A A^T)^-1 b, with A A^T = L L^T.
+		const aat = matmul(a, at.ref);
+		const l = cholesky$1(aat, { symmetrizeInput: false });
+		const lb = triangularSolve(l.ref, b, {
+			leftSide: true,
+			lower: true
+		});
+		// NOTE(review): this solve passes `transposeA` without `lower`, unlike the one above — confirm how the two flags interact.
+		const llb = triangularSolve(l, lb, {
+			leftSide: true,
+			transposeA: true
+		});
+		// Last use of `llb`: hand it over directly rather than through `.ref`, so no extra reference stays alive.
+		return matmul(at, llb);
+	} else {
+		// Overdetermined: x = (A^T A)^-1 A^T b, with A^T A = L L^T.
+		const ata = matmul(at.ref, a);
+		const l = cholesky$1(ata, { symmetrizeInput: false });
+		const atb = matmul(at, b);
+		const lb = triangularSolve(l.ref, atb, {
+			leftSide: true,
+			lower: true
+		});
+		const llb = triangularSolve(l, lb, {
+			leftSide: true,
+			transposeA: true
+		});
+		return llb;
+	}
+}
 //#endregion
 //#region src/library/numpy.ts
 var numpy_exports = {};
 __export(numpy_exports, {
 	Array: () => Array$1,
 	DType: () => DType,
-	abs: () => abs,
+	abs: () => absolute,
 	absolute: () => absolute,
 	acos: () => acos,
-	acosh: () => acosh,
+	acosh: () => arccosh,
 	add: () => add,
+	all: () => all,
 	allclose: () => allclose,
+	any: () => any,
 	arange: () => arange,
-	arccos: () => arccos,
+	arccos: () => acos,
 	arccosh: () => arccosh,
+	arcsin: () => asin,
 	arcsinh: () => arcsinh,
-	arctan: () => arctan,
-	arctan2: () => arctan2,
+	arctan: () => atan,
+	arctan2: () => atan2,
 	arctanh: () => arctanh,
 	argmax: () => argmax,
 	argmin: () => argmin,
+	argsort: () => argsort,
 	array: () => array,
 	asin: () => asin,
-	asinh: () => asinh,
+	asinh: () => arcsinh,
 	astype: () => astype,
 	atan: () => atan,
 	atan2: () => atan2,
-	atanh: () => atanh,
+	atanh: () => arctanh,
 	bool: () => bool,
 	broadcastArrays: () => broadcastArrays,
 	broadcastShapes: () => broadcastShapes,
@@ -4608,14 +5035,20 @@ __export(numpy_exports, {
 	clip: () => clip,
 	columnStack: () => columnStack,
 	concatenate: () => concatenate,
+	convolve: () => convolve,
+	corrcoef: () => corrcoef,
+	correlate: () => correlate,
 	cos: () => cos,
 	cosh: () => cosh,
+	cov: () => cov,
+	cumsum: () => cumsum,
+	cumulativeSum: () => cumsum,
 	deg2rad: () => deg2rad,
 	degrees: () => degrees,
 	diag: () => diag,
 	diagonal: () => diagonal,
-	divide: () => divide,
-	dot: () => dot,
+	divide: () => trueDivide,
+	dot: () => dot$1,
 	dstack: () => dstack,
 	e: () => e,
 	einsum: () => einsum,
@@ -4623,8 +5056,10 @@ __export(numpy_exports, {
 	eulerGamma: () => eulerGamma,
 	exp: () => exp,
 	exp2: () => exp2,
+	expandDims: () => expandDims,
 	expm1: () => expm1,
 	eye: () => eye,
+	fft: () => numpy_fft_exports,
 	flip: () => flip,
 	fliplr: () => fliplr,
 	flipud: () => flipud,
@@ -4655,12 +5090,14 @@ __export(numpy_exports, {
 	ldexp: () => ldexp,
 	less: () => less,
 	lessEqual: () => lessEqual,
+	linalg: () => numpy_linalg_exports,
 	linspace: () => linspace,
 	log: () => log,
 	log10: () => log10,
 	log1p: () => log1p,
 	log2: () => log2,
 	matmul: () => matmul,
+	matrixTranspose: () => matrixTranspose,
 	max: () => max,
 	maximum: () => maximum,
 	mean: () => mean,
@@ -4677,10 +5114,10 @@ __export(numpy_exports, {
 	onesLike: () => onesLike,
 	outer: () => outer,
 	pad: () => pad,
-	permuteDims: () => permuteDims,
+	permuteDims: () => transpose,
 	pi: () => pi,
 	positive: () => positive,
-	pow: () => pow,
+	pow: () => power,
 	power: () => power,
 	prod: () => prod$1,
 	promoteTypes: () => promoteTypes,
@@ -4697,6 +5134,7 @@ __export(numpy_exports, {
 	sin: () => sin,
 	sinh: () => sinh,
 	size: () => size,
+	sort: () => sort,
 	sqrt: () => sqrt,
 	square: () => square,
 	squeeze: () => squeeze,
@@ -4861,6 +5299,26 @@ function min(a, axis = null, opts) {
 function max(a, axis = null, opts) {
 	return reduce(a, AluOp.Max, axis, opts);
 }
+/**
+* Logical AND reduction: whether every element along `axis` is truthy.
+*
+* The input is cast to boolean and reduced with `min`. With `axis` left as
+* `null` the reduction covers the whole array.
+*/
+function all(a, axis = null, opts) {
+	const asBool = fudgeArray(a).astype(DType.Bool);
+	return min(asBool, axis, opts);
+}
+/**
+* Logical OR reduction: whether at least one element along `axis` is truthy.
+*
+* The input is cast to boolean and reduced with `max`. With `axis` left as
+* `null` the reduction covers the whole array.
+*/
+function any(a, axis = null, opts) {
+	const asBool = fudgeArray(a).astype(DType.Bool);
+	return max(asBool, axis, opts);
+}
 /** Return the peak-to-peak range along a given axis (`max - min`). */
 function ptp(a, axis = null, opts) {
 	a = fudgeArray(a);
@@ -4918,6 +5376,23 @@ function argmax(a, axis, opts) {
 	}).reshape([shape$1[axis], ...rep(shape$1.length - axis - 1, 1)]));
 	return length.sub(max(idx, axis, opts));
 }
+/**
+* Running sum of the elements along an axis.
+*
+* When `axis` is omitted the input is flattened first. The current
+* implementation is `O(n^2)`: it broadcasts the axis into an `n x n` block,
+* masks it with `tril` and sums the rows. A two-phase parallel reduction is
+* planned as a replacement.
+*/
+function cumsum(a, axis) {
+	let arr = fudgeArray(a);
+	let ax;
+	if (axis !== void 0) ax = checkAxis(axis, arr.ndim);
+	else {
+		arr = arr.ravel();
+		ax = 0;
+	}
+	const len = arr.shape[ax];
+	const moved = moveaxis$1(arr, ax, -1);
+	// Insert a new second-to-last axis of length `len`, so each row holds a full copy of the data.
+	const square = broadcast(moved, moved.shape.concat(len), [-2]);
+	const summed = tril(square).sum(-1);
+	return moveaxis$1(summed, -1, ax);
+}
 /** Reverse the elements in an array along the given axes. */
 function flip(x, axis = null) {
 	const nd = ndim(x);
@@ -5027,8 +5502,11 @@ function flipud(x) {
 function fliplr(x) {
 	return flip(x, 1);
 }
-/** @function Alternative name for `numpy.transpose()`. */
-const permuteDims = transpose;
+/**
+* Swap the last two axes of an array.
+*
+* Throws when the input has fewer than two dimensions.
+*/
+function matrixTranspose(a) {
+	const rank = ndim(a);
+	if (rank >= 2) return moveaxis$1(a, -1, -2);
+	throw new Error(`matrixTranspose: input array must be at least 2D`);
+}
 /** Return a 1-D flattened array containing the elements of the input. */
 function ravel(a) {
 	return fudgeArray(a).ravel();
@@ -5044,6 +5522,32 @@ function squeeze(a, axis = null) {
 	return reshape(a, newShape);
 }
 /**
+* Expand the shape of an array by inserting new axes of length 1.
+*
+* @param a - Input array.
+* @param axis - Position(s) in the expanded axes where the new axis (or axes)
+*   is placed. Can be a single integer or an array of integers.
+* @returns Array with the number of dimensions increased.
+*
+* @example
+* ```ts
+* const x = np.array([1, 2]);
+* np.expandDims(x, 0); // Shape [1, 2]
+* np.expandDims(x, 1); // Shape [2, 1]
+* np.expandDims(x, [0, 2]); // Shape [1, 2, 1]
+* ```
+*/
+function expandDims(a, axis) {
+	const oldShape = shape(a);
+	const requested = typeof axis === "number" ? [axis] : axis;
+	// Axes are normalized against the rank of the result, not of the input.
+	const unitAxes = normalizeAxis(requested, oldShape.length + requested.length);
+	const outRank = oldShape.length + unitAxes.length;
+	const newShape = [];
+	let nextOld = 0;
+	for (let pos = 0; pos < outRank; pos++) {
+		// Requested positions get a unit dimension; the rest take the old dimensions in order.
+		newShape.push(unitAxes.includes(pos) ? 1 : oldShape[nextOld++]);
+	}
+	return reshape(a, newShape);
+}
+/**
 * Repeat each element of an array after themselves.
 *
 * If no axis is provided, use the flattened input array, and return a flat
@@ -5131,7 +5635,7 @@ function diagonal(a, offset, axis1, axis2) {
 */
 function diag(v, k = 0) {
 	const a = fudgeArray(v);
-	if (!Number.isInteger(k)) throw new TypeError(`k must be an integer, got ${k}`);
+	if (!Number.isInteger(k)) throw new Error(`k must be an integer, got ${k}`);
 	if (a.ndim === 1) {
 		const n = a.shape[0];
 		const ret = where(eye(n).equal(1), a.ref, zerosLike(a));
@@ -5139,12 +5643,32 @@ function diag(v, k = 0) {
 		else if (k < 0) return pad(ret, [[-k, 0], [0, -k]]);
 		else return ret;
 	} else if (a.ndim === 2) return diagonal(a, k);
-	else throw new TypeError("numpy.diag only supports 1D and 2D arrays");
+	else throw new Error("numpy.diag only supports 1D and 2D arrays");
 }
 /** Calculate the sum of the diagonal of an array along the given axes. */
 function trace(a, offset = 0, axis1 = 0, axis2 = 1) {
 	return diagonal(a, offset, axis1, axis2).sum(-1);
 }
+/**
+* Sorted copy of an array along `axis` (the last axis by default).
+*
+* The sort is not guaranteed to be stable; the work is delegated to the
+* array's own `sort` method, which dispatches to a device-specific
+* implementation.
+*/
+function sort(a, axis = -1) {
+	const arr = fudgeArray(a);
+	return arr.sort(axis);
+}
+/**
+* Indices that would sort an array along `axis` (the last axis by default).
+*
+* The result is an array of `int32` indices. The underlying sort may be
+* unstable, so the relative order of indices for equal elements is not
+* guaranteed.
+*/
+function argsort(a, axis = -1) {
+	const arr = fudgeArray(a);
+	return arr.argsort(axis);
+}
 /** Return if two arrays are element-wise equal within a tolerance. */
 function allclose(actual, expected, options) {
 	const { rtol = 1e-5, atol = 1e-7 } = options ?? {};
@@ -5153,16 +5677,19 @@ function allclose(actual, expected, options) {
 	if (!deepEqual(x.shape, y.shape)) return false;
 	const xData = x.dataSync();
 	const yData = y.dataSync();
-	for (let i = 0; i < xData.length; i++) if (Math.abs(xData[i] - yData[i]) > atol + rtol * Math.abs(yData[i])) return false;
+	for (let i = 0; i < xData.length; i++) {
+		if (isNaN(xData[i]) !== isNaN(yData[i])) return false;
+		if (Math.abs(xData[i] - yData[i]) > atol + rtol * Math.abs(yData[i])) return false;
+	}
 	return true;
 }
 /** Matrix product of two arrays. */
 function matmul(x, y) {
-	if (ndim(x) === 0 || ndim(y) === 0) throw new TypeError("matmul: x and y must be at least 1D");
+	if (ndim(x) === 0 || ndim(y) === 0) throw new Error("matmul: x and y must be at least 1D");
 	x = x, y = y;
 	if (y.ndim === 1) return dot$2(x, y);
 	const numBatchDims = Math.min(Math.max(x.ndim, 2), y.ndim) - 2;
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-2],
 		lhsBatchDims: range(-2 - numBatchDims, -2),
@@ -5170,11 +5697,11 @@ function matmul(x, y) {
 	});
 }
 /** Dot product of two arrays. */
-function dot(x, y) {
+function dot$1(x, y) {
 	if (ndim(x) === 0 || ndim(y) === 0) return multiply(x, y);
 	x = x, y = y;
 	if (y.ndim === 1) return dot$2(x, y);
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-2]
 	});
@@ -5190,7 +5717,7 @@ function tensordot(x, y, axes = 2) {
 	x = fudgeArray(x);
 	y = fudgeArray(y);
 	if (typeof axes === "number") axes = [range(-axes, 0), range(axes)];
-	return dot$1(x, y, {
+	return dot(x, y, {
 		lhsContractingDims: axes[0],
 		rhsContractingDims: axes[1]
 	});
@@ -5283,7 +5810,7 @@ function einsum(...args) {
 		const [b, bidx] = processSingleTensor(operands[j], indices[j], indices[i]);
 		indexReduced = indexReduced.filter((idx) => aidx.includes(idx));
 		const indexBatch = aidx.filter((idx) => bidx.includes(idx) && !indexReduced.includes(idx));
-		const result = dot$1(a, b, {
+		const result = dot(a, b, {
 			lhsContractingDims: indexReduced.map((idx) => aidx.indexOf(idx)),
 			rhsContractingDims: indexReduced.map((idx) => bidx.indexOf(idx)),
 			lhsBatchDims: indexBatch.map((idx) => aidx.indexOf(idx)),
@@ -5311,7 +5838,7 @@ function einsum(...args) {
 * Returned array has shape `[...x.shape[:-1], ...y.shape[:-1]]`.
 */
 function inner(x, y) {
-	return dot$1(fudgeArray(x), fudgeArray(y), {
+	return dot(fudgeArray(x), fudgeArray(y), {
 		lhsContractingDims: [-1],
 		rhsContractingDims: [-1]
 	});
@@ -5344,6 +5871,30 @@ function vecdot(x, y, { axis } = {}) {
 function vdot(x, y) {
 	return dot$2(ravel(x), ravel(y)); // both inputs go through ravel() first, then the pair is handed to dot$2
 }
+function _convImpl(name, x, y, mode) { // shared body of convolve() and correlate(); `name` selects the behaviour and prefixes the error messages
+	if (x.ndim !== 1 || y.ndim !== 1) throw new Error(`${name}: both inputs must be 1D arrays, got ${x.ndim}D and ${y.ndim}D`);
+	let flipOutput = false;
+	if (x.shape[0] < y.shape[0]) { // make `x` the longer operand
+		[x, y] = [y, x];
+		if (name === "correlate") flipOutput = true; // for correlate, the swap is compensated by reversing the result at the end
+	}
+	if (name === "convolve") y = flip(y); // only convolve reverses the second operand before calling conv()
+	let padding; // the `mode` string is translated into a padding argument for conv()
+	if (mode === "valid") padding = "VALID";
+	else if (mode === "same") padding = "SAME_LOWER";
+	else if (mode === "full") padding = [[y.shape[0] - 1, y.shape[0] - 1]]; // explicit padding of len(y) - 1 on both ends
+	else throw new Error(`${name}: invalid mode ${mode}, expected "full", "same", or "valid"`);
+	const z = conv(x.slice(null, null), y.slice(null, null), [1], padding).slice(0, 0); // stride 1; presumably slice(null, null) adds two leading unit axes and slice(0, 0) removes them again — confirm against Array.slice
+	return flipOutput ? flip(z) : z;
+}
+/** Convolution of two one-dimensional arrays. `mode` is "full" (default), "same", or "valid"; the work is done by `_convImpl("convolve", ...)`. */
+function convolve(x, y, mode = "full") {
+	return _convImpl("convolve", x, y, mode);
+}
+/** Correlation of two one-dimensional arrays. `mode` is "valid" (default), "same", or "full"; the work is done by `_convImpl("correlate", ...)`. */
+function correlate(x, y, mode = "valid") {
+	return _convImpl("correlate", x, y, mode);
+}
 /**
 * Return a tuple of coordinate matrices from coordinate vectors.
 *
@@ -5352,7 +5903,7 @@ function vdot(x, y) {
 */
 function meshgrid(xs, { indexing } = {}) {
 	indexing ??= "xy";
-	for (const x of xs) if (x.ndim !== 1) throw new TypeError(`meshgrid: all inputs must be 1D arrays, got ${x.ndim}D array`);
+	for (const x of xs) if (x.ndim !== 1) throw new Error(`meshgrid: all inputs must be 1D arrays, got ${x.ndim}D array`);
 	if (xs.length <= 1) return xs;
 	if (indexing === "xy") {
 		const [a, b, ...rest] = xs;
@@ -5371,43 +5922,6 @@ function meshgrid(xs, { indexing } = {}) {
 	return xs.map((x, i) => broadcast(x, shape$1, [...range(i), ...range(i + 1, xs.length)]));
 }
 /**
-* Return an array with ones on and below the diagonal and zeros elsewhere.
-*
-* If `k` is provided, it specifies the sub-diagonal on and below which the
-* array is filled with ones. `k=0` is the main diagonal, `k<0` is below it, and
-* `k>0` is above it.
-*/
-function tri(n, m, k = 0, { dtype, device } = {}) {
-	m ??= n;
-	dtype ??= DType.Float32;
-	if (!Number.isInteger(n) || n < 0) throw new TypeError(`tri: n must be a non-negative integer, got ${n}`);
-	if (!Number.isInteger(m) || m < 0) throw new TypeError(`tri: m must be a non-negative integer, got ${m}`);
-	if (!Number.isInteger(k)) throw new TypeError(`tri: k must be an integer, got ${k}`);
-	const rows = arange(k, n + k, 1, {
-		dtype: DType.Int32,
-		device
-	});
-	const cols = arange(0, m, 1, {
-		dtype: DType.Int32,
-		device
-	});
-	return rows.reshape([n, 1]).greaterEqual(cols).astype(dtype);
-}
-/** Return the lower triangle of an array. Must be of dimension >= 2. */
-function tril(a, k = 0) {
-	if (ndim(a) < 2) throw new TypeError(`tril: input array must be at least 2D, got ${ndim(a)}D`);
-	a = fudgeArray(a);
-	const [n, m] = a.shape.slice(-2);
-	return where(tri(n, m, k, { dtype: bool }), a.ref, zerosLike(a));
-}
-/** Return the upper triangle of an array. Must be of dimension >= 2. */
-function triu(a, k = 0) {
-	if (ndim(a) < 2) throw new TypeError(`tril: input array must be at least 2D, got ${ndim(a)}D`);
-	a = fudgeArray(a);
-	const [n, m] = a.shape.slice(-2);
-	return where(tri(n, m, k - 1, { dtype: bool }), zerosLike(a.ref), a);
-}
-/**
 * Clip (limit) the values in an array.
 *
 * Given an interval, values outside the interval are clipped to the interval
@@ -5431,8 +5945,6 @@ function absolute(x) {
 	x = fudgeArray(x);
 	return where(less(x.ref, 0), x.ref.mul(-1), x);
 }
-/** @function Alias of `jax.numpy.absolute()`. */
-const abs = absolute;
 /** Return an element-wise indication of sign of the input. */
 function sign(x) {
 	x = fudgeArray(x);
@@ -5511,12 +6023,6 @@ const atan2 = jit$1(function atan2$1(y, x) {
 	const denom = where(xNeg, y, r.add(x));
 	return atan(numer.div(denom)).mul(2);
 });
-/** @function Alias of `jax.numpy.acos()`. */
-const arccos = acos;
-/** @function Alias of `jax.numpy.atan()`. */
-const arctan = atan;
-/** @function Alias of `jax.numpy.atan2()`. */
-const arctan2 = atan2;
 /** Element-wise subtraction, with broadcasting. */
 function subtract(x, y) {
 	x = fudgeArray(x);
@@ -5547,8 +6053,6 @@ const fmod = jit$1(function fmod$1(x, y) {
 const remainder = jit$1(function remainder$1(x, y) {
 	return mod(mod(x, y.ref).add(y.ref), y); // mod(mod(x, y) + y, y); presumably the "+ y" and second mod give the result the sign of y — confirm mod() semantics. `.ref` is taken on every use of y except the last.
 });
-/** @function Alias of `jax.numpy.trueDivide()`. */
-const divide = trueDivide;
 /** Round input to the nearest integer towards zero. */
 function trunc(x) {
 	return idiv(x, 1);
@@ -5570,9 +6074,9 @@ function ldexp(x1, x2) {
 */
 function frexp(x) {
 	x = fudgeArray(x);
-	const absx = abs(x.ref);
+	const absx = absolute(x.ref); // calls absolute() directly; the `abs` alias constant is removed elsewhere in this diff
 	const exponent = where(equal(x.ref, 0), 0, floor(log2(absx)).add(1).astype(DType.Int32)); // 0 where x == 0, otherwise floor(log2(|x|)) + 1 cast to Int32
-	const mantissa = divide(x, exp2(exponent.ref.astype(x.dtype)));
+	const mantissa = x.div(exp2(exponent.ref.astype(x.dtype))); // x / 2**exponent, with the exponent cast back to x's dtype
 	return [mantissa, exponent];
 }
 /** Calculate `2**p` for all p in the input array. */
@@ -5612,10 +6116,11 @@ const degrees = rad2deg;
 * Computes first array raised to power of second array, element-wise.
 */
 const power = jit$1(function power$1(x1, x2) {
-	return exp(log(x1).mul(x2));
+	const x2i = trunc(x2.ref); // exponent passed through trunc()
+	const shouldBeNaN = multiply(x2.ref.notEqual(x2i.ref), x1.ref.less(0)); // product of two masks: exponent differs from its truncation AND base < 0
+	const resultSign = where(mod(x2i, 2).notEqual(0), where(x1.ref.less(0), -1, 1), 1); // -1 only where the truncated exponent is odd and the base is negative, else 1
+	return where(shouldBeNaN, nan, exp(log(absolute(x1)).mul(x2)).mul(resultSign)); // magnitude exp(x2 * log|x1|) times the sign. NOTE(review): x1 = 0 with x2 = 0 looks like exp(-inf * 0) = NaN here rather than 1 — confirm
 });
-/** @function Alias of `jax.numpy.power()`. */
-const pow = power;
 /** @function Calculate the element-wise cube root of the input array. */
 const cbrt = jit$1(function cbrt$1(x) {
 	const sgn = where(less(x.ref, 0), -1, 1);
@@ -5681,12 +6186,6 @@ const arccosh = jit$1(function arccosh$1(x) {
 const arctanh = jit$1(function arctanh$1(x) {
 	return log(add(1, x.ref).div(subtract(1, x))).mul(.5); // 0.5 * log((1 + x) / (1 - x))
 });
-/** @function Alias of `jax.numpy.arcsinh()`. */
-const asinh = arcsinh;
-/** @function Alias of `jax.numpy.arccosh()`. */
-const acosh = arccosh;
-/** @function Alias of `jax.numpy.arctanh()`. */
-const atanh = arctanh;
 /**
 * Compute the variance of an array.
 *
@@ -5716,6 +6215,26 @@ function var_(x, axis = null, opts) {
 function std(x, axis = null, opts) {
 	return sqrt(var_(x, axis, opts)); // square root of var_() called with the same arguments
 }
+/** Estimate the sample covariance of a set of variables. The mean is taken along axis 1, so each row is one variable and each column one observation; rows of an optional `y` are stacked below `x`. */
+function cov(x, y) {
+	x = fudgeArray(x);
+	if (x.ndim === 1) x = x.reshape([1, x.shape[0]]); // a 1D input becomes a single row
+	if (y !== void 0) {
+		y = fudgeArray(y);
+		if (y.ndim === 1) y = y.reshape([1, y.shape[0]]);
+		x = vstack([x, y]);
+	}
+	const [_M, N] = x.shape; // only N (size of axis 1) is used below
+	x = x.ref.sub(x.mean(1, { keepdims: true })); // subtract each row's mean
+	return dot$1(x.ref, x.transpose()).div(N - 1); // x @ x^T divided by N - 1. NOTE(review): N = 1 divides by zero — confirm intended
+}
+/** Compute the Pearson correlation coefficients (in range `[-1, 1]`). Takes the same `x`, `y` arguments as `cov()` and normalizes its result. */
+function corrcoef(x, y) {
+	const c = cov(x, y);
+	const variances = diag(c.ref); // diag() of the covariance matrix; presumably its main diagonal, i.e. the per-variable variances — confirm
+	const norm = sqrt(outer(variances.ref, variances)); // presumably norm[i][j] = sqrt(var_i * var_j) — confirm outer() semantics
+	return c.div(norm);
+}
 /** Test element-wise for positive or negative infinity, return bool array. */
 function isinf(x) {
 	x = fudgeArray(x);
@@ -5745,6 +6264,253 @@ const isfinite = jit$1(function isfinite$1(x) {
 	return isnan(x.ref).add(isinf(x)).notEqual(true);
 });
+//#endregion
+//#region src/library/lax-linalg.ts
+var lax_linalg_exports = {}; // namespace object; exposed as `linalg` inside lax_exports
+__export(lax_linalg_exports, {
+	cholesky: () => cholesky,
+	triangularSolve: () => triangularSolve
+});
+/**
+* Compute the Cholesky decomposition of a symmetric positive-definite matrix.
+*
+* The Cholesky decomposition of a matrix `A` is:
+*
+* - A = L @ L^T  (for upper=false, default)
+* - A = U^T @ U  (for upper=true)
+*
+* where `L` is a lower-triangular matrix and `U` is an upper-triangular matrix.
+* The input matrix must be symmetric and positive-definite.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const x = np.array([[2., 1.], [1., 2.]]);
+*
+* // Lower Cholesky factorization (default):
+* const L = lax.linalg.cholesky(x);
+* // L ≈ [[1.4142135, 0], [0.70710677, 1.2247449]]
+*
+* // Upper Cholesky factorization:
+* const U = lax.linalg.cholesky(x, { upper: true });
+* // U ≈ [[1.4142135, 0.70710677], [0, 1.2247449]]
+* ```
+*/
+function cholesky(a, { upper = false } = {}) {
+	const L = cholesky$2(a); // presumably cholesky$2 yields the lower factor — confirm
+	return upper ? moveaxis$1(L, -2, -1) : L; // for `upper`, axes -2 and -1 are moved, which transposes the last two dimensions
+}
+/**
+* Solve a triangular linear system.
+*
+* Solves `a @ x = b` (if leftSide=true) or `x @ a = b` (if leftSide=false)
+* where `a` is a triangular matrix. All four options default to `false`.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const L = np.array([[2., 0.], [1., 3.]]);
+* const b = np.array([4., 7.]).reshape([2, 1]);
+*
+* // Solve L @ x = b
+* const x = lax.linalg.triangularSolve(L, b, { leftSide: true, lower: true });
+* // x = [[2.], [5./3.]]
+* ```
+*/
+function triangularSolve(a, b, { leftSide = false, lower = false, transposeA = false, unitDiagonal = false } = {}) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	if (!leftSide) transposeA = !transposeA; // right-side solve: toggle transposeA
+	else b = moveaxis$1(b, -2, -1); // left-side solve: swap the last two axes of b (undone on the result below)
+	if (transposeA) a = moveaxis$1(a, -2, -1); // NOTE(review): `lower` is forwarded unchanged even though `a` was transposed here — confirm triangularSolve$1 expects that
+	let x = triangularSolve$1(a, b, { // presumably the inner routine solves `x @ a^T = b` — confirm
+		lower,
+		unitDiagonal
+	});
+	if (leftSide) x = moveaxis$1(x, -2, -1);
+	return x;
+}
+//#endregion
+//#region src/library/lax.ts
+var lax_exports = {}; // namespace object; the module's export statement re-exports it as `lax`
+__export(lax_exports, {
+	conv: () => conv,
+	convGeneralDilated: () => convGeneralDilated,
+	convWithGeneralPadding: () => convWithGeneralPadding,
+	dot: () => dot,
+	erf: () => erf,
+	erfc: () => erfc,
+	linalg: () => lax_linalg_exports, // nested namespace object
+	reduceWindow: () => reduceWindow,
+	stopGradient: () => stopGradient$1 // the public name maps to the `$1`-suffixed wrapper function
+});
+/**
+* General dot product/contraction operator.
+*
+* Prefer higher-level functions like `jax.numpy.dot()`, `jax.numpy.matmul()`,
+* `jax.numpy.tensordot()`, and `jax.numpy.einsum()` where possible.
+*/
+function dot(lhs, rhs, { lhsContractingDims: lc = [], rhsContractingDims: rc = [], lhsBatchDims: lb = [], rhsBatchDims: rb = [] } = {}) {
+	if (lc.length !== rc.length) throw new Error(`dot: contracting dims lengths mismatch, got ${JSON.stringify(lc)} and ${JSON.stringify(rc)}`);
+	else if (lb.length !== rb.length) throw new Error(`dot: batch dims lengths mismatch, got ${JSON.stringify(lb)} and ${JSON.stringify(rb)}`);
+	lc = lc.map((a) => checkAxis(a, lhs.ndim)); // presumably checkAxis validates each axis and normalizes negative values — confirm
+	rc = rc.map((a) => checkAxis(a, rhs.ndim));
+	lb = lb.map((a) => checkAxis(a, lhs.ndim));
+	rb = rb.map((a) => checkAxis(a, rhs.ndim));
+	if (lc.some((a) => lb.includes(a))) throw new Error(`dot: lhs contracting dims ${JSON.stringify(lc)} overlap with batch dims ${JSON.stringify(lb)}`);
+	else if (rc.some((a) => rb.includes(a))) throw new Error(`dot: rhs contracting dims ${JSON.stringify(rc)} overlap with batch dims ${JSON.stringify(rb)}`);
+	const lf = range(lhs.ndim).filter((a) => !lc.includes(a) && !lb.includes(a)); // "free" lhs axes: neither contracting nor batch
+	const rf = range(rhs.ndim).filter((a) => !rc.includes(a) && !rb.includes(a)); // "free" rhs axes
+	const lhs2 = lhs.transpose([ // reorder lhs axes to [batch, free, contracting]
+		...lb,
+		...lf,
+		...lc
+	]);
+	const rhs2 = rhs.transpose([ // reorder rhs axes to [batch, free, contracting]
+		...rb,
+		...rf,
+		...rc
+	]);
+	if (lc.length === 0) return mul(lhs2.reshape([ // no contraction: lhs is shaped [batch, lhs-free, 1...] and rhs [batch, 1..., rhs-free], then multiplied
+		...lb.map((a) => lhs.shape[a]),
+		...lf.map((a) => lhs.shape[a]),
+		...rep(rf.length, 1)
+	]), rhs2.reshape([
+		...rb.map((a) => rhs.shape[a]),
+		...rep(lf.length, 1),
+		...rf.map((a) => rhs.shape[a])
+	]));
+	const dotShapeX = lc.map((a) => lhs.shape[a]);
+	const dotShapeY = rc.map((a) => rhs.shape[a]);
+	if (!deepEqual(dotShapeX, dotShapeY)) throw new Error(`dot: shapes not aligned along contracting dims: ${JSON.stringify(dotShapeX)} != ${JSON.stringify(dotShapeY)}`);
+	return dot$2(lhs2.reshape([ // contracting axes are flattened into one trailing axis; presumably dot$2 contracts that last axis with broadcasting — confirm
+		...lb.map((a) => lhs.shape[a]),
+		...lf.map((a) => lhs.shape[a]),
+		...rep(rf.length, 1),
+		prod(dotShapeX)
+	]), rhs2.reshape([
+		...rb.map((a) => rhs.shape[a]),
+		...rep(lf.length, 1),
+		...rf.map((a) => rhs.shape[a]),
+		prod(dotShapeY)
+	]));
+}
+function padtypeToPads(inShape, filterShape, strides, dilation, padding) { // turns a padding name into one [low, high] pad pair per entry of `inShape`
+	const padType = padding.toUpperCase(); // the name is matched case-insensitively
+	switch (padType) {
+		case "VALID": return rep(inShape.length, [0, 0]); // no padding on any dimension
+		case "SAME":
+		case "SAME_LOWER": {
+			const outShape = inShape.map((size$1, i) => Math.ceil(size$1 / strides[i])); // target output size: ceil(input / stride)
+			const padSizes = zipn(outShape, strides, filterShape, dilation, inShape).map(([o, s, k, d, i]) => Math.max(0, (o - 1) * s + 1 + (k - 1) * d - i)); // total padding per dimension, clamped at 0
+			if (padType === "SAME") return padSizes.map((size$1) => [size$1 >> 1, size$1 - (size$1 >> 1)]); // SAME: when the total is odd, the extra element goes on the high side
+			else return padSizes.map((size$1) => [size$1 - (size$1 >> 1), size$1 >> 1]); // SAME_LOWER: when the total is odd, the extra element goes on the low side
+		}
+		default: throw new Error(`Unknown padding type: ${padType}`);
+	}
+}
+/**
+* General n-dimensional convolution operator, with optional dilation.
+*
+* The semantics of this operation mimic the `jax.lax.conv_general_dilated`
+* function in JAX, which wraps XLA's general convolution operator.
+*
+* Grouped convolutions are selected with `featureGroupCount` (default 1).
+*/
+function convGeneralDilated(lhs, rhs, windowStrides, padding, { lhsDilation, rhsDilation, featureGroupCount = 1 } = {}) {
+	if (lhs.ndim < 2) throw new Error("lhs must have at least 2 dimensions");
+	if (rhs.ndim < 2) throw new Error("rhs must have at least 2 dimensions");
+	if (typeof padding === "string") { // a padding name is resolved to explicit pad pairs by padtypeToPads()
+		if (lhsDilation?.some((d) => d !== 1)) throw new Error("String padding is not supported for transposed convolutions");
+		padding = padtypeToPads(lhs.shape.slice(2), rhs.shape.slice(2), windowStrides, rhsDilation ?? rep(rhs.ndim - 2, 1), padding); // only the axes after the first two are passed; a missing rhsDilation is treated as all ones
+	}
+	if (featureGroupCount !== 1) { // grouped convolution path
+		const G = featureGroupCount;
+		const [N, C_in, ...xs] = lhs.shape; // lhs shape is read as [N, C_in, ...xs]
+		const [C_out, C_in_per_group, ...ks] = rhs.shape; // rhs shape is read as [C_out, C_in / G, ...ks]
+		if (C_in % G !== 0) throw new Error(`featureGroupCount=${G} must divide input channels=${C_in}`);
+		if (C_out % G !== 0) throw new Error(`featureGroupCount=${G} must divide output channels=${C_out}`);
+		if (C_in / G !== C_in_per_group) throw new Error(`rhs input channels=${C_in_per_group} must equal lhs input channels / groups=${C_in / G}`);
+		const lhsGrouped = moveaxis(lhs.reshape([ // split channels into [N, G, C_in / G, ...xs], then move the group axis to the front
+			N,
+			G,
+			C_in / G,
+			...xs
+		]), 1, 0);
+		const rhsGrouped = rhs.reshape([ // split output channels into [G, C_out / G, C_in / G, ...ks]
+			G,
+			C_out / G,
+			C_in_per_group,
+			...ks
+		]);
+		const result = conv$1(lhsGrouped, rhsGrouped, {
+			vmapDims: 1, // presumably tells conv$1 to map over the one leading (group) axis — confirm
+			strides: windowStrides,
+			padding,
+			lhsDilation,
+			rhsDilation
+		});
+		const ys = result.shape.slice(3); // everything after the first three result axes
+		return moveaxis(result, 0, 1).reshape([ // move the leading axis to position 1, then reshape to [N, C_out, ...ys]
+			N,
+			C_out,
+			...ys
+		]);
+	}
+	return conv$1(lhs, rhs, { // ungrouped path: direct call
+		strides: windowStrides,
+		padding,
+		lhsDilation,
+		rhsDilation
+	});
+}
+/** Convenience wrapper around `convGeneralDilated` that takes both dilations as positional arguments; `featureGroupCount` is not forwarded and keeps its default of 1. */
+function convWithGeneralPadding(lhs, rhs, windowStrides, padding, lhsDilation, rhsDilation) {
+	return convGeneralDilated(lhs, rhs, windowStrides, padding, {
+		lhsDilation,
+		rhsDilation
+	});
+}
+/** Convenience wrapper around `convGeneralDilated` that passes no options: both dilations stay undefined and `featureGroupCount` keeps its default of 1. */
+function conv(lhs, rhs, windowStrides, padding) {
+	return convGeneralDilated(lhs, rhs, windowStrides, padding);
+}
+/** Reduce a computation over padded windows. `windowStrides` defaults to all ones; throws if the operand has fewer dimensions than the window. */
+function reduceWindow(operand, computation, windowDimensions, windowStrides) {
+	if (operand.ndim < windowDimensions.length) throw new Error(`Operand dimensions ${operand.ndim} < window ${windowDimensions.length}`);
+	if (!windowStrides) windowStrides = rep(windowDimensions.length, 1);
+	for (let i = 0; i < operand.ndim; i++) computation = vmap$1(computation, 0); // wrap `computation` in vmap$1 over axis 0 once per operand dimension
+	return computation(bind1(Primitive.Pool, [operand], { // the wrapped computation is applied to the output of the Pool primitive
+		window: windowDimensions,
+		strides: windowStrides
+	}));
+}
+/** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
+function erf(x) {
+	return erf$1(x); // thin wrapper: forwards to erf$1
+}
+/**
+* The complementary error function: `erfc(x) = 1 - erf(x)`.
+*
+* This function is more accurate than `1 - erf(x)` for large values of `x`,
+* where `erf(x)` is very close to 1.
+*/
+function erfc(x) {
+	return erfc$1(x); // thin wrapper: forwards to erfc$1
+}
+/**
+* Stops gradient computation.
+*
+* Behaves as the identity function but prevents the flow of gradients during
+* forward or reverse-mode automatic differentiation.
+*/
+function stopGradient$1(x) {
+	return stopGradient(x); // thin wrapper: forwards to stopGradient; this function is what lax_exports exposes under the name `stopGradient`
+}
 //#endregion
 //#region src/library/nn.ts
 var nn_exports = {};
@@ -5753,6 +6519,10 @@ __export(nn_exports, {
 	elu: () => elu,
 	gelu: () => gelu,
 	glu: () => glu,
+	hardSigmoid: () => hardSigmoid,
+	hardSilu: () => hardSilu,
+	hardSwish: () => hardSilu,
+	hardTanh: () => hardTanh,
 	identity: () => identity,
 	leakyRelu: () => leakyRelu,
 	logSigmoid: () => logSigmoid,
@@ -5763,14 +6533,17 @@ __export(nn_exports, {
 	oneHot: () => oneHot,
 	relu: () => relu,
 	relu6: () => relu6,
+	selu: () => selu,
 	sigmoid: () => sigmoid,
 	silu: () => silu,
 	softSign: () => softSign,
 	softmax: () => softmax,
 	softplus: () => softplus,
+	sparsePlus: () => sparsePlus,
+	sparseSigmoid: () => sparseSigmoid,
 	squareplus: () => squareplus,
 	standardize: () => standardize,
-	swish: () => swish
+	swish: () => silu
 });
 /**
 * Rectified Linear Unit (ReLU) activation function:
@@ -5805,6 +6578,28 @@ function softplus(x) {
 	return log(exp(x).add(1));
 }
 /**
+* @function
+* Sparse plus function:
+*
+* - When `x <= -1`: `0`
+* - When `-1 < x < 1`: `(x+1)**2 / 4`
+* - When `x >= 1`: `x`
+*/
+const sparsePlus = jit$1((x) => {
+	return where(x.ref.lessEqual(-1), 0, where(x.ref.less(1), square(x.ref.add(1)).mul(.25), x)); // outer where handles x <= -1; inner where picks (x + 1)^2 * 0.25 for x < 1, otherwise x
+});
+/**
+* @function
+* Sparse sigmoid activation function.
+*
+* - When `x <= -1`: `0`
+* - When `-1 < x < 1`: `(x + 1) / 2`
+* - When `x >= 1`: `1`
+*/
+const sparseSigmoid = jit$1((x) => {
+	return clip(x.add(1).mul(.5), 0, 1); // (x + 1) * 0.5 clipped to [0, 1] covers all three cases at once
+});
+/**
 * Soft-sign activation function, computed element-wise:
 * `softsign(x) = x / (|x| + 1)`.
 */
@@ -5826,17 +6621,6 @@ const silu = jit$1(function silu$1(x) {
 	return x.ref.mul(sigmoid(x));
 });
 /**
-* @function
-* Sigmoid-weighted Linear Unit (SiLU) activation function, also known as
-* Swish, computed element-wise:
-* `silu(x) = x * sigmoid(x) = x / (1 + exp(-x))`.
-*
-* `swish()` and `silu()` are both aliases for the same function.
-*
-* Reference: https://en.wikipedia.org/wiki/Swish_function
-*/
-const swish = silu;
-/**
 * Log-sigmoid activation function, computed element-wise:
 * `log_sigmoid(x) = log(sigmoid(x)) = -log(1 + exp(-x))`.
 */
@@ -5853,6 +6637,19 @@ function leakyRelu(x, negativeSlope = .01) {
 	x = fudgeArray(x);
 	return where(less(x.ref, 0), x.ref.mul(negativeSlope), x);
 }
+/** Hard sigmoid activation function: `relu6(x+3)/6`, computed as a multiplication by `1 / 6`. */
+function hardSigmoid(x) {
+	return relu6(add(x, 3)).mul(1 / 6);
+}
+/** Hard SiLU (swish) activation function: `x * hardSigmoid(x)`. Also exposed as `hardSwish` in the nn exports. */
+function hardSilu(x) {
+	x = fudgeArray(x);
+	return x.ref.mul(hardSigmoid(x)); // `x` is used twice, so the first use goes through `.ref`
+}
+/** Hard tanh activation function: `clip(x, -1, 1)`, a direct call to `clip` with fixed bounds. */
+function hardTanh(x) {
+	return clip(x, -1, 1);
+}
 /**
 * Exponential linear unit activation function.
 *
@@ -5875,6 +6672,20 @@ function celu(x, alpha = 1) {
 }
 /**
 * @function
+* Scaled exponential linear unit activation.
+*
+* Computes the element-wise function:
+* `selu(x) = lambda * (x > 0 ? x : alpha * (exp(x) - 1))`
+*
+* Where `alpha = 1.6732632423543772` and `lambda = 1.0507009873554805`.
+*/
+const selu = jit$1(function selu$1(x) {
+	const alpha = 1.6732632423543772;
+	const lambda = 1.0507009873554805;
+	return where(x.ref.less(0), expm1(x.ref).mul(alpha), x).mul(lambda); // the test is x < 0 rather than x > 0; both branches give 0 at x = 0, so this matches the documented formula
+});
+/**
+* @function
 * Gaussian error linear unit (GELU) activation function.
 *
 * This is computed element-wise. There are two variants depending on whether
@@ -5968,22 +6779,22 @@ function logSoftmax(x, axis = -1) {
 *
 * Reference: https://en.wikipedia.org/wiki/LogSumExp
 */
-function logsumexp(x, axis = null) {
+function logsumexp(x, axis = null, opts) { // optional `opts.keepdims` keeps the reduced axes in the result
 	x = fudgeArray(x);
 	axis = normalizeAxis(axis, x.ndim);
 	if (axis.length === 0) return x; // no axes to reduce: input is returned as-is
-	const xMax = stopGradient(max(x.ref, axis));
-	const xMaxDims = broadcast(xMax.ref, x.shape, axis);
-	const shifted = x.sub(xMaxDims);
-	return xMax.add(log(exp(shifted).sum(axis)));
+	const xMax = stopGradient(max(x.ref, axis, { keepdims: true })); // max over `axis`, computed with keepdims and wrapped in stopGradient
+	const shifted = x.sub(xMax.ref); // subtract the max before exp(), presumably to avoid overflow
+	const result = xMax.add(log(exp(shifted).sum(axis, { keepdims: true }))); // add the max back after the log
+	return opts?.keepdims ? result : squeeze(result, axis); // without keepdims, the reduced axes are squeezed out
 }
 /** Log-mean-exp reduction, like `jax.nn.logsumexp()` but subtracts `log(n)`, where `n` is the product of the sizes of the reduced axes. */
-function logmeanexp(x, axis = null) {
+function logmeanexp(x, axis = null, opts) { // `opts` is forwarded unchanged to logsumexp()
 	x = fudgeArray(x);
 	axis = normalizeAxis(axis, x.ndim);
 	if (axis.length === 0) return x;
 	const n = axis.reduce((acc, a) => acc * x.shape[a], 1);
-	return logsumexp(x, axis).sub(Math.log(n));
+	return logsumexp(x, axis, opts).sub(Math.log(n));
 }
 /**
 * Standardizes input to zero mean and unit variance.
@@ -6028,8 +6839,11 @@ var random_exports = {};
 __export(random_exports, {
 	bernoulli: () => bernoulli,
 	bits: () => bits,
+	cauchy: () => cauchy,
 	exponential: () => exponential,
+	gumbel: () => gumbel,
 	key: () => key,
+	laplace: () => laplace,
 	normal: () => normal,
 	split: () => split,
 	uniform: () => uniform
@@ -6088,6 +6902,16 @@ function bernoulli(key$1, p = .5, shape$1 = []) {
 }
 /**
 * @function
+* Sample from a Cauchy distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: `x = tan(π * (u - 0.5))` where u ~ Uniform(0, 1).
+*/
+const cauchy = jit$1(function cauchy$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	return tan(u.sub(.5).mul(Math.PI)); // tan(pi * (u - 0.5))
+}, { staticArgnums: [1] }); // presumably marks argument index 1 (`shape$1`) as static for jit — confirm
+/**
+* @function
 * Sample exponential random values according to `p(x) = exp(-x)`.
 */
 const exponential = jit$1(function exponential$1(key$1, shape$1 = []) {
@@ -6096,6 +6920,30 @@ const exponential = jit$1(function exponential$1(key$1, shape$1 = []) {
 }, { staticArgnums: [1] });
 /**
 * @function
+* Sample from a Gumbel distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: `x = -log(-log(1 - u))` where u ~ Uniform(0, 1); `1 - u` is formed via `log1p(-u)`.
+*/
+const gumbel = jit$1(function gumbel$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	return negative(log(negative(log1p(negative(u))))); // -log(-log1p(-u))
+}, { staticArgnums: [1] }); // presumably marks argument index 1 (`shape$1`) as static for jit — confirm
+/**
+* @function
+* Sample from a Laplace distribution with location 0 and scale 1.
+*
+* Uses inverse transform sampling: the CDF is `F(x) = 0.5 + 0.5 * sign(x) * (1 - exp(-|x|))`.
+* Inverting: `x = -sign(u - 0.5) * log(1 - 2 * |u - 0.5|)`.
+*/
+const laplace = jit$1(function laplace$1(key$1, shape$1 = []) {
+	const u = uniform(key$1, shape$1);
+	const centered = u.sub(.5); // u - 0.5
+	const s = sign(centered.ref);
+	const absVal = absolute(centered);
+	return s.mul(log1p(absVal.mul(-2)).mul(-1)); // sign * -(log1p(-2 * |u - 0.5|)), i.e. the inverse formula above with log(1 - t) written as log1p(-t)
+}, { staticArgnums: [1] }); // presumably marks argument index 1 (`shape$1`) as static for jit — confirm
+/**
+* @function
 * Sample random values according to `p(x) = 1/sqrt(2pi) * exp(-x^2/2)`.
 *
 * Unlike JAX, this uses the Box-Muller transform. JAX uses the erf_inv primitive instead and
@@ -6204,11 +7052,6 @@ const valueAndGrad = valueAndGrad$1;
 */
 const jacrev = jacrev$1;
 /**
-* @function
-* Compute the Jacobian with reverse-mode AD. Alias for `jacrev()`.
-*/
-const jacobian = jacrev;
-/**
 * Wait until all `Array` leaves are ready by calling `Array.blockUntilReady()`.
 *
 * This can be used to wait for the results of an intermediate computation to
@@ -6243,5 +7086,4 @@ async function devicePut(x, device) {
 }
 //#endregion
-export { Array$1 as Array, DType, Jaxpr, blockUntilReady, defaultDevice, devicePut, devices, grad, init, jacfwd, jacobian, jacrev, jit, jvp, lax_exports as lax, linearize, makeJaxpr, nn_exports as nn, numpy_exports as numpy, random_exports as random, scipy_special_exports as scipySpecial, setDebug, tree_exports as tree, valueAndGrad, vjp, vmap };
-//# sourceMappingURL=index.js.map
+export { Array$1 as Array, ClosedJaxpr, DType, Jaxpr, blockUntilReady, defaultDevice, devicePut, devices, grad, init, jacfwd, jacrev as jacobian, jacrev, jit, jvp, lax_exports as lax, linearize, makeJaxpr, nn_exports as nn, numpy_exports as numpy, random_exports as random, scipy_special_exports as scipySpecial, setDebug, tree_exports as tree, valueAndGrad, vjp, vmap };