npm - @jax-js/jax - Versions diffs - 0.1.4 → 0.1.5 - Mend

@jax-js/jax 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +10 -7
package/dist/{backend-tngXtWe4.js → backend-DaqL-MNz.js} +96 -7
package/dist/{backend-Bu9GY6sK.cjs → backend-DziQSaoQ.cjs} +101 -6
package/dist/index.cjs +737 -141
package/dist/index.d.cts +238 -9
package/dist/index.d.ts +238 -9
package/dist/index.js +737 -141
package/dist/webgl-ClIYb8jP.cjs +522 -0
package/dist/webgl-RSuZKvgc.js +522 -0
package/dist/{webgpu-Oj3Kd-kd.cjs → webgpu-Db2JrNBr.cjs} +296 -3
package/dist/{webgpu-ChVgx3b6.js → webgpu-Dh7k9io0.js} +296 -3
package/package.json +1 -1

package/dist/index.js (changed)

@@ -1,5 +1,5 @@
 import { __export } from "./chunk-Cl8Af3a2.js";
-import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-tngXtWe4.js";
+import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-DaqL-MNz.js";
 //#region src/frontend/convolution.ts
 /**
@@ -356,6 +356,8 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["PoolTranspose"] = "pool_transpose";
 	Primitive$1["Compare"] = "compare";
 	Primitive$1["Where"] = "where";
+	Primitive$1["Concatenate"] = "concatenate";
+	Primitive$1["Split"] = "split";
 	Primitive$1["RandomBits"] = "random_bits";
 	Primitive$1["Gather"] = "gather";
 	Primitive$1["Transpose"] = "transpose";
@@ -368,6 +370,7 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Argsort"] = "argsort";
 	Primitive$1["TriangularSolve"] = "triangular_solve";
 	Primitive$1["Cholesky"] = "cholesky";
+	Primitive$1["LU"] = "lu";
 	Primitive$1["Jit"] = "jit";
 	return Primitive$1;
 }({});
@@ -499,7 +502,25 @@ function where$1(cond, x, y) {
 		y
 	]);
 }
+function concatenate$1(xs, axis) {
+	if (xs.length === 0) throw new Error("concatenate requires at least one input");
+	const avals = xs.map((x) => ShapedArray.fromAval(getAval(x)));
+	axis = checkAxis(axis, avals[0].ndim);
+	for (const x of avals) if (x.ndim !== avals[0].ndim || !x.shape.every((s, i) => i === axis || s === avals[0].shape[i])) throw new Error(`Concatenate: inputs ${avals[0]} and ${x} must match shapes except on axis ${axis}`);
+	return bind1(Primitive.Concatenate, xs, { axis });
+}
+function split$2(x, axis, sizes) {
+	axis = checkAxis(axis, ndim$1(x));
+	if (sizes.some((s) => s < 0 || !Number.isInteger(s))) throw new Error(`split: sizes must be nonnegative integers, got ${JSON.stringify(sizes)}`);
+	const totalSize = sizes.reduce((a, b) => a + b, 0);
+	if (totalSize !== getShape(x)[axis]) throw new Error(`split: sizes must sum to the size of the axis ${axis}, got ${totalSize}`);
+	return bind(Primitive.Split, [x], {
+		axis,
+		sizes
+	});
+}
 function randomBits(k0, k1, shape$1, mode = "xor") {
+	if (!deepEqual(k0.shape, k1.shape) || k0.dtype !== DType.Uint32 || k1.dtype !== DType.Uint32) throw new Error(`randomBits: key parts must be uint32 with the same shape, got ${ShapedArray.fromAval(k0.aval)} and ${ShapedArray.fromAval(k1.aval)}`);
 	return bind1(Primitive.RandomBits, [k0, k1], {
 		shape: shape$1,
 		mode
@@ -566,6 +587,11 @@ function pad$1(x, width) {
 	return bind1(Primitive.Pad, [x], { width });
 }
 function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
+	const as = getShape(a);
+	const bs = getShape(b);
+	if (as.length < 2 || bs.length < 2) throw new Error(`triangular_solve: must be >=2D, got a=${as}, b=${bs}`);
+	const n = as[as.length - 2];
+	if (n !== as[as.length - 1] || n !== bs[bs.length - 1]) throw new Error(`triangular_solve: incompatible shapes a=${as}, b=${bs}`);
 	if (lower) {
 		a = flip$1(a, [-2, -1]);
 		b = flip$1(b, [-1]);
@@ -575,8 +601,15 @@ function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
 	return x;
 }
 function cholesky$2(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2 || aval.shape[aval.ndim - 1] !== aval.shape[aval.ndim - 2]) throw new Error(`cholesky: expected batch of square matrices, got ${aval}`);
 	return bind1(Primitive.Cholesky, [x]);
 }
+function lu$1(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2) throw new Error(`lu: expected batch of matrices, got ${aval}`);
+	return bind(Primitive.LU, [x]);
+}
 function sort$1(x) {
 	const nd = ndim$1(x);
 	if (nd === 0) throw new Error("sort: requires at least 1D input");
@@ -685,6 +718,9 @@ var Tracer = class Tracer {
 	mul(other) {
 		return mul(this, other);
 	}
+	mod(other) {
+		return mod(this, other);
+	}
 	greater(other) {
 		return greater$1(this, other);
 	}
@@ -797,8 +833,14 @@ var Tracer = class Tracer {
 	*/
 	*[Symbol.iterator]() {
 		if (this.ndim === 0) throw new Error("Cannot iterate over a scalar array");
-		for (let i = 0; i < this.shape[0]; i++) yield this.ref.slice(i);
-		this.dispose();
+		let residual = this;
+		const subarrayShape = this.shape.slice(1);
+		for (let i = 0; i < this.shape[0]; i++) {
+			const lr = split$2(residual, 0, [1, residual.shape[0] - 1]);
+			yield lr[0].reshape(subarrayShape);
+			residual = lr[1];
+		}
+		residual.dispose();
 	}
 	/**
 	* Return a sorted copy of an array in ascending order.
@@ -948,6 +990,9 @@ var ShapedArray = class ShapedArray {
 	get size() {
 		return prod(this.shape);
 	}
+	scalar() {
+		return new ShapedArray([], this.dtype, this.weakType);
+	}
 	toString() {
 		return `${this.dtype}[${this.shape.join(",")}]`;
 	}
@@ -1553,7 +1598,7 @@ const abstractEvalRules = {
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
 	[Primitive.Conv]([lhs, rhs], params) {
-		const { dtype, weakType } = promoteAvals(new ShapedArray([], lhs.dtype, lhs.weakType), new ShapedArray([], rhs.dtype, rhs.weakType));
+		const { dtype, weakType } = promoteAvals(lhs.scalar(), rhs.scalar());
 		const shape$1 = checkConvShape(lhs.shape, rhs.shape, params);
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
@@ -1564,10 +1609,25 @@ const abstractEvalRules = {
 		const shape$1 = generalBroadcast(cond.shape, xy.shape);
 		return [new ShapedArray(shape$1, xy.dtype, xy.weakType)];
 	},
+	[Primitive.Concatenate](xs, { axis }) {
+		if (xs.length === 0) throw new TypeError("Concatenate requires at least one input");
+		for (const x of xs) if (x.ndim !== xs[0].ndim || !x.shape.every((s, i) => i === axis || s === xs[0].shape[i])) throw new TypeError(`Concatenate: inputs ${xs[0]} and ${x} must match shapes except on axis ${axis}`);
+		const shape$1 = xs[0].shape.slice();
+		shape$1[axis] = xs.reduce((sum$1, x) => sum$1 + x.shape[axis], 0);
+		const { dtype, weakType } = xs.map((x) => x.scalar()).reduce(promoteAvals);
+		return [new ShapedArray(shape$1, dtype, weakType)];
+	},
+	[Primitive.Split]([x], { axis, sizes }) {
+		const totalSize = sizes.reduce((a, b) => a + b, 0);
+		if (x.shape[axis] !== totalSize) throw new TypeError(`Split: sizes ${sizes} do not sum to dimension ${x.shape[axis]} on axis ${axis}`);
+		return sizes.map((size$1) => {
+			return new ShapedArray(x.shape.toSpliced(axis, 1, size$1), x.dtype, x.weakType);
+		});
+	},
 	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
 		if (k0.dtype !== DType.Uint32 || k1.dtype !== DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
-		const keyShape = generalBroadcast(k0.shape, k1.shape);
-		if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
+		if (!deepEqual(k0.shape, k1.shape)) throw new TypeError(`RandomBits: Keys have different shapes ${k0.shape} and ${k1.shape}`);
+		if (!deepEqual(shape$1.slice(0, k0.ndim), k0.shape)) throw new TypeError(`RandomBits: generated shape ${shape$1} must match key shape ${k0.shape}`);
 		return [new ShapedArray(shape$1, DType.Uint32, false)];
 	},
 	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
@@ -1624,6 +1684,16 @@ const abstractEvalRules = {
 		if (a.shape[a.ndim - 2] !== a.shape[a.ndim - 1]) throw new TypeError(`cholesky: must be square, got ${a}`);
 		return [ShapedArray.fromAval(a)];
 	},
+	[Primitive.LU]([a]) {
+		if (a.ndim < 2) throw new TypeError(`lu: requires at least 2D input, got ${a}`);
+		const batch = a.shape.slice(0, -2);
+		const [m, n] = a.shape.slice(-2);
+		return [
+			ShapedArray.fromAval(a),
+			new ShapedArray([...batch, Math.min(m, n)], DType.Int32, false),
+			new ShapedArray([...batch, m], DType.Int32, false)
+		];
+	},
 	[Primitive.Jit](args, { jaxpr }) {
 		const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
 		if (args.length !== inTypes.length) throw new TypeError(`jit expected ${inTypes.length} arguments, got ${args.length}`);
@@ -1705,7 +1775,8 @@ const routinePrimitives = new Map([
 	[Primitive.Sort, Routines.Sort],
 	[Primitive.Argsort, Routines.Argsort],
 	[Primitive.TriangularSolve, Routines.TriangularSolve],
-	[Primitive.Cholesky, Routines.Cholesky]
+	[Primitive.Cholesky, Routines.Cholesky],
+	[Primitive.LU, Routines.LU]
 ]);
 /** Result of compiling a Jaxpr. Can be evaluated on a series of inputs. */
 var JitProgram = class {
@@ -1876,10 +1947,10 @@ function jitCompile(backend, jaxpr) {
 				inputs.push(jv.arg);
 			} else if (input instanceof Lit) inputs.push(builder.pushLit(input));
 			const outputs = [];
-			for (const outVar$1 of eqn.outBinders) {
-				const outId = builder.pushBuffer(outVar$1.aval.size * byteWidth(outVar$1.aval.dtype));
+			for (const outVar of eqn.outBinders) {
+				const outId = builder.pushBuffer(outVar.aval.size * byteWidth(outVar.aval.dtype));
 				outputs.push(outId);
-				ctx.set(outVar$1, {
+				ctx.set(outVar, {
 					type: "imm",
 					arg: outId
 				});
@@ -1930,35 +2001,37 @@ function jitCompile(backend, jaxpr) {
 		let reduction;
 		if (inputReduction) {
 			const jv = inputReduction;
-			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp;
-			exp$2 = jv.exp.reindexGids(addArgs(jv.args));
+			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp[0];
+			exp$2 = [jv.exp.reindexGids(addArgs(jv.args))];
 			reduction = new Reduction(jv.reduction.dtype, jv.reduction.op, jv.reduction.size, newEpilogue);
 		} else {
 			const ruleOutput = rule(inputExps, inputAvals, eqn.params);
 			exp$2 = ruleOutput.exp;
 			reduction = ruleOutput.reduction;
 		}
-		const outVar = eqn.outBinders[0];
-		if (blackNodes.has(outVar)) {
-			const nargs$1 = inputArgs.length;
-			const size$1 = outVar.aval.size;
-			const kernel = new Kernel(nargs$1, size$1, exp$2, reduction);
-			const outId = builder.pushKernel(kernel, inputArgs);
-			ctx.set(outVar, {
-				type: "imm",
-				arg: outId
+		for (let i$1 = 0; i$1 < eqn.outBinders.length; i$1++) {
+			const outVar = eqn.outBinders[i$1];
+			if (blackNodes.has(outVar)) {
+				const nargs$1 = inputArgs.length;
+				const size$1 = outVar.aval.size;
+				const kernel = new Kernel(nargs$1, size$1, exp$2[i$1], reduction);
+				const outId = builder.pushKernel(kernel, inputArgs);
+				ctx.set(outVar, {
+					type: "imm",
+					arg: outId
+				});
+			} else if (reduction) ctx.set(outVar, {
+				type: "red",
+				exp: exp$2[i$1],
+				reduction,
+				args: inputArgs
 			});
-		} else if (reduction) ctx.set(outVar, {
-			type: "red",
-			exp: exp$2,
-			reduction,
-			args: inputArgs
-		});
-		else ctx.set(outVar, {
-			type: "exp",
-			exp: exp$2,
-			args: inputArgs
-		});
+			else ctx.set(outVar, {
+				type: "exp",
+				exp: exp$2[i$1],
+				args: inputArgs
+			});
+		}
 	}
 	const outputIds = [];
 	for (const out of jaxpr.outs) if (out instanceof Var) {
@@ -1999,17 +2072,17 @@ function broadcastedJit(fn, opts) {
 			if (exp$2.dtype !== newDtype && !skipCastIdx.includes(i)) exp$2 = AluExp.cast(newDtype, exp$2);
 			return exp$2;
 		});
-		return { exp: fn(exps, params) };
+		return { exp: [fn(exps, params)] };
 	};
 }
 function unopJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: fn(a, params) };
+		return { exp: [fn(a, params)] };
 	};
 }
 function reshapeJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: reshapeViews(a, (st) => fn(st, params)) };
+		return { exp: [reshapeViews(a, (st) => fn(st, params))] };
 	};
 }
 function routineNoJit() {
@@ -2055,7 +2128,7 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.permute(perm).reshape(newShape), true);
 		const reduction = new Reduction(a.dtype, op, reductionSize);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
@@ -2066,13 +2139,13 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.compose(stX), true);
 		const reduction = new Reduction(a.dtype, AluOp.Add, stX.shape[stX.shape.length - 1]);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
 	[Primitive.Dot]([a, b], [as, bs]) {
 		const k1 = jitRules[Primitive.Mul]([a, b], [as, bs], {});
-		const c = k1.exp;
+		const [c] = k1.exp;
 		const cs = promoteAvals(as, bs);
 		return jitRules[Primitive.Reduce]([c], [cs], {
 			op: AluOp.Add,
@@ -2089,16 +2162,41 @@ const jitRules = {
 	},
 	[Primitive.Compare]: broadcastedJit(([a, b], { op }) => aluCompare(a, b, op)),
 	[Primitive.Where]: broadcastedJit(([cond, a, b]) => AluExp.where(cond, a, b), { skipCastIdx: [0] }),
+	[Primitive.Concatenate](exps, avals, { axis }) {
+		const ndim$2 = avals[0].ndim;
+		const sizes = avals.map((x) => x.shape[axis]);
+		const finalSize = sizes.reduce((a, b) => a + b, 0);
+		const makePadAxis = (start, end) => range(ndim$2).map((i) => i === axis ? [start, end] : [0, 0]);
+		let cum = 0;
+		const src = [];
+		for (let i = 0; i < exps.length; i++) {
+			const padding = makePadAxis(cum, finalSize - cum - sizes[i]);
+			src.push(reshapeViews(exps[i], (st) => st.pad(padding)));
+			cum += sizes[i];
+		}
+		return { exp: [src.reduce(AluExp.add)] };
+	},
+	[Primitive.Split]([a], [as], { axis, sizes }) {
+		const exp$2 = [];
+		let start = 0;
+		for (const size$1 of sizes) {
+			const slice = range(as.ndim).map((d) => d === axis ? [start, start + size$1] : [0, as.shape[d]]);
+			exp$2.push(reshapeViews(a, (st) => st.shrink(slice)));
+			start += size$1;
+		}
+		return { exp: exp$2 };
+	},
 	[Primitive.RandomBits]: (keys, keyShapes, { shape: shape$1, mode }) => {
+		const keyShape = keyShapes[0].shape;
 		const mapping = (st) => {
-			if (!deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, range(shape$1.length - st.shape.length));
+			if (!deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, range(st.shape.length, shape$1.length));
 		};
 		const k0 = reshapeViews(keys[0], mapping);
 		const k1 = reshapeViews(keys[1], mapping);
 		const c0 = AluExp.u32(0);
-		const c1 = AluExp.cast(DType.Uint32, AluVar.gidx);
+		const c1 = AluExp.mod(AluExp.cast(DType.Uint32, AluVar.gidx), AluExp.u32(Math.max(prod(shape$1.slice(keyShape.length)), 1)));
 		const exp$2 = AluExp.threefry2x32(k0, k1, c0, c1, mode);
-		return { exp: exp$2 };
+		return { exp: [exp$2] };
 	},
 	[Primitive.Gather]([x, ...indices], [xs, ...indicesShapes], { axis, outDim }) {
 		const axisSet = new Set(axis);
@@ -2113,7 +2211,7 @@ const jitRules = {
 		for (const [i, iexp] of indices.entries()) src[axis[i]] = AluExp.cast(DType.Int32, reshapeViews(iexp, (st) => st.broadcast(finalShape, [...range(outDim + indexShape.length - st.shape.length), ...range(outDim + indexShape.length, finalShape.length)])));
 		const [index, valid] = ShapeTracker.fromShape(xs.shape).toAluExp(src);
 		if (!valid.resolve()) throw new Error("internal: expected full validity mask in Gather");
-		return { exp: x.substitute({ gidx: index }) };
+		return { exp: [x.substitute({ gidx: index })] };
 	},
 	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
 	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
@@ -2129,6 +2227,7 @@ const jitRules = {
 	[Primitive.Argsort]: routineNoJit(),
 	[Primitive.TriangularSolve]: routineNoJit(),
 	[Primitive.Cholesky]: routineNoJit(),
+	[Primitive.LU]: routineNoJit(),
 	[Primitive.Jit]() {
 		throw new Error("internal: Jit should have been flattened before JIT compilation");
 	}
@@ -2407,6 +2506,10 @@ var Array$1 = class Array$1 extends Tracer {
 		this.#rc++;
 		return this;
 	}
+	/** Get the current reference count (for debugging memory management). */
+	get refCount() {
+		return this.#rc;
+	}
 	dispose() {
 		this.#check();
 		if (--this.#rc === 0) {
@@ -2564,7 +2667,7 @@ var Array$1 = class Array$1 extends Tracer {
 		} else if (castDtype === void 0) {
 			castDtype = arrays[i].#dtype;
 			castWeakType = arrays[i].#weakType;
-		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), new ShapedArray([], arrays[i].#dtype, arrays[i].#weakType)));
+		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), arrays[i].aval.scalar()));
 		const weakType = castWeakType && !strongTypeOutput;
 		const { backend, committed } = Array$1.#computeBackend(name, arrays);
 		arrays = arrays.map((ar) => ar._putSync(backend));
@@ -2957,17 +3060,44 @@ var Array$1 = class Array$1 extends Tracer {
 					y
 				], { dtypeOverride: [DType.Bool] })];
 			},
+			[Primitive.Concatenate](xs, { axis }) {
+				const ndim$2 = xs[0].ndim;
+				const sizes = xs.map((x) => x.shape[axis]);
+				const finalSize = sizes.reduce((a, b) => a + b, 0);
+				const makePadAxis = (start, end) => range(ndim$2).map((i) => i === axis ? [start, end] : [0, 0]);
+				let cum = 0;
+				const xsPadded = [];
+				for (let i = 0; i < xs.length; i++) {
+					const padding = makePadAxis(cum, finalSize - cum - sizes[i]);
+					xsPadded.push(xs[i].#reshape(xs[i].#st.pad(padding)));
+					cum += sizes[i];
+				}
+				const custom = (exps) => exps.reduce(AluExp.add);
+				return [Array$1.#naryCustom("concatenate", custom, xsPadded)];
+			},
+			[Primitive.Split]([x], { axis, sizes }) {
+				const outputs = [];
+				for (let i = 0, start = 0; i < sizes.length; i++) {
+					const slice = range(x.ndim).map((d) => d === axis ? [start, start + sizes[i]] : [0, x.shape[d]]);
+					outputs.push(x.ref.#reshape(x.#st.shrink(slice)));
+					start += sizes[i];
+				}
+				x.dispose();
+				return outputs;
+			},
 			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
-				const keyShape = generalBroadcast(k0.shape, k1.shape);
-				if (!deepEqual(generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-				const c0 = zeros(shape$1, {
+				const keyShape = k0.shape;
+				const genShape = shape$1.slice(keyShape.length);
+				const c0 = zeros(genShape, {
 					dtype: DType.Uint32,
 					device: k0.device
 				});
-				const c1 = arange(0, prod(shape$1), 1, {
+				const c1 = arange(0, prod(genShape), 1, {
 					dtype: DType.Uint32,
 					device: k0.device
-				}).reshape(shape$1);
+				}).reshape(genShape);
+				k0 = k0.#reshape(k0.#st.reshape(keyShape.concat(rep(genShape.length, 1))));
+				k1 = k1.#reshape(k1.#st.reshape(keyShape.concat(rep(genShape.length, 1))));
 				const custom = ([k0$1, k1$1, c0$1, c1$1]) => AluExp.threefry2x32(k0$1, k1$1, c0$1, c1$1, mode);
 				return [Array$1.#naryCustom("random_bits", custom, [
 					k0,
@@ -3001,40 +3131,63 @@ var Array$1 = class Array$1 extends Tracer {
 			},
 			[Primitive.Sort]([x]) {
 				const routine = new Routine(Routines.Sort, {
-					inputShapes: [x.aval.shape],
-					inputDtypes: [x.aval.dtype],
-					outputShapes: [x.aval.shape],
-					outputDtypes: [x.aval.dtype]
+					inputShapes: [x.shape],
+					inputDtypes: [x.dtype],
+					outputShapes: [x.shape],
+					outputDtypes: [x.dtype]
 				});
 				return Array$1.#routine(routine, [x], [x.#weakType]);
 			},
 			[Primitive.Argsort]([x]) {
 				const routine = new Routine(Routines.Argsort, {
-					inputShapes: [x.aval.shape],
-					inputDtypes: [x.aval.dtype],
-					outputShapes: [x.aval.shape, x.aval.shape],
-					outputDtypes: [x.aval.dtype, DType.Int32]
+					inputShapes: [x.shape],
+					inputDtypes: [x.dtype],
+					outputShapes: [x.shape, x.shape],
+					outputDtypes: [x.dtype, DType.Int32]
 				});
 				return Array$1.#routine(routine, [x], [x.#weakType, false]);
 			},
 			[Primitive.TriangularSolve]([a, b], { unitDiagonal }) {
 				const routine = new Routine(Routines.TriangularSolve, {
-					inputShapes: [a.aval.shape, b.aval.shape],
-					inputDtypes: [a.aval.dtype, b.aval.dtype],
-					outputShapes: [b.aval.shape],
-					outputDtypes: [b.aval.dtype]
+					inputShapes: [a.shape, b.shape],
+					inputDtypes: [a.dtype, b.dtype],
+					outputShapes: [b.shape],
+					outputDtypes: [b.dtype]
 				}, { unitDiagonal });
 				return Array$1.#routine(routine, [a, b], [a.#weakType && b.#weakType]);
 			},
 			[Primitive.Cholesky]([a]) {
 				const routine = new Routine(Routines.Cholesky, {
-					inputShapes: [a.aval.shape],
-					inputDtypes: [a.aval.dtype],
-					outputShapes: [a.aval.shape],
-					outputDtypes: [a.aval.dtype]
+					inputShapes: [a.shape],
+					inputDtypes: [a.dtype],
+					outputShapes: [a.shape],
+					outputDtypes: [a.dtype]
 				});
 				return Array$1.#routine(routine, [a], [a.#weakType]);
 			},
+			[Primitive.LU]([a]) {
+				const batch = a.shape.slice(0, -2);
+				const [m, n] = a.shape.slice(-2);
+				const routine = new Routine(Routines.LU, {
+					inputShapes: [a.shape],
+					inputDtypes: [a.dtype],
+					outputShapes: [
+						a.shape,
+						[...batch, Math.min(m, n)],
+						[...batch, m]
+					],
+					outputDtypes: [
+						a.dtype,
+						DType.Int32,
+						DType.Int32
+					]
+				});
+				return Array$1.#routine(routine, [a], [
+					a.#weakType,
+					false,
+					false
+				]);
+			},
 			[Primitive.Jit](args, { jaxpr }) {
 				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit expects ${jaxpr.inBinders.length} args, got ${args.length}`);
 				const { backend, committed } = Array$1.#computeBackend("jit", args);
@@ -3140,7 +3293,7 @@ function array(values, { shape: shape$1, dtype, device } = {}) {
 				device
 			});
 		} else {
-			const weakType = dtype == void 0;
+			const weakType = dtype == void 0 && shape$1.length === 0;
 			dtype = dtype ?? DType.Float32;
 			const data = dtypedJsArray(dtype, flat);
 			return arrayFromData(data, shape$1, {
@@ -3254,7 +3407,7 @@ function ones(shape$1, { dtype, device } = {}) {
 }
 /** Return a new array of given shape and type, filled with `fill_value`. */
 function full(shape$1, fillValue, { dtype, device } = {}) {
-	let weakType = dtype == void 0;
+	let weakType = dtype == void 0 && shape$1.length === 0;
 	if (typeof fillValue === "number") dtype = dtype ?? DType.Float32;
 	else if (typeof fillValue === "boolean") {
 		dtype = dtype ?? DType.Bool;
@@ -3412,6 +3565,27 @@ function linspace(start, stop, num = 50, endpoint = true, { dtype, device } = {}
 		committed: device != void 0
 	});
 }
+/**
+* Return numbers spaced evenly on a log scale.
+*
+* In linear space, the sequence starts at `base ** start` and ends at
+* `base ** stop` (see `endpoint` below).
+*
+* @param start - `base ** start` is the starting value of the sequence.
+* @param stop - `base ** stop` is the final value of the sequence, unless `endpoint` is false.
+* @param num - Number of samples to generate. Default is 50.
+* @param endpoint - If true, `stop` is the last sample. Otherwise, it is not included. Default is true.
+* @param base - The base of the log space. Default is 10.
+* @returns Array of evenly spaced values on a log scale.
+*/
+function logspace(start, stop, num = 50, endpoint = true, base = 10, { dtype, device } = {}) {
+	const y = linspace(start, stop, num, endpoint, {
+		dtype,
+		device
+	});
+	const logBase = Math.log(base);
+	return exp$1(mul(y, logBase));
+}
 function aluCompare(a, b, op) {
 	switch (op) {
 		case CompareOp.Less: return AluExp.cmplt(a, b);
@@ -3488,6 +3662,7 @@ var BatchTrace = class extends Trace {
 			return valOuts$1.map((x) => new BatchTracer(this, x, null));
 		}
 		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
+		if (valOuts.length !== bdimOuts.length) throw new Error(`vmap rule for ${primitive} returned mismatched lengths: ${valOuts.length} vs ${bdimOuts.length}`);
 		return zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
 	}
 	get axisSize() {
@@ -3499,13 +3674,13 @@ var BatchTrace = class extends Trace {
 *
 * Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
 */
-function broadcastBatcher(op) {
-	return (axisSize, args, dims) => {
+function broadcastBatcher(prim) {
+	return (axisSize, args, dims, params) => {
 		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
 		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
 		const firstIdx = dims.findIndex((d) => d !== null);
 		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
-		if (zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[op(...args)], [nd + firstBdim]];
+		if (zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[bind1(prim, args, params)], [nd + firstBdim]];
 		args = args.map((x, i) => {
 			if (dims[i] === null) return x;
 			x = moveBatchAxis(axisSize, dims[i], 0, x);
@@ -3516,37 +3691,45 @@ function broadcastBatcher(op) {
 			]);
 			return x;
 		});
-		return [[op(...args)], [0]];
+		return [[bind1(prim, args, params)], [0]];
 	};
 }
-function unopBatcher(op) {
+function unopBatcher(prim) {
 	return (axisSize, [x], [xBdim], params) => {
-		return [[op(x, params)], [xBdim]];
+		return [[bind1(prim, [x], params)], [xBdim]];
+	};
+}
+function lastDimsBatcher(prim, inputDims, numOutputs = 1) {
+	return (axisSize, [x], [xBdim], params) => {
+		assertNonNull(xBdim);
+		if (xBdim < x.ndim - inputDims) return [bind(prim, [x], params), rep(numOutputs, xBdim)];
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		return [bind(prim, [x], params), rep(numOutputs, 0)];
 	};
 }
 const vmapRules = {
-	[Primitive.Add]: broadcastBatcher(add$1),
-	[Primitive.Mul]: broadcastBatcher(mul),
-	[Primitive.Idiv]: broadcastBatcher(idiv),
-	[Primitive.Mod]: broadcastBatcher(mod),
-	[Primitive.Min]: broadcastBatcher(min$1),
-	[Primitive.Max]: broadcastBatcher(max$1),
-	[Primitive.Neg]: unopBatcher(neg),
-	[Primitive.Reciprocal]: unopBatcher(reciprocal$1),
-	[Primitive.Floor]: unopBatcher(floor$1),
-	[Primitive.Ceil]: unopBatcher(ceil$1),
-	[Primitive.StopGradient]: unopBatcher(stopGradient),
-	[Primitive.Cast]: unopBatcher((x, { dtype }) => cast(x, dtype)),
-	[Primitive.Bitcast]: unopBatcher((x, { dtype }) => bitcast(x, dtype)),
-	[Primitive.Sin]: unopBatcher(sin$1),
-	[Primitive.Cos]: unopBatcher(cos$1),
-	[Primitive.Asin]: unopBatcher(asin$1),
-	[Primitive.Atan]: unopBatcher(atan$1),
-	[Primitive.Exp]: unopBatcher(exp$1),
-	[Primitive.Log]: unopBatcher(log$1),
-	[Primitive.Erf]: unopBatcher(erf$1),
-	[Primitive.Erfc]: unopBatcher(erfc$1),
-	[Primitive.Sqrt]: unopBatcher(sqrt$1),
+	[Primitive.Add]: broadcastBatcher(Primitive.Add),
+	[Primitive.Mul]: broadcastBatcher(Primitive.Mul),
+	[Primitive.Idiv]: broadcastBatcher(Primitive.Idiv),
+	[Primitive.Mod]: broadcastBatcher(Primitive.Mod),
+	[Primitive.Min]: broadcastBatcher(Primitive.Min),
+	[Primitive.Max]: broadcastBatcher(Primitive.Max),
+	[Primitive.Neg]: unopBatcher(Primitive.Neg),
+	[Primitive.Reciprocal]: unopBatcher(Primitive.Reciprocal),
+	[Primitive.Floor]: unopBatcher(Primitive.Floor),
+	[Primitive.Ceil]: unopBatcher(Primitive.Ceil),
+	[Primitive.StopGradient]: unopBatcher(Primitive.StopGradient),
+	[Primitive.Cast]: unopBatcher(Primitive.Cast),
+	[Primitive.Bitcast]: unopBatcher(Primitive.Bitcast),
+	[Primitive.Sin]: unopBatcher(Primitive.Sin),
+	[Primitive.Cos]: unopBatcher(Primitive.Cos),
+	[Primitive.Asin]: unopBatcher(Primitive.Asin),
+	[Primitive.Atan]: unopBatcher(Primitive.Atan),
+	[Primitive.Exp]: unopBatcher(Primitive.Exp),
+	[Primitive.Log]: unopBatcher(Primitive.Log),
+	[Primitive.Erf]: unopBatcher(Primitive.Erf),
+	[Primitive.Erfc]: unopBatcher(Primitive.Erfc),
+	[Primitive.Sqrt]: unopBatcher(Primitive.Sqrt),
 	[Primitive.Reduce](axisSize, [x], [xBdim], { op, axis }) {
 		assertNonNull(xBdim);
 		const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
@@ -3568,10 +3751,25 @@ const vmapRules = {
 		});
 		return [[z], [0]];
 	},
-	[Primitive.Compare](axisSize, args, dims, { op }) {
-		return broadcastBatcher((x, y) => compare(x, y, op))(axisSize, args, dims, {});
+	[Primitive.Compare]: broadcastBatcher(Primitive.Compare),
+	[Primitive.Where]: broadcastBatcher(Primitive.Where),
+	[Primitive.Concatenate](axisSize, xs, xBdims, { axis }) {
+		const minBdim = Math.min(...xBdims.filter((d) => d !== null));
+		xs = xs.map((x, i) => moveBatchAxis(axisSize, xBdims[i], minBdim, x));
+		const newAxis = axis + (minBdim <= axis ? 1 : 0);
+		return [[concatenate$1(xs, newAxis)], [minBdim]];
+	},
+	[Primitive.Split](axisSize, [x], [xBdim], { axis, sizes }) {
+		assertNonNull(xBdim);
+		const newAxis = axis + (xBdim <= axis ? 1 : 0);
+		const outs = split$2(x, newAxis, sizes);
+		return [outs, rep(outs.length, xBdim)];
+	},
+	[Primitive.RandomBits](axisSize, [k0, k1], [bdim0, bdim1], { shape: shape$1, mode }) {
+		k0 = moveBatchAxis(axisSize, bdim0, 0, k0);
+		k1 = moveBatchAxis(axisSize, bdim1, 0, k1);
+		return [[randomBits(k0, k1, [axisSize, ...shape$1], mode)], [0]];
 	},
-	[Primitive.Where]: broadcastBatcher(where$1),
 	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
 		if (indicesBdim.every((d) => d === null)) {
 			assertNonNull(xBdim);
@@ -3633,18 +3831,8 @@ const vmapRules = {
 		const newWidth = width.toSpliced(xBdim, 0, [0, 0]);
 		return [[pad$1(x, newWidth)], [xBdim]];
 	},
-	[Primitive.Sort](axisSize, [x], [xBdim]) {
-		assertNonNull(xBdim);
-		if (xBdim !== x.ndim - 1) return [[sort$1(x)], [xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [[sort$1(x)], [0]];
-	},
-	[Primitive.Argsort](axisSize, [x], [xBdim]) {
-		assertNonNull(xBdim);
-		if (xBdim !== x.ndim - 1) return [argsort$1(x), [xBdim, xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [argsort$1(x), [0, 0]];
-	},
+	[Primitive.Sort]: lastDimsBatcher(Primitive.Sort, 1),
+	[Primitive.Argsort]: lastDimsBatcher(Primitive.Argsort, 1, 2),
 	[Primitive.TriangularSolve](axisSize, [a, b], [aBdim, bBdim], { unitDiagonal }) {
 		if (aBdim === null) {
 			b = moveBatchAxis(axisSize, bBdim, -3, b);
@@ -3668,12 +3856,8 @@ const vmapRules = {
 		const x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
 		return [[x], [0]];
 	},
-	[Primitive.Cholesky](axisSize, [x], [xBdim]) {
-		assertNonNull(xBdim);
-		if (xBdim < x.ndim - 2) return [[cholesky$2(x)], [xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [[cholesky$2(x)], [0]];
-	},
+	[Primitive.Cholesky]: lastDimsBatcher(Primitive.Cholesky, 2),
+	[Primitive.LU]: lastDimsBatcher(Primitive.LU, 2, 3),
 	[Primitive.Jit](axisSize, args, dims, { name, jaxpr }) {
 		const newJaxpr = vmapJaxpr(jaxpr, axisSize, dims);
 		const outs = bind(Primitive.Jit, [...newJaxpr.consts.map((c) => c.ref), ...args], {
@@ -3823,6 +4007,16 @@ function batchMatmulT(a, b) {
 function mT(a) {
 	return moveaxis(a, -2, -1);
 }
+function sliceAxis(a, axis, p) {
+	const slices = Array(a.shape.length).fill([]);
+	slices[checkAxis(axis, a.ndim)] = p;
+	return a.slice(...slices);
+}
+function padAxis(a, axis, p) {
+	const pads = Array(a.shape.length).fill([0, 0]);
+	pads[checkAxis(axis, a.ndim)] = p;
+	return pad$1(a, pads);
+}
 const jvpRules = {
 	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
 	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
@@ -3921,6 +4115,8 @@ const jvpRules = {
 		dcond.dispose();
 		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
 	},
+	[Primitive.Concatenate]: linearTangentsJvp(Primitive.Concatenate),
+	[Primitive.Split]: linearTangentsJvp(Primitive.Split),
 	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
 	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
 		const indicesRef = indices.map((t) => t.ref);
@@ -3955,6 +4151,38 @@ const jvpRules = {
 		const dL = batchMatmulT(L.ref, triu(ST.ref, 1).add(triu(ST)).mul(.5));
 		return [[L], [dL]];
 	},
+	[Primitive.LU]([a], [da]) { // JVP rule for LU: returns the three primal outputs plus one tangent per output
+		const [luMatrix, pivots, permutation] = lu$1(a);
+		const [m, n] = a.shape.slice(-2); // sizes of the last two axes of `a`
+		const k = Math.min(m, n);
+		const luSliceL = sliceAxis(luMatrix.ref, -1, [0, k]); // first k columns of the packed result
+		const lLower = tril(luSliceL, -1);
+		const lPadded = m > k ? padAxis(lLower, -1, [0, m - k]) : lLower; // widened to m columns when m > k
+		const L = lPadded.add(eye(m)); // lower part with an identity added on top
+		const luSliceU = sliceAxis(luMatrix.ref, -2, [0, k]); // first k rows of the packed result
+		const uUpper = triu(luSliceU);
+		const uPadded = n > k ? padAxis(uUpper, -2, [0, n - k]) : uUpper; // extended to n rows when n > k
+		const uEye = n > k ? padAxis(padAxis(eye(n - k), -1, [k, 0]), -2, [k, 0]) : zerosLike$1(uPadded.ref); // identity placed in the bottom-right (n-k) block, otherwise zeros
+		const U = uPadded.add(uEye);
+		const P = permutation.ref.reshape([...permutation.shape, 1]).equal(arange(m)).astype(da.dtype); // one-hot rows: entry [i, j] is 1 where permutation[i] === j
+		const pda = batchMatmulT(P, mT(da)); // NOTE(review): presumably P @ da, if batchMatmulT(a, b) is a @ bᵀ — confirm
+		const la = mT(triangularSolve$1(L.ref, mT(pda), {
+			lower: true,
+			unitDiagonal: true
+		}));
+		const lau = triangularSolve$1(mT(U.ref), la, { lower: true });
+		const lDot = batchMatmulT(L, mT(tril(lau.ref, -1))); // built from the tril(…, -1) part of `lau`
+		const uDot = batchMatmulT(triu(lau), mT(U)); // built from the triu part of `lau`
+		return [[
+			luMatrix,
+			pivots,
+			permutation
+		], [
+			lDot.add(uDot),
+			zerosLike$1(pivots.ref), // pivots and permutation both get all-zero tangents
+			zerosLike$1(permutation.ref)
+		]];
+	},
 	[Primitive.Jit](primals, tangents, { name, jaxpr }) {
 		const newJaxpr = jvpJaxpr(jaxpr);
 		const outs = bind(Primitive.Jit, [
@@ -4492,6 +4720,15 @@ const transposeRules = {
 		cond.dispose();
 		return cts;
 	},
+	[Primitive.Concatenate]([ct], inputs, { axis }) {
+		if (inputs.some((x) => !(x instanceof UndefPrimal))) throw new NonlinearError(Primitive.Concatenate);
+		const sizes = inputs.map((x) => x.aval.shape[axis]);
+		return split$2(ct, axis, sizes);
+	},
+	[Primitive.Split](cts, [x], { axis }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Split);
+		return [concatenate$1(cts, axis)];
+	},
 	[Primitive.Gather]([ct], [x, ...indices], { axis, outDim }) {
 		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
 		if (indices.some((i) => i instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
@@ -4767,8 +5004,8 @@ function computeSizeMap({ shapes, lhsIndices, rhsIndex }) {
 			const idx = lhsIndex[j];
 			const dim = shape$1[j];
 			const existing = sizeMap.get(idx);
-			if (existing === void 0) sizeMap.set(idx, dim);
-			else if (existing !== dim) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
+			if (existing === void 0 || existing === 1) sizeMap.set(idx, dim);
+			else if (existing !== dim && dim !== 1) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
 		}
 	}
 	for (const [idx, size$1] of sizeMap) if (!Number.isInteger(idx) || idx < 0) throw new Error(`Invalid index ${idx} in einsum expression, must be non-negative integer`);
@@ -4924,27 +5161,53 @@ function ifft(a, axis = -1) {
 //#region src/library/numpy-linalg.ts
 var numpy_linalg_exports = {};
 __export(numpy_linalg_exports, {
-	cholesky: () => cholesky$1,
+	cholesky: () => cholesky,
+	det: () => det,
 	diagonal: () => diagonal,
+	inv: () => inv,
 	lstsq: () => lstsq,
 	matmul: () => matmul,
+	matrixPower: () => matrixPower,
 	matrixTranspose: () => matrixTranspose,
 	outer: () => outer,
+	slogdet: () => slogdet,
+	solve: () => solve,
 	tensordot: () => tensordot,
 	trace: () => trace,
 	vecdot: () => vecdot
 });
+function checkSquare(name, a) {
+	if (a.ndim < 2 || a.shape[a.ndim - 1] !== a.shape[a.ndim - 2]) throw new Error(`${name}: input must be at least 2D square matrix, got ${a.aval}`);
+	return a.shape[a.ndim - 1];
+}
 /**
 * Compute the Cholesky decomposition of a (batched) positive-definite matrix.
 *
 * This is like `jax.lax.linalg.cholesky()`, except with an option to symmetrize
 * the input matrix, which is on by default.
 */
-function cholesky$1(a, { upper = false, symmetrizeInput = true } = {}) {
+function cholesky(a, { upper = false, symmetrizeInput = true } = {}) {
 	a = fudgeArray(a);
-	if (a.ndim < 2 || a.shape[a.ndim - 1] !== a.shape[a.ndim - 2]) throw new Error(`cholesky: input must be at least 2D square matrix, got ${a.aval}`);
+	checkSquare("cholesky", a);
 	if (symmetrizeInput) a = a.ref.add(matrixTranspose(a)).mul(.5);
-	return cholesky(a, { upper });
+	return cholesky$1(a, { upper });
+}
+/** Compute the determinant of a square matrix (batched), from the diagonal and pivots of its LU decomposition. */
+function det(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("det", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	permutation.dispose(); // the permutation output is not used below
+	const parity = pivots.notEqual(arange(n)).astype(int32).sum(-1).mod(2); // count of pivots that differ from their own index, mod 2
+	const sign$1 = parity.mul(-2).add(1); // parity 0 -> +1, parity 1 -> -1
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	return prod$1(diag$1, -1).mul(sign$1); // product of the LU diagonal, times the pivoting sign
+}
+/** Compute the inverse of a square matrix (batched). */
+function inv(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("inv", a);
+	return solve(a, eye(n));
 }
 /**
 * Return the least-squares solution to a linear equation.
@@ -4968,7 +5231,7 @@ function lstsq(a, b) {
 	const at = matrixTranspose(a.ref);
 	if (m <= n) {
 		const aat = matmul(a, at.ref);
-		const l = cholesky$1(aat, { symmetrizeInput: false });
+		const l = cholesky(aat, { symmetrizeInput: false });
 		const lb = triangularSolve(l.ref, b, {
 			leftSide: true,
 			lower: true
@@ -4980,7 +5243,7 @@ function lstsq(a, b) {
 		return matmul(at, llb.ref);
 	} else {
 		const ata = matmul(at.ref, a);
-		const l = cholesky$1(ata, { symmetrizeInput: false });
+		const l = cholesky(ata, { symmetrizeInput: false });
 		const atb = matmul(at, b);
 		const lb = triangularSolve(l.ref, atb, {
 			leftSide: true,
@@ -4993,6 +5256,169 @@ function lstsq(a, b) {
 		return llb;
 	}
 }
+/** Raise a square matrix to an integer power, via repeated squarings; negative powers invert the matrix first. */
+function matrixPower(a, n) {
+	if (!Number.isInteger(n)) throw new Error(`matrixPower: exponent must be an integer, got ${n}`);
+	a = fudgeArray(a);
+	const m = checkSquare("matrixPower", a);
+	if (n === 0) { // zeroth power: an identity broadcast to the input's shape
+		a.dispose();
+		return broadcastTo(eye(m), a.shape);
+	}
+	if (n < 0) { // negative power: invert once, then raise to -n
+		a = inv(a);
+		n = -n;
+	}
+	let result = null; // stays null until the first set bit of the exponent is met
+	let a2k = a; // at iteration k this holds a^(2^k)
+	for (let k = 0; n; k++) { // one iteration per binary digit of n, lowest first
+		if (k > 0) a2k = matmul(a2k.ref, a2k); // square the previous power
+		if (n % 2 === 1) result = result === null ? a2k.ref : matmul(result, a2k.ref); // digit set: fold this power into the result
+		n = Math.floor(n / 2);
+	}
+	a2k.dispose();
+	return result;
+}
+/** Return `[sign, logabsdet]`: the sign and the natural logarithm of the absolute determinant of `a`, via LU. */
+function slogdet(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("slogdet", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	permutation.dispose(); // the permutation output is not used below
+	let parity = pivots.notEqual(arange(n)).astype(int32).sum(-1); // count of pivots that differ from their own index
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	parity = parity.add(diag$1.ref.less(0).astype(int32).sum(-1)).mod(2); // plus the count of negative diagonal entries, mod 2
+	const logabsdet = log(absolute(diag$1)).sum(-1); // sum of log|d| over the LU diagonal
+	const sign$1 = parity.mul(-2).add(1); // parity 0 -> +1, parity 1 -> -1; NOTE(review): never 0, even when a diagonal entry is 0
+	return [sign$1, logabsdet];
+}
+/**
+* Solve a linear system of equations.
+*
+* This solves a (batched) linear system of equations `a @ x = b` for `x` given
+* `a` and `b`. If `a` is singular, this will return `nan` or `inf` values.
+*
+* @param a - Coefficient matrix of shape `(..., N, N)`.
+* @param b - Values of shape `(N,)` or `(..., N, M)`.
+* @returns Solution `x` of shape `(..., N)` or `(..., N, M)`.
+*/
+function solve(a, b) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	const n = checkSquare("solve", a);
+	if (b.ndim === 0) throw new Error(`solve: b cannot be scalar`);
+	const bIs1d = b.ndim === 1; // remembered so the added trailing axis can be removed again at the end
+	if (bIs1d) b = b.reshape([...b.shape, 1]); // treat a 1-D `b` as a single column
+	if (b.shape[b.ndim - 2] !== n) throw new Error(`solve: leading dimension of b must match size of a, got a=${a.aval}, b=${b.aval}`);
+	const m = b.shape[b.ndim - 1];
+	const batchDims = generalBroadcast(a.shape.slice(0, -2), b.shape.slice(0, -2)); // common shape of everything before the last two axes
+	a = broadcastTo(a, [
+		...batchDims,
+		n,
+		n
+	]);
+	b = broadcastTo(b, [
+		...batchDims,
+		n,
+		m
+	]);
+	const [lu$2, pivots, permutation] = lu(a);
+	pivots.dispose(); // only the permutation output is used below
+	const P = arange(n).equal(permutation.reshape([...permutation.shape, 1])).astype(b.dtype); // one-hot rows: entry [i, j] is 1 where permutation[i] === j
+	const LPb = triangularSolve(lu$2.ref, matmul(P, b), { // first solve: lower triangle with unit diagonal, against the row-permuted `b`
+		leftSide: true,
+		lower: true,
+		unitDiagonal: true
+	});
+	let x = triangularSolve(lu$2, LPb.ref, { // second solve: upper triangle, against the first solve's result
+		leftSide: true,
+		lower: false
+	});
+	if (bIs1d) x = squeeze(x, -1);
+	return x;
+}
+//#endregion
+//#region src/library/numpy/dtype-info.ts
+/** Machine limits for floating-point types, as a frozen record; throws for a non-float or unsupported dtype. */
+function finfo(dtype) {
+	if (!isFloatDtype(dtype)) throw new Error(`finfo: received ${dtype}, must be a floating-point type`);
+	switch (dtype) { // in every record below, eps === 2 ** machep and epsneg === 2 ** negep
+		case DType.Float16: return Object.freeze({
+			bits: 16,
+			dtype: DType.Float16,
+			eps: 2 ** -10,
+			epsneg: 2 ** -11,
+			machep: -10,
+			max: 65504,
+			maxexp: 16,
+			min: -65504,
+			minexp: -14,
+			negep: -11, // exponent of epsneg (2 ** -11), matching the Float32 and Float64 records
+			nexp: 5,
+			nmant: 10,
+			precision: 3,
+			resolution: .001,
+			smallestNormal: 2 ** -14,
+			smallestSubnormal: 2 ** -24
+		});
+		case DType.Float32: return Object.freeze({
+			bits: 32,
+			dtype: DType.Float32,
+			eps: 2 ** -23,
+			epsneg: 2 ** -24,
+			machep: -23,
+			max: 34028234663852886e22,
+			maxexp: 128,
+			min: -34028234663852886e22,
+			minexp: -126,
+			negep: -24,
+			nexp: 8,
+			nmant: 23,
+			precision: 6,
+			resolution: 1e-6,
+			smallestNormal: 2 ** -126,
+			smallestSubnormal: 2 ** -149
+		});
+		case DType.Float64: return Object.freeze({
+			bits: 64,
+			dtype: DType.Float64,
+			eps: 2 ** -52,
+			epsneg: 2 ** -53,
+			machep: -52,
+			max: Number.MAX_VALUE,
+			maxexp: 1024,
+			min: -Number.MAX_VALUE,
+			minexp: -1022,
+			negep: -53,
+			nexp: 11,
+			nmant: 52,
+			precision: 15,
+			resolution: 1e-15,
+			smallestNormal: 2 ** -1022,
+			smallestSubnormal: 2 ** -1074
+		});
+		default: throw new Error(`finfo: unsupported dtype ${dtype}`);
+	}
+}
+/** Machine limits for integer types. */
+function iinfo(dtype) {
+	switch (dtype) {
+		case DType.Int32: return Object.freeze({
+			bits: 32,
+			dtype: DType.Int32,
+			max: 2147483647,
+			min: -2147483648
+		});
+		case DType.Uint32: return Object.freeze({
+			bits: 32,
+			dtype: DType.Uint32,
+			max: 4294967295,
+			min: 0
+		});
+		default: throw new Error(`iinfo: unsupported dtype ${dtype}`);
+	}
+}
 //#endregion
 //#region src/library/numpy.ts
@@ -5048,6 +5474,7 @@ __export(numpy_exports, {
 	diag: () => diag,
 	diagonal: () => diagonal,
 	divide: () => trueDivide,
+	divmod: () => divmod,
 	dot: () => dot$1,
 	dstack: () => dstack,
 	e: () => e,
@@ -5060,6 +5487,7 @@ __export(numpy_exports, {
 	expm1: () => expm1,
 	eye: () => eye,
 	fft: () => numpy_fft_exports,
+	finfo: () => finfo,
 	flip: () => flip,
 	fliplr: () => fliplr,
 	flipud: () => flipud,
@@ -5067,6 +5495,7 @@ __export(numpy_exports, {
 	float32: () => float32,
 	float64: () => float64,
 	floor: () => floor,
+	floorDivide: () => floorDivide,
 	fmod: () => fmod,
 	frexp: () => frexp,
 	full: () => full,
@@ -5079,6 +5508,7 @@ __export(numpy_exports, {
 	hstack: () => hstack,
 	hypot: () => hypot,
 	identity: () => identity$1,
+	iinfo: () => iinfo,
 	inf: () => inf,
 	inner: () => inner,
 	int32: () => int32,
@@ -5096,6 +5526,7 @@ __export(numpy_exports, {
 	log10: () => log10,
 	log1p: () => log1p,
 	log2: () => log2,
+	logspace: () => logspace,
 	matmul: () => matmul,
 	matrixTranspose: () => matrixTranspose,
 	max: () => max,
@@ -5132,9 +5563,11 @@ __export(numpy_exports, {
 	shape: () => shape,
 	sign: () => sign,
 	sin: () => sin,
+	sinc: () => sinc,
 	sinh: () => sinh,
 	size: () => size,
 	sort: () => sort,
+	split: () => split$1,
 	sqrt: () => sqrt,
 	square: () => square,
 	squeeze: () => squeeze,
@@ -5142,6 +5575,7 @@ __export(numpy_exports, {
 	std: () => std,
 	subtract: () => subtract,
 	sum: () => sum,
+	take: () => take,
 	tan: () => tan,
 	tanh: () => tanh,
 	tensordot: () => tensordot,
@@ -5400,6 +5834,45 @@ function flip(x, axis = null) {
 	return flip$1(x, axis);
 }
 /**
+* Split an array into multiple sub-arrays along an axis.
+*
+* @param a - The input array to split.
+* @param indicesOrSections - If an integer, it indicates the number of equal
+* sections to create along the specified axis. If a list of integers, it
+* specifies the indices at which to split the array.
+* @param axis - The axis along which to split the array. Default is 0.
+*/
+function split$1(a, indicesOrSections, axis = 0) {
+	a = fudgeArray(a);
+	axis = checkAxis(axis, a.ndim);
+	const size$1 = a.shape[axis];
+	let sizes;
+	if (typeof indicesOrSections === "number") {
+		if (size$1 % indicesOrSections !== 0) throw new Error(`Array of size ${size$1} cannot be split into ${indicesOrSections} equal parts`);
+		const partSize = size$1 / indicesOrSections;
+		sizes = rep(indicesOrSections, partSize);
+	} else {
+		const indices = indicesOrSections;
+		sizes = [indices[0]];
+		for (let i = 1; i < indices.length; i++) sizes.push(indices[i] - indices[i - 1]);
+		sizes.push(size$1 - indices[indices.length - 1]);
+	}
+	const results = [];
+	for (let i = 0; i < sizes.length; i += 7) if (i === sizes.length) {
+		results.push(a);
+		break;
+	} else if (i + 8 >= sizes.length) {
+		results.push(...split$2(a, axis, sizes.slice(i)));
+		break;
+	} else {
+		const groupSizes = [...sizes.slice(i, i + 7), sizes.slice(i + 7).reduce((x, y) => x + y, 0)];
+		const outs = split$2(a, axis, groupSizes);
+		results.push(...outs.slice(0, -1));
+		a = outs[outs.length - 1];
+	}
+	return results;
+}
+/**
 * Join a sequence of arrays along an existing axis.
 *
 * The arrays must have the same shape, except in the dimension corresponding to
@@ -5411,13 +5884,11 @@ function concatenate(xs, axis = 0) {
 	if (xs.length === 0) throw new Error("Need at least one array to concatenate");
 	const shapes = xs.map(shape);
 	axis = checkAxis(axis, shapes[0].length);
-	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays with shapes ${JSON.stringify(shapes)} along axis ${axis}`);
-	const makePadAxis = (start, end) => shapes[0].map((_, i) => i === axis ? [start, end] : [0, 0]);
+	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays ${xs[0].aval} and ${xs[i].aval} along axis ${axis}`);
 	let result = xs[0];
-	for (let i = 1; i < xs.length; i++) {
-		const len1 = result.shape[axis];
-		const len2 = shapes[i][axis];
-		result = pad(result, makePadAxis(0, len2)).add(pad(xs[i], makePadAxis(len1, 0)));
+	for (let i = 1; i < xs.length; i += 7) {
+		const group = xs.slice(i, i + 7);
+		result = concatenate$1([result, ...group], axis);
 	}
 	return result;
 }
@@ -5669,6 +6140,20 @@ function sort(a, axis = -1) {
 function argsort(a, axis = -1) {
 	return fudgeArray(a).argsort(axis);
 }
+/**
+* Take elements from an array along an axis.
+*
+* This is equivalent to advanced indexing with integer indices over that
+* numbered axis. By default, the flattened array is used.
+*/
+function take(a, indices, axis = null) {
+	if (axis === null) {
+		a = ravel(a);
+		axis = 0;
+	}
+	axis = checkAxis(axis, ndim(a));
+	return gather(a, [indices], [axis], axis);
+}
 /** Return if two arrays are element-wise equal within a tolerance. */
 function allclose(actual, expected, options) {
 	const { rtol = 1e-5, atol = 1e-7 } = options ?? {};
@@ -5988,6 +6473,20 @@ function tan(x) {
 	x = fudgeArray(x);
 	return sin(x.ref).div(cos(x));
 }
+/**
+* @function
+* Return the normalized sinc function.
+*
+* The sinc function is defined as `sin(πx) / (πx)` for `x != 0`, and `1` for `x = 0`.
+* This is the normalized sinc function commonly used in signal processing.
+*
+* **Note:** JVP is not supported at x=0, where the quotient branch is still evaluated
+* (0/0). This requires a custom JVP rule to handle properly (see JAX implementation).
+*/
+const sinc = jit$1(function sinc$1(x) {
+	const pix = x.ref.mul(Math.PI); // π·x, shared by the numerator and the denominator
+	return where(equal(x, 0), 1, sin(pix.ref).div(pix)); // 1 where x equals 0, sin(πx)/(πx) elsewhere
+});
 /** Element-wise inverse cosine function (inverse of cos). */
 function acos(x) {
 	return subtract(pi / 2, asin(x));
@@ -6040,6 +6539,25 @@ function trueDivide(x, y) {
 	return x.div(y);
 }
 /**
+* Return the largest integer smaller or equal to the division of the inputs.
+*
+* The result is always rounded towards negative infinity.
+*
+* For floating-point inputs, this is equivalent to `floor(x / y)`.
+* For integer inputs, we use `(x - remainder(x, y)) / y` to handle
+* negative values correctly (note: may overflow near int32 boundaries).
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Element-wise floor division of x by y.
+*/
+function floorDivide(x, y) {
+	x = fudgeArray(x);
+	y = fudgeArray(y);
+	if (isFloatDtype(x.dtype) || isFloatDtype(y.dtype)) return floor(trueDivide(x, y)); // either operand is float: floor(x / y)
+	return subtract(x, remainder(x.ref, y.ref)).div(y); // otherwise: (x - remainder(x, y)) / y
+}
+/**
 * @function
 * Calculate element-wise floating-point modulo operation.
 */
@@ -6053,6 +6571,20 @@ const fmod = jit$1(function fmod$1(x, y) {
 const remainder = jit$1(function remainder$1(x, y) {
 	return mod(mod(x, y.ref).add(y.ref), y);
 });
+/**
+* Return element-wise quotient and remainder simultaneously.
+*
+* Equivalent to `[floorDivide(x, y), remainder(x, y)]`.
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Tuple of [quotient, remainder].
+*/
+function divmod(x, y) {
+	const xArr = fudgeArray(x);
+	const yArr = fudgeArray(y);
+	return [floorDivide(xArr.ref, yArr.ref), remainder(xArr, yArr)];
+}
 /** Round input to the nearest integer towards zero. */
 function trunc(x) {
 	return idiv(x, 1);
@@ -6216,14 +6748,15 @@ function std(x, axis = null, opts) {
 	return sqrt(var_(x, axis, opts));
 }
 /** Estimate the sample covariance of a set of variables. */
-function cov(x, y) {
+function cov(x, y = null, { rowvar = true } = {}) {
 	x = fudgeArray(x);
 	if (x.ndim === 1) x = x.reshape([1, x.shape[0]]);
-	if (y !== void 0) {
+	if (y !== null) {
 		y = fudgeArray(y);
 		if (y.ndim === 1) y = y.reshape([1, y.shape[0]]);
 		x = vstack([x, y]);
 	}
+	if (!rowvar) x = x.transpose();
 	const [_M, N] = x.shape;
 	x = x.ref.sub(x.mean(1, { keepdims: true }));
 	return dot$1(x.ref, x.transpose()).div(N - 1);
@@ -6268,7 +6801,8 @@ const isfinite = jit$1(function isfinite$1(x) {
 //#region src/library/lax-linalg.ts
 var lax_linalg_exports = {};
 __export(lax_linalg_exports, {
-	cholesky: () => cholesky,
+	cholesky: () => cholesky$1,
+	lu: () => lu,
 	triangularSolve: () => triangularSolve
 });
 /**
@@ -6297,11 +6831,39 @@ __export(lax_linalg_exports, {
 * // U ≈ [[1.4142135, 0.70710677], [0, 1.2247449]]
 * ```
 */
-function cholesky(a, { upper = false } = {}) {
+function cholesky$1(a, { upper = false } = {}) {
 	const L = cholesky$2(a);
 	return upper ? moveaxis$1(L, -2, -1) : L;
 }
 /**
+* LU decomposition with partial pivoting.
+*
+* Computes the matrix decomposition: `P @ A = L @ U`, where `P` is a
+* permutation of the rows of `A`, `L` is lower-triangular with unit diagonal,
+* and `U` is upper-triangular.
+*
+* @param x - A batch of matrices with shape `[..., m, n]`.
+*
+* @returns A tuple `(lu, pivots, permutation)` where:
+* - `lu`: combined lower and upper triangular matrices.
+* - `pivots`: an array of pivot indices with shape `[..., min(m, n)]`.
+* - `permutation`: the permutation generated by pivots with shape `[..., m]`.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const A = np.array([[4., 3.], [6., 3.]]);
+* const [lu, pivots, permutation] = lax.linalg.lu(A);
+* // lu ≈ [[6., 3.], [0.6666667, 1.0]]
+* // pivots = [1, 1]
+* // permutation = [1, 0]
+* ```
+*/
+function lu(x) {
+	return lu$1(x); // forwards `x` unchanged to the internal `lu$1`; no validation happens in this wrapper
+}
+/**
 * Solve a triangular linear system.
 *
 * Solves `a @ x = b` (if leftSide=true) or `x @ a = b` (if leftSide=false)
@@ -6844,33 +7406,41 @@ __export(random_exports, {
 	gumbel: () => gumbel,
 	key: () => key,
 	laplace: () => laplace,
+	multivariateNormal: () => multivariateNormal,
 	normal: () => normal,
 	split: () => split,
 	uniform: () => uniform
 });
-function validateKeyShape(key$1) {
+function validateKeyShape(key$1, scalar = false) {
 	if (key$1.ndim === 0) throw new Error("Key must have at least one dimension.");
 	if (key$1.shape[key$1.shape.length - 1] !== 2) throw new Error(`Invalid key shape: ${key$1.shape}. Expected last dimension to be 2.`);
+	if (scalar && key$1.shape.length > 1) throw new Error(`Expected a single PRNG key, but got a batch of keys with shape ${JSON.stringify(key$1.shape)} - use jax.vmap for batching.`);
 	return key$1.shape.slice(0, -1);
 }
+function getK01(key$1) {
+	const keyShape = validateKeyShape(key$1, true);
+	let [k0, k1] = split$2(key$1, -1, [1, 1]);
+	k0 = k0.reshape(keyShape);
+	k1 = k1.reshape(keyShape);
+	return [k0, k1];
+}
 /** Create a pseudo-random number generator (PRNG) key from 32-bit integer seed. */
 function key(seed) {
-	seed = seed >>> 0;
-	return array([0, seed], { dtype: DType.Uint32 });
+	seed = array(seed, { dtype: DType.Uint32 });
+	if (seed.ndim !== 0) throw new Error(`key: seed must be a scalar integer, but got shape ${seed.shape} - use jax.vmap for batching.`);
+	return stack([0, seed]);
 }
 /** Splits a PRNG key into `num` new keys by adding a leading axis. */
 function split(key$1, num = 2) {
 	const shape$1 = typeof num === "number" ? [num] : num;
 	for (const len of shape$1) if (len <= 0 || !Number.isInteger(len)) throw new Error(`Invalid split length: ${len}. Must be a positive integer.`);
-	const keyShape = validateKeyShape(key$1);
-	const k0 = key$1.ref.slice(...keyShape.map(() => null), 0);
-	const k1 = key$1.slice(...keyShape.map(() => null), 1);
+	const [k0, k1] = getK01(key$1);
 	return stack([randomBits(k0.ref, k1.ref, shape$1, 0), randomBits(k0, k1, shape$1, 1)], -1);
 }
 /** Sample uniform bits in the form of unsigned integers. */
 function bits(key$1, shape$1 = []) {
-	const keyShape = validateKeyShape(key$1);
-	return randomBits(key$1.ref.slice(...keyShape.map(() => null), 0), key$1.slice(...keyShape.map(() => null), 1), shape$1);
+	const [k0, k1] = getK01(key$1);
+	return randomBits(k0, k1, shape$1);
 }
 /**
 * @function
@@ -6944,6 +7514,32 @@ const laplace = jit$1(function laplace$1(key$1, shape$1 = []) {
 }, { staticArgnums: [1] });
 /**
 * @function
+* Sample multivariate normal random values with given mean and covariance.
+*
+* The values are returned with the given shape, along with the final dimension
+* used to represent the n-dimensional multivariate normal factors.
+*
+* This uses Cholesky decomposition on the covariance matrix.
+*
+* - `key` - PRNG key
+* - `mean` - Mean vector of shape `[..., n]`
+* - `cov` - Covariance of shape `[..., n, n]`, must be positive-definite
+* - `shape` - Result batch shape, must be broadcastable with
+*            `mean.shape[:-1]` and `cov.shape[:-2]`
+* @returns Random samples of shape `[...shape, n]`
+*/
+const multivariateNormal = jit$1(function multivariateNormal$1(key$1, mean$1, cov$1, shape$1 = []) {
+	mean$1 = fudgeArray(mean$1);
+	cov$1 = fudgeArray(cov$1);
+	const n = mean$1.shape[mean$1.ndim - 1]; // size of the last axis of `mean`
+	if (cov$1.shape[cov$1.ndim - 1] !== n || cov$1.shape[cov$1.ndim - 2] !== n) throw new Error(`Invalid covariance shape: ${cov$1.shape}. Expected last two dimensions to be [${n}, ${n}].`);
+	const outputShape = broadcastShapes(shape$1, mean$1.shape.slice(0, -1), cov$1.shape.slice(0, -2)).concat(n); // broadcast batch shape, then n
+	const L = cholesky(cov$1); // factor of the covariance
+	const z = normal(key$1, outputShape); // samples drawn with `normal` at the full output shape
+	return einsum("...ij,...j->...i", L, z).add(mean$1); // L applied to z over the last axis, shifted by the mean
+}, { staticArgnums: [3] }); // argument 3 (`shape`) is marked static for jit
+/**
+* @function
 * Sample random values according to `p(x) = 1/sqrt(2pi) * exp(-x^2/2)`.
 *
 * Unlike JAX, this uses the Box-Muller transform. JAX uses the erf_inv primitive instead and