npm - @jax-js/jax - Versions diffs - 0.1.4 → 0.1.6 - Mend

@jax-js/jax 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +10 -7
package/dist/{backend-Bu9GY6sK.cjs → backend-D7s-Retx.cjs} +122 -8
package/dist/{backend-tngXtWe4.js → backend-Dx6Ob2D1.js} +111 -9
package/dist/index.cjs +1059 -208
package/dist/index.d.cts +429 -21
package/dist/index.d.ts +429 -21
package/dist/index.js +1059 -209
package/dist/webgl-CLLvzJlO.js +522 -0
package/dist/webgl-CyfzNW8T.cjs +522 -0
package/dist/{webgpu-ChVgx3b6.js → webgpu-C-VfevQW.js} +296 -3
package/dist/{webgpu-Oj3Kd-kd.cjs → webgpu-rraa6dfz.cjs} +296 -3
package/package.json +1 -1

package/dist/index.cjs CHANGED

@@ -30,7 +30,7 @@ var __toESM = (mod$1, isNodeMode, target) => (target = mod$1 != null ? __create(
 }) : target, mod$1));
 //#endregion
-const require_backend = require('./backend-Bu9GY6sK.cjs');
+const require_backend = require('./backend-D7s-Retx.cjs');
 //#region src/frontend/convolution.ts
 /**
@@ -240,7 +240,7 @@ __export(tree_exports, {
 	structure: () => structure,
 	unflatten: () => unflatten
 });
-const JsArray$1 = globalThis.Array;
+const JsArray$2 = globalThis.Array;
 let NodeType = /* @__PURE__ */ function(NodeType$1) {
 	NodeType$1["Array"] = "Array";
 	NodeType$1["Object"] = "Object";
@@ -288,7 +288,7 @@ function flatten(tree) {
 	return [leaves$1, treedef];
 }
 function _flatten(tree, leaves$1) {
-	if (JsArray$1.isArray(tree)) {
+	if (JsArray$2.isArray(tree)) {
 		const childTrees = tree.map((c) => _flatten(c, leaves$1));
 		return new JsTreeDef(NodeType.Array, null, childTrees);
 	} else if (typeof tree === "object" && tree !== null && tree.constructor === Object) {
@@ -387,6 +387,8 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["PoolTranspose"] = "pool_transpose";
 	Primitive$1["Compare"] = "compare";
 	Primitive$1["Where"] = "where";
+	Primitive$1["Concatenate"] = "concatenate";
+	Primitive$1["Split"] = "split";
 	Primitive$1["RandomBits"] = "random_bits";
 	Primitive$1["Gather"] = "gather";
 	Primitive$1["Transpose"] = "transpose";
@@ -399,6 +401,7 @@ let Primitive = /* @__PURE__ */ function(Primitive$1) {
 	Primitive$1["Argsort"] = "argsort";
 	Primitive$1["TriangularSolve"] = "triangular_solve";
 	Primitive$1["Cholesky"] = "cholesky";
+	Primitive$1["LU"] = "lu";
 	Primitive$1["Jit"] = "jit";
 	return Primitive$1;
 }({});
@@ -409,6 +412,13 @@ let CompareOp = /* @__PURE__ */ function(CompareOp$1) {
 	CompareOp$1["LessEqual"] = "less_equal";
 	return CompareOp$1;
 }({});
+const routinePrimitives = new Map([
+	[Primitive.Sort, require_backend.Routines.Sort],
+	[Primitive.Argsort, require_backend.Routines.Argsort],
+	[Primitive.TriangularSolve, require_backend.Routines.TriangularSolve],
+	[Primitive.Cholesky, require_backend.Routines.Cholesky],
+	[Primitive.LU, require_backend.Routines.LU]
+]);
 function add$1(x, y) {
 	return bind1(Primitive.Add, [x, y]);
 }
@@ -530,7 +540,25 @@ function where$1(cond, x, y) {
 		y
 	]);
 }
+function concatenate$1(xs, axis) {
+	if (xs.length === 0) throw new Error("concatenate requires at least one input");
+	const avals = xs.map((x) => ShapedArray.fromAval(getAval(x)));
+	axis = require_backend.checkAxis(axis, avals[0].ndim);
+	for (const x of avals) if (x.ndim !== avals[0].ndim || !x.shape.every((s, i) => i === axis || s === avals[0].shape[i])) throw new Error(`Concatenate: inputs ${avals[0]} and ${x} must match shapes except on axis ${axis}`);
+	return bind1(Primitive.Concatenate, xs, { axis });
+}
+function split$2(x, axis, sizes) {
+	axis = require_backend.checkAxis(axis, ndim$1(x));
+	if (sizes.some((s) => s < 0 || !Number.isInteger(s))) throw new Error(`split: sizes must be nonnegative integers, got ${JSON.stringify(sizes)}`);
+	const totalSize = sizes.reduce((a, b) => a + b, 0);
+	if (totalSize !== getShape(x)[axis]) throw new Error(`split: sizes must sum to the size of the axis ${axis}, got ${totalSize}`);
+	return bind(Primitive.Split, [x], {
+		axis,
+		sizes
+	});
+}
 function randomBits(k0, k1, shape$1, mode = "xor") {
+	if (!require_backend.deepEqual(k0.shape, k1.shape) || k0.dtype !== require_backend.DType.Uint32 || k1.dtype !== require_backend.DType.Uint32) throw new Error(`randomBits: key parts must be uint32 with the same shape, got ${ShapedArray.fromAval(k0.aval)} and ${ShapedArray.fromAval(k1.aval)}`);
 	return bind1(Primitive.RandomBits, [k0, k1], {
 		shape: shape$1,
 		mode
@@ -597,6 +625,11 @@ function pad$1(x, width) {
 	return bind1(Primitive.Pad, [x], { width });
 }
 function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
+	const as = getShape(a);
+	const bs = getShape(b);
+	if (as.length < 2 || bs.length < 2) throw new Error(`triangular_solve: must be >=2D, got a=${as}, b=${bs}`);
+	const n = as[as.length - 2];
+	if (n !== as[as.length - 1] || n !== bs[bs.length - 1]) throw new Error(`triangular_solve: incompatible shapes a=${as}, b=${bs}`);
 	if (lower) {
 		a = flip$1(a, [-2, -1]);
 		b = flip$1(b, [-1]);
@@ -606,8 +639,15 @@ function triangularSolve$1(a, b, { lower = false, unitDiagonal = false } = {}) {
 	return x;
 }
 function cholesky$2(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2 || aval.shape[aval.ndim - 1] !== aval.shape[aval.ndim - 2]) throw new Error(`cholesky: expected batch of square matrices, got ${aval}`);
 	return bind1(Primitive.Cholesky, [x]);
 }
+function lu$1(x) {
+	const aval = ShapedArray.fromAval(getAval(x));
+	if (aval.ndim < 2) throw new Error(`lu: expected batch of matrices, got ${aval}`);
+	return bind(Primitive.LU, [x]);
+}
 function sort$1(x) {
 	const nd = ndim$1(x);
 	if (nd === 0) throw new Error("sort: requires at least 1D input");
@@ -652,6 +692,9 @@ function newDynamic(main) {
 		dynamicTrace = prevDynamicTrace;
 	} };
 }
+function currentTraceLevel() {
+	return traceStack[traceStack.length - 1].level;
+}
 var Trace = class {
 	constructor(main) {
 		this.main = main;
@@ -716,6 +759,9 @@ var Tracer = class Tracer {
 	mul(other) {
 		return mul(this, other);
 	}
+	mod(other) {
+		return mod(this, other);
+	}
 	greater(other) {
 		return greater$1(this, other);
 	}
@@ -828,8 +874,14 @@ var Tracer = class Tracer {
 	*/
 	*[Symbol.iterator]() {
 		if (this.ndim === 0) throw new Error("Cannot iterate over a scalar array");
-		for (let i = 0; i < this.shape[0]; i++) yield this.ref.slice(i);
-		this.dispose();
+		let residual = this;
+		const subarrayShape = this.shape.slice(1);
+		for (let i = 0; i < this.shape[0]; i++) {
+			const lr = split$2(residual, 0, [1, residual.shape[0] - 1]);
+			yield lr[0].reshape(subarrayShape);
+			residual = lr[1];
+		}
+		residual.dispose();
 	}
 	/**
 	* Return a sorted copy of an array in ascending order.
@@ -979,6 +1031,9 @@ var ShapedArray = class ShapedArray {
 	get size() {
 		return require_backend.prod(this.shape);
 	}
+	scalar() {
+		return new ShapedArray([], this.dtype, this.weakType);
+	}
 	toString() {
 		return `${this.dtype}[${this.shape.join(",")}]`;
 	}
@@ -1017,6 +1072,7 @@ var TreeMismatchError = class extends TypeError {
 		super(`Mismatched tree structures in ${where$2}: ${left} != ${right}`);
 	}
 };
+/** Flatten a function of `JsTree` input/output for use in tracing. */
 function flattenFun(f, inTree) {
 	const store = { value: void 0 };
 	const flatFun = (...argsFlat) => {
@@ -1028,6 +1084,26 @@ function flattenFun(f, inTree) {
 	};
 	return [flatFun, store];
 }
+/** Like flattenFun, but expects f to return [main, aux] tuple. */
+function flattenFunWithAux(f, inTree) {
+	const store = { value: void 0 };
+	const auxStore = { value: void 0 };
+	const flatFun = (...argsFlat) => {
+		const pytreeArgs = unflatten(inTree, argsFlat);
+		const result = f(...pytreeArgs);
+		if (!Array.isArray(result) || result.length !== 2) throw new Error("Function with `hasAux: true` must return [output, aux] tuple");
+		const [out, aux] = result;
+		const [outFlat, outTree] = flatten(out);
+		store.value = outTree;
+		auxStore.value = aux;
+		return outFlat;
+	};
+	return [
+		flatFun,
+		store,
+		auxStore
+	];
+}
 var UseAfterFreeError = class extends ReferenceError {
 	constructor(tracer) {
 		super(`Referenced tracer ${tracer.toString()} freed, please use .ref move semantics`);
@@ -1588,7 +1664,7 @@ const abstractEvalRules = {
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
 	[Primitive.Conv]([lhs, rhs], params) {
-		const { dtype, weakType } = promoteAvals(new ShapedArray([], lhs.dtype, lhs.weakType), new ShapedArray([], rhs.dtype, rhs.weakType));
+		const { dtype, weakType } = promoteAvals(lhs.scalar(), rhs.scalar());
 		const shape$1 = checkConvShape(lhs.shape, rhs.shape, params);
 		return [new ShapedArray(shape$1, dtype, weakType)];
 	},
@@ -1599,10 +1675,25 @@ const abstractEvalRules = {
 		const shape$1 = require_backend.generalBroadcast(cond.shape, xy.shape);
 		return [new ShapedArray(shape$1, xy.dtype, xy.weakType)];
 	},
+	[Primitive.Concatenate](xs, { axis }) {
+		if (xs.length === 0) throw new TypeError("Concatenate requires at least one input");
+		for (const x of xs) if (x.ndim !== xs[0].ndim || !x.shape.every((s, i) => i === axis || s === xs[0].shape[i])) throw new TypeError(`Concatenate: inputs ${xs[0]} and ${x} must match shapes except on axis ${axis}`);
+		const shape$1 = xs[0].shape.slice();
+		shape$1[axis] = xs.reduce((sum$1, x) => sum$1 + x.shape[axis], 0);
+		const { dtype, weakType } = xs.map((x) => x.scalar()).reduce(promoteAvals);
+		return [new ShapedArray(shape$1, dtype, weakType)];
+	},
+	[Primitive.Split]([x], { axis, sizes }) {
+		const totalSize = sizes.reduce((a, b) => a + b, 0);
+		if (x.shape[axis] !== totalSize) throw new TypeError(`Split: sizes ${sizes} do not sum to dimension ${x.shape[axis]} on axis ${axis}`);
+		return sizes.map((size$1) => {
+			return new ShapedArray(x.shape.toSpliced(axis, 1, size$1), x.dtype, x.weakType);
+		});
+	},
 	[Primitive.RandomBits]([k0, k1], { shape: shape$1 }) {
 		if (k0.dtype !== require_backend.DType.Uint32 || k1.dtype !== require_backend.DType.Uint32) throw new TypeError(`RandomBits requires uint32 keys, got ${k0.dtype} and ${k1.dtype}`);
-		const keyShape = require_backend.generalBroadcast(k0.shape, k1.shape);
-		if (!require_backend.deepEqual(require_backend.generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
+		if (!require_backend.deepEqual(k0.shape, k1.shape)) throw new TypeError(`RandomBits: Keys have different shapes ${k0.shape} and ${k1.shape}`);
+		if (!require_backend.deepEqual(shape$1.slice(0, k0.ndim), k0.shape)) throw new TypeError(`RandomBits: generated shape ${shape$1} must match key shape ${k0.shape}`);
 		return [new ShapedArray(shape$1, require_backend.DType.Uint32, false)];
 	},
 	[Primitive.Gather]([x, ...indices], { axis, outDim }) {
@@ -1659,6 +1750,16 @@ const abstractEvalRules = {
 		if (a.shape[a.ndim - 2] !== a.shape[a.ndim - 1]) throw new TypeError(`cholesky: must be square, got ${a}`);
 		return [ShapedArray.fromAval(a)];
 	},
+	[Primitive.LU]([a]) {
+		if (a.ndim < 2) throw new TypeError(`lu: requires at least 2D input, got ${a}`);
+		const batch = a.shape.slice(0, -2);
+		const [m, n] = a.shape.slice(-2);
+		return [
+			ShapedArray.fromAval(a),
+			new ShapedArray([...batch, Math.min(m, n)], require_backend.DType.Int32, false),
+			new ShapedArray([...batch, m], require_backend.DType.Int32, false)
+		];
+	},
 	[Primitive.Jit](args, { jaxpr }) {
 		const { inTypes, outTypes } = typecheckJaxpr(jaxpr);
 		if (args.length !== inTypes.length) throw new TypeError(`jit expected ${inTypes.length} arguments, got ${args.length}`);
@@ -1736,12 +1837,6 @@ function jit$1(f, opts) {
 //#endregion
 //#region src/frontend/jit.ts
-const routinePrimitives = new Map([
-	[Primitive.Sort, require_backend.Routines.Sort],
-	[Primitive.Argsort, require_backend.Routines.Argsort],
-	[Primitive.TriangularSolve, require_backend.Routines.TriangularSolve],
-	[Primitive.Cholesky, require_backend.Routines.Cholesky]
-]);
 /** Result of compiling a Jaxpr. Can be evaluated on a series of inputs. */
 var JitProgram = class {
 	constructor(backend, steps, inputs, outputs) {
@@ -1911,10 +2006,10 @@ function jitCompile(backend, jaxpr) {
 				inputs.push(jv.arg);
 			} else if (input instanceof Lit) inputs.push(builder.pushLit(input));
 			const outputs = [];
-			for (const outVar$1 of eqn.outBinders) {
-				const outId = builder.pushBuffer(outVar$1.aval.size * require_backend.byteWidth(outVar$1.aval.dtype));
+			for (const outVar of eqn.outBinders) {
+				const outId = builder.pushBuffer(outVar.aval.size * require_backend.byteWidth(outVar.aval.dtype));
 				outputs.push(outId);
-				ctx.set(outVar$1, {
+				ctx.set(outVar, {
 					type: "imm",
 					arg: outId
 				});
@@ -1965,35 +2060,37 @@ function jitCompile(backend, jaxpr) {
 		let reduction;
 		if (inputReduction) {
 			const jv = inputReduction;
-			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp;
-			exp$2 = jv.exp.reindexGids(addArgs(jv.args));
+			const newEpilogue = rule(inputExps, inputAvals, eqn.params).exp[0];
+			exp$2 = [jv.exp.reindexGids(addArgs(jv.args))];
 			reduction = new require_backend.Reduction(jv.reduction.dtype, jv.reduction.op, jv.reduction.size, newEpilogue);
 		} else {
 			const ruleOutput = rule(inputExps, inputAvals, eqn.params);
 			exp$2 = ruleOutput.exp;
 			reduction = ruleOutput.reduction;
 		}
-		const outVar = eqn.outBinders[0];
-		if (blackNodes.has(outVar)) {
-			const nargs$1 = inputArgs.length;
-			const size$1 = outVar.aval.size;
-			const kernel = new require_backend.Kernel(nargs$1, size$1, exp$2, reduction);
-			const outId = builder.pushKernel(kernel, inputArgs);
-			ctx.set(outVar, {
-				type: "imm",
-				arg: outId
+		for (let i$1 = 0; i$1 < eqn.outBinders.length; i$1++) {
+			const outVar = eqn.outBinders[i$1];
+			if (blackNodes.has(outVar)) {
+				const nargs$1 = inputArgs.length;
+				const size$1 = outVar.aval.size;
+				const kernel = new require_backend.Kernel(nargs$1, size$1, exp$2[i$1], reduction);
+				const outId = builder.pushKernel(kernel, inputArgs);
+				ctx.set(outVar, {
+					type: "imm",
+					arg: outId
+				});
+			} else if (reduction) ctx.set(outVar, {
+				type: "red",
+				exp: exp$2[i$1],
+				reduction,
+				args: inputArgs
 			});
-		} else if (reduction) ctx.set(outVar, {
-			type: "red",
-			exp: exp$2,
-			reduction,
-			args: inputArgs
-		});
-		else ctx.set(outVar, {
-			type: "exp",
-			exp: exp$2,
-			args: inputArgs
-		});
+			else ctx.set(outVar, {
+				type: "exp",
+				exp: exp$2[i$1],
+				args: inputArgs
+			});
+		}
 	}
 	const outputIds = [];
 	for (const out of jaxpr.outs) if (out instanceof Var) {
@@ -2034,17 +2131,17 @@ function broadcastedJit(fn, opts) {
 			if (exp$2.dtype !== newDtype && !skipCastIdx.includes(i)) exp$2 = require_backend.AluExp.cast(newDtype, exp$2);
 			return exp$2;
 		});
-		return { exp: fn(exps, params) };
+		return { exp: [fn(exps, params)] };
 	};
 }
 function unopJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: fn(a, params) };
+		return { exp: [fn(a, params)] };
 	};
 }
 function reshapeJit(fn) {
 	return ([a], [_as], params) => {
-		return { exp: reshapeViews(a, (st) => fn(st, params)) };
+		return { exp: [reshapeViews(a, (st) => fn(st, params))] };
 	};
 }
 function routineNoJit() {
@@ -2090,7 +2187,7 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.permute(perm).reshape(newShape), true);
 		const reduction = new require_backend.Reduction(a.dtype, op, reductionSize);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
@@ -2101,13 +2198,13 @@ const jitRules = {
 		a = reshapeViews(a, (st) => st.compose(stX), true);
 		const reduction = new require_backend.Reduction(a.dtype, require_backend.AluOp.Add, stX.shape[stX.shape.length - 1]);
 		return {
-			exp: a,
+			exp: [a],
 			reduction
 		};
 	},
 	[Primitive.Dot]([a, b], [as, bs]) {
 		const k1 = jitRules[Primitive.Mul]([a, b], [as, bs], {});
-		const c = k1.exp;
+		const [c] = k1.exp;
 		const cs = promoteAvals(as, bs);
 		return jitRules[Primitive.Reduce]([c], [cs], {
 			op: require_backend.AluOp.Add,
@@ -2124,16 +2221,42 @@ const jitRules = {
 	},
 	[Primitive.Compare]: broadcastedJit(([a, b], { op }) => aluCompare(a, b, op)),
 	[Primitive.Where]: broadcastedJit(([cond, a, b]) => require_backend.AluExp.where(cond, a, b), { skipCastIdx: [0] }),
+	[Primitive.Concatenate](exps, avals, { axis }) {
+		const ndim$2 = avals[0].ndim;
+		const sizes = avals.map((x) => x.shape[axis]);
+		const finalSize = sizes.reduce((a, b) => a + b, 0);
+		const { dtype: dtypeOut } = avals.map((x) => x.scalar()).reduce(promoteAvals);
+		const makePadAxis = (start, end) => require_backend.range(ndim$2).map((i) => i === axis ? [start, end] : [0, 0]);
+		let cum = 0;
+		const src = [];
+		for (let i = 0; i < exps.length; i++) {
+			const padding = makePadAxis(cum, finalSize - cum - sizes[i]);
+			src.push(reshapeViews(require_backend.AluExp.cast(dtypeOut, exps[i]), (st) => st.pad(padding)));
+			cum += sizes[i];
+		}
+		return { exp: [src.reduce(require_backend.AluExp.add)] };
+	},
+	[Primitive.Split]([a], [as], { axis, sizes }) {
+		const exp$2 = [];
+		let start = 0;
+		for (const size$1 of sizes) {
+			const slice = require_backend.range(as.ndim).map((d) => d === axis ? [start, start + size$1] : [0, as.shape[d]]);
+			exp$2.push(reshapeViews(a, (st) => st.shrink(slice)));
+			start += size$1;
+		}
+		return { exp: exp$2 };
+	},
 	[Primitive.RandomBits]: (keys, keyShapes, { shape: shape$1, mode }) => {
+		const keyShape = keyShapes[0].shape;
 		const mapping = (st) => {
-			if (!require_backend.deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, require_backend.range(shape$1.length - st.shape.length));
+			if (!require_backend.deepEqual(st.shape, shape$1)) return st.broadcast(shape$1, require_backend.range(st.shape.length, shape$1.length));
 		};
 		const k0 = reshapeViews(keys[0], mapping);
 		const k1 = reshapeViews(keys[1], mapping);
 		const c0 = require_backend.AluExp.u32(0);
-		const c1 = require_backend.AluExp.cast(require_backend.DType.Uint32, require_backend.AluVar.gidx);
+		const c1 = require_backend.AluExp.mod(require_backend.AluExp.cast(require_backend.DType.Uint32, require_backend.AluVar.gidx), require_backend.AluExp.u32(Math.max(require_backend.prod(shape$1.slice(keyShape.length)), 1)));
 		const exp$2 = require_backend.AluExp.threefry2x32(k0, k1, c0, c1, mode);
-		return { exp: exp$2 };
+		return { exp: [exp$2] };
 	},
 	[Primitive.Gather]([x, ...indices], [xs, ...indicesShapes], { axis, outDim }) {
 		const axisSet = new Set(axis);
@@ -2148,7 +2271,7 @@ const jitRules = {
 		for (const [i, iexp] of indices.entries()) src[axis[i]] = require_backend.AluExp.cast(require_backend.DType.Int32, reshapeViews(iexp, (st) => st.broadcast(finalShape, [...require_backend.range(outDim + indexShape.length - st.shape.length), ...require_backend.range(outDim + indexShape.length, finalShape.length)])));
 		const [index, valid] = require_backend.ShapeTracker.fromShape(xs.shape).toAluExp(src);
 		if (!valid.resolve()) throw new Error("internal: expected full validity mask in Gather");
-		return { exp: x.substitute({ gidx: index }) };
+		return { exp: [x.substitute({ gidx: index })] };
 	},
 	[Primitive.Transpose]: reshapeJit((st, { perm }) => st.permute(perm)),
 	[Primitive.Broadcast]: reshapeJit((st, { shape: shape$1, axis }) => st.broadcast(shape$1, axis)),
@@ -2164,6 +2287,7 @@ const jitRules = {
 	[Primitive.Argsort]: routineNoJit(),
 	[Primitive.TriangularSolve]: routineNoJit(),
 	[Primitive.Cholesky]: routineNoJit(),
+	[Primitive.LU]: routineNoJit(),
 	[Primitive.Jit]() {
 		throw new Error("internal: Jit should have been flattened before JIT compilation");
 	}
@@ -2245,7 +2369,7 @@ function splitGraphDataflow(backend, jaxpr) {
 		p1NextBlack.set(v, v);
 	}
 	const heterogeneousViewPrimitives = [Primitive.RandomBits, Primitive.Gather];
-	const needsCleanShapePrimitives = [Primitive.Pad];
+	const needsCleanShapePrimitives = [Primitive.Concatenate, Primitive.Pad];
 	for (let i = jaxpr.eqns.length - 1; i >= 0; i--) {
 		const eqn = jaxpr.eqns[i];
 		if (reductionEndpointEqns.has(i) || heterogeneousViewPrimitives.includes(eqn.primitive) || routinePrimitives.has(eqn.primitive) || eqn.outBinders.some((v) => blackNodes.has(v))) {
@@ -2315,7 +2439,7 @@ function splitGraphDataflow(backend, jaxpr) {
 //#endregion
 //#region src/frontend/array.ts
-const JsArray = globalThis.Array;
+const JsArray$1 = globalThis.Array;
 const inlineArrayLimit = 128;
 /** Version of pureArray with fudged types. */
 const fudgeArray = pureArray;
@@ -2442,6 +2566,10 @@ var Array$1 = class Array$1 extends Tracer {
 		this.#rc++;
 		return this;
 	}
+	/** Get the current reference count (for debugging memory management). */
+	get refCount() {
+		return this.#rc;
+	}
 	dispose() {
 		this.#check();
 		if (--this.#rc === 0) {
@@ -2599,7 +2727,7 @@ var Array$1 = class Array$1 extends Tracer {
 		} else if (castDtype === void 0) {
 			castDtype = arrays[i].#dtype;
 			castWeakType = arrays[i].#weakType;
-		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), new ShapedArray([], arrays[i].#dtype, arrays[i].#weakType)));
+		} else ({dtype: castDtype, weakType: castWeakType} = promoteAvals(new ShapedArray([], castDtype, castWeakType), arrays[i].aval.scalar()));
 		const weakType = castWeakType && !strongTypeOutput;
 		const { backend, committed } = Array$1.#computeBackend(name, arrays);
 		arrays = arrays.map((ar) => ar._putSync(backend));
@@ -2709,25 +2837,35 @@ var Array$1 = class Array$1 extends Tracer {
 		});
 	}
 	/** Apply an operation with custom lowering to this array. */
-	static #routine(routine, arrays, outputWeakType) {
-		const { backend, committed } = Array$1.#computeBackend(routine.name, arrays);
-		for (const ar of arrays) ar.#realize();
-		const inputs = arrays.map((ar) => ar.#source);
-		const outputs = routine.type.outputDtypes.map((dtype, i) => backend.malloc(require_backend.byteWidth(dtype) * require_backend.prod(routine.type.outputShapes[i])));
-		const pending = arrays.flatMap((ar) => ar.#pending);
-		for (const exe of pending) exe.updateRc(+outputs.length);
-		pending.push(new PendingExecute(backend, routine, inputs, outputs));
-		pending[pending.length - 1].updateRc(+outputs.length - 1);
-		arrays.forEach((ar) => ar.dispose());
-		return outputs.map((output, i) => new Array$1({
-			source: output,
-			st: require_backend.ShapeTracker.fromShape(routine.type.outputShapes[i]),
-			dtype: routine.type.outputDtypes[i],
-			weakType: outputWeakType[i],
-			backend,
-			committed,
-			pending
-		}));
+	static #routine(prim) {
+		return (arrays, params) => {
+			const { backend, committed } = Array$1.#computeBackend(prim, arrays);
+			for (const ar of arrays) ar.#realize();
+			const avals = arrays.map((ar) => ar.aval);
+			const avalsOut = abstractEvalRules[prim](avals, params);
+			const routine = new require_backend.Routine(routinePrimitives.get(prim), {
+				inputShapes: avals.map((a) => a.shape),
+				inputDtypes: avals.map((a) => a.dtype),
+				outputShapes: avalsOut.map((a) => a.shape),
+				outputDtypes: avalsOut.map((a) => a.dtype)
+			}, params);
+			const inputs = arrays.map((ar) => ar.#source);
+			const outputs = avalsOut.map((x) => backend.malloc(require_backend.byteWidth(x.dtype) * x.size));
+			const pending = arrays.flatMap((ar) => ar.#pending);
+			for (const exe of pending) exe.updateRc(+outputs.length);
+			pending.push(new PendingExecute(backend, routine, inputs, outputs));
+			pending[pending.length - 1].updateRc(+outputs.length - 1);
+			arrays.forEach((ar) => ar.dispose());
+			return outputs.map((output, i) => new Array$1({
+				source: output,
+				st: require_backend.ShapeTracker.fromShape(avalsOut[i].shape),
+				dtype: avalsOut[i].dtype,
+				weakType: avalsOut[i].weakType,
+				backend,
+				committed,
+				pending
+			}));
+		};
 	}
 	/**
 	* Normalizes this array into one backed by a `Slot`.
@@ -2992,17 +3130,44 @@ var Array$1 = class Array$1 extends Tracer {
 					y
 				], { dtypeOverride: [require_backend.DType.Bool] })];
 			},
+			[Primitive.Concatenate](xs, { axis }) {
+				const ndim$2 = xs[0].ndim;
+				const sizes = xs.map((x) => x.shape[axis]);
+				const finalSize = sizes.reduce((a, b) => a + b, 0);
+				const makePadAxis = (start, end) => require_backend.range(ndim$2).map((i) => i === axis ? [start, end] : [0, 0]);
+				let cum = 0;
+				const xsPadded = [];
+				for (let i = 0; i < xs.length; i++) {
+					const padding = makePadAxis(cum, finalSize - cum - sizes[i]);
+					xsPadded.push(xs[i].#reshape(xs[i].#st.pad(padding)));
+					cum += sizes[i];
+				}
+				const custom = (exps) => exps.reduce(require_backend.AluExp.add);
+				return [Array$1.#naryCustom("concatenate", custom, xsPadded)];
+			},
+			[Primitive.Split]([x], { axis, sizes }) {
+				const outputs = [];
+				for (let i = 0, start = 0; i < sizes.length; i++) {
+					const slice = require_backend.range(x.ndim).map((d) => d === axis ? [start, start + sizes[i]] : [0, x.shape[d]]);
+					outputs.push(x.ref.#reshape(x.#st.shrink(slice)));
+					start += sizes[i];
+				}
+				x.dispose();
+				return outputs;
+			},
 			[Primitive.RandomBits]([k0, k1], { shape: shape$1, mode }) {
-				const keyShape = require_backend.generalBroadcast(k0.shape, k1.shape);
-				if (!require_backend.deepEqual(require_backend.generalBroadcast(keyShape, shape$1), shape$1)) throw new TypeError(`Keys of shapes ${k0.shape} and ${k1.shape} cannot be broadcast to shape ${shape$1}`);
-				const c0 = zeros(shape$1, {
+				const keyShape = k0.shape;
+				const genShape = shape$1.slice(keyShape.length);
+				const c0 = zeros(genShape, {
 					dtype: require_backend.DType.Uint32,
 					device: k0.device
 				});
-				const c1 = arange(0, require_backend.prod(shape$1), 1, {
+				const c1 = arange(0, require_backend.prod(genShape), 1, {
 					dtype: require_backend.DType.Uint32,
 					device: k0.device
-				}).reshape(shape$1);
+				}).reshape(genShape);
+				k0 = k0.#reshape(k0.#st.reshape(keyShape.concat(require_backend.rep(genShape.length, 1))));
+				k1 = k1.#reshape(k1.#st.reshape(keyShape.concat(require_backend.rep(genShape.length, 1))));
 				const custom = ([k0$1, k1$1, c0$1, c1$1]) => require_backend.AluExp.threefry2x32(k0$1, k1$1, c0$1, c1$1, mode);
 				return [Array$1.#naryCustom("random_bits", custom, [
 					k0,
@@ -3034,42 +3199,11 @@ var Array$1 = class Array$1 extends Tracer {
 			[Primitive.Pad]([x], { width }) {
 				return [x.#reshape(x.#st.pad(width))];
 			},
-			[Primitive.Sort]([x]) {
-				const routine = new require_backend.Routine(require_backend.Routines.Sort, {
-					inputShapes: [x.aval.shape],
-					inputDtypes: [x.aval.dtype],
-					outputShapes: [x.aval.shape],
-					outputDtypes: [x.aval.dtype]
-				});
-				return Array$1.#routine(routine, [x], [x.#weakType]);
-			},
-			[Primitive.Argsort]([x]) {
-				const routine = new require_backend.Routine(require_backend.Routines.Argsort, {
-					inputShapes: [x.aval.shape],
-					inputDtypes: [x.aval.dtype],
-					outputShapes: [x.aval.shape, x.aval.shape],
-					outputDtypes: [x.aval.dtype, require_backend.DType.Int32]
-				});
-				return Array$1.#routine(routine, [x], [x.#weakType, false]);
-			},
-			[Primitive.TriangularSolve]([a, b], { unitDiagonal }) {
-				const routine = new require_backend.Routine(require_backend.Routines.TriangularSolve, {
-					inputShapes: [a.aval.shape, b.aval.shape],
-					inputDtypes: [a.aval.dtype, b.aval.dtype],
-					outputShapes: [b.aval.shape],
-					outputDtypes: [b.aval.dtype]
-				}, { unitDiagonal });
-				return Array$1.#routine(routine, [a, b], [a.#weakType && b.#weakType]);
-			},
-			[Primitive.Cholesky]([a]) {
-				const routine = new require_backend.Routine(require_backend.Routines.Cholesky, {
-					inputShapes: [a.aval.shape],
-					inputDtypes: [a.aval.dtype],
-					outputShapes: [a.aval.shape],
-					outputDtypes: [a.aval.dtype]
-				});
-				return Array$1.#routine(routine, [a], [a.#weakType]);
-			},
+			[Primitive.Sort]: Array$1.#routine(Primitive.Sort),
+			[Primitive.Argsort]: Array$1.#routine(Primitive.Argsort),
+			[Primitive.TriangularSolve]: Array$1.#routine(Primitive.TriangularSolve),
+			[Primitive.Cholesky]: Array$1.#routine(Primitive.Cholesky),
+			[Primitive.LU]: Array$1.#routine(Primitive.LU),
 			[Primitive.Jit](args, { jaxpr }) {
 				if (jaxpr.inBinders.length !== args.length) throw new Error(`jit expects ${jaxpr.inBinders.length} args, got ${args.length}`);
 				const { backend, committed } = Array$1.#computeBackend("jit", args);
@@ -3151,7 +3285,7 @@ function array(values, { shape: shape$1, dtype, device } = {}) {
 		if (!shape$1) {
 			shape$1 = [];
 			let cur = values;
-			while (JsArray.isArray(cur)) {
+			while (JsArray$1.isArray(cur)) {
 				shape$1.push(cur.length);
 				cur = cur[0];
 			}
@@ -3175,7 +3309,7 @@ function array(values, { shape: shape$1, dtype, device } = {}) {
 				device
 			});
 		} else {
-			const weakType = dtype == void 0;
+			const weakType = dtype == void 0 && shape$1.length === 0;
 			dtype = dtype ?? require_backend.DType.Float32;
 			const data = require_backend.dtypedJsArray(dtype, flat);
 			return arrayFromData(data, shape$1, {
@@ -3289,7 +3423,7 @@ function ones(shape$1, { dtype, device } = {}) {
 }
 /** Return a new array of given shape and type, filled with `fill_value`. */
 function full(shape$1, fillValue, { dtype, device } = {}) {
-	let weakType = dtype == void 0;
+	let weakType = dtype == void 0 && shape$1.length === 0;
 	if (typeof fillValue === "number") dtype = dtype ?? require_backend.DType.Float32;
 	else if (typeof fillValue === "boolean") {
 		dtype = dtype ?? require_backend.DType.Bool;
@@ -3447,6 +3581,27 @@ function linspace(start, stop, num = 50, endpoint = true, { dtype, device } = {}
 		committed: device != void 0
 	});
 }
+/**
+* Return numbers spaced evenly on a log scale.
+*
+* In linear space, the sequence starts at `base ** start` and ends at
+* `base ** stop` (see `endpoint` below).
+*
+* @param start - `base ** start` is the starting value of the sequence.
+* @param stop - `base ** stop` is the final value of the sequence, unless `endpoint` is false.
+* @param num - Number of samples to generate. Default is 50.
+* @param endpoint - If true, `stop` is the last sample. Otherwise, it is not included. Default is true.
+* @param base - The base of the log space. Default is 10.
+* @returns Array of evenly spaced values on a log scale.
+*/
+function logspace(start, stop, num = 50, endpoint = true, base = 10, { dtype, device } = {}) {
+	const y = linspace(start, stop, num, endpoint, {
+		dtype,
+		device
+	});
+	const logBase = Math.log(base);
+	return exp$1(mul(y, logBase));
+}
 function aluCompare(a, b, op) {
 	switch (op) {
 		case CompareOp.Less: return require_backend.AluExp.cmplt(a, b);
@@ -3524,6 +3679,7 @@ var BatchTrace = class extends Trace {
 			return valOuts$1.map((x) => new BatchTracer(this, x, null));
 		}
 		const [valOuts, bdimOuts] = vmapRule(this.axisSize, valsIn, bdimsIn, params);
+		if (valOuts.length !== bdimOuts.length) throw new Error(`vmap rule for ${primitive} returned mismatched lengths: ${valOuts.length} vs ${bdimOuts.length}`);
 		return require_backend.zip(valOuts, bdimOuts).map(([x, bd]) => new BatchTracer(this, x, bd));
 	}
 	get axisSize() {
@@ -3535,13 +3691,13 @@ var BatchTrace = class extends Trace {
 *
 * Reference: https://github.com/jax-ml/jax/blob/jax-v0.8.1/jax/_src/interpreters/batching.py#L1029
 */
-function broadcastBatcher(op) {
-	return (axisSize, args, dims) => {
+function broadcastBatcher(prim) {
+	return (axisSize, args, dims, params) => {
 		if (args.length === 0) throw new Error("Empty list in broadcastBatcher");
 		const nd = Math.max(...args.map((x, i) => ndim$1(x) + (dims[i] === null ? 1 : 0)));
 		const firstIdx = dims.findIndex((d) => d !== null);
 		const firstBdim = dims[firstIdx] - args[firstIdx].ndim;
-		if (require_backend.zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[op(...args)], [nd + firstBdim]];
+		if (require_backend.zip(args, dims).every(([x, d]) => d === null && ndim$1(x) < -firstBdim || d !== null && d - x.ndim === firstBdim)) return [[bind1(prim, args, params)], [nd + firstBdim]];
 		args = args.map((x, i) => {
 			if (dims[i] === null) return x;
 			x = moveBatchAxis(axisSize, dims[i], 0, x);
@@ -3552,37 +3708,45 @@ function broadcastBatcher(op) {
 			]);
 			return x;
 		});
-		return [[op(...args)], [0]];
+		return [[bind1(prim, args, params)], [0]];
 	};
 }
-function unopBatcher(op) {
+function unopBatcher(prim) {
 	return (axisSize, [x], [xBdim], params) => {
-		return [[op(x, params)], [xBdim]];
+		return [[bind1(prim, [x], params)], [xBdim]];
+	};
+}
+/**
+* Create a vmap rule for a single-input primitive whose batch axis may stay in
+* place as long as it does not fall within the trailing `inputDims` axes of `x`.
+*
+* @param prim - Primitive that is re-bound on the (possibly rearranged) input.
+* @param inputDims - Number of trailing axes of `x` the batch axis must not occupy.
+* @param numOutputs - Number of batch-dimension entries returned (presumably the number of outputs of `prim` — confirm). Default is 1.
+* @returns A rule `(axisSize, [x], [xBdim], params) => [outputs, batchDims]`.
+*/
+function lastDimsBatcher(prim, inputDims, numOutputs = 1) {
+	return (axisSize, [x], [xBdim], params) => {
+		require_backend.assertNonNull(xBdim);
+		// Batch axis sits before the trailing `inputDims` axes: bind as-is and keep its position.
+		if (xBdim < x.ndim - inputDims) return [bind(prim, [x], params), require_backend.rep(numOutputs, xBdim)];
+		// Otherwise relocate the batch axis to position 0 first and report it there.
+		x = moveBatchAxis(axisSize, xBdim, 0, x);
+		return [bind(prim, [x], params), require_backend.rep(numOutputs, 0)];
 	};
 }
 const vmapRules = {
-	[Primitive.Add]: broadcastBatcher(add$1),
-	[Primitive.Mul]: broadcastBatcher(mul),
-	[Primitive.Idiv]: broadcastBatcher(idiv),
-	[Primitive.Mod]: broadcastBatcher(mod),
-	[Primitive.Min]: broadcastBatcher(min$1),
-	[Primitive.Max]: broadcastBatcher(max$1),
-	[Primitive.Neg]: unopBatcher(neg),
-	[Primitive.Reciprocal]: unopBatcher(reciprocal$1),
-	[Primitive.Floor]: unopBatcher(floor$1),
-	[Primitive.Ceil]: unopBatcher(ceil$1),
-	[Primitive.StopGradient]: unopBatcher(stopGradient),
-	[Primitive.Cast]: unopBatcher((x, { dtype }) => cast(x, dtype)),
-	[Primitive.Bitcast]: unopBatcher((x, { dtype }) => bitcast(x, dtype)),
-	[Primitive.Sin]: unopBatcher(sin$1),
-	[Primitive.Cos]: unopBatcher(cos$1),
-	[Primitive.Asin]: unopBatcher(asin$1),
-	[Primitive.Atan]: unopBatcher(atan$1),
-	[Primitive.Exp]: unopBatcher(exp$1),
-	[Primitive.Log]: unopBatcher(log$1),
-	[Primitive.Erf]: unopBatcher(erf$1),
-	[Primitive.Erfc]: unopBatcher(erfc$1),
-	[Primitive.Sqrt]: unopBatcher(sqrt$1),
+	[Primitive.Add]: broadcastBatcher(Primitive.Add),
+	[Primitive.Mul]: broadcastBatcher(Primitive.Mul),
+	[Primitive.Idiv]: broadcastBatcher(Primitive.Idiv),
+	[Primitive.Mod]: broadcastBatcher(Primitive.Mod),
+	[Primitive.Min]: broadcastBatcher(Primitive.Min),
+	[Primitive.Max]: broadcastBatcher(Primitive.Max),
+	[Primitive.Neg]: unopBatcher(Primitive.Neg),
+	[Primitive.Reciprocal]: unopBatcher(Primitive.Reciprocal),
+	[Primitive.Floor]: unopBatcher(Primitive.Floor),
+	[Primitive.Ceil]: unopBatcher(Primitive.Ceil),
+	[Primitive.StopGradient]: unopBatcher(Primitive.StopGradient),
+	[Primitive.Cast]: unopBatcher(Primitive.Cast),
+	[Primitive.Bitcast]: unopBatcher(Primitive.Bitcast),
+	[Primitive.Sin]: unopBatcher(Primitive.Sin),
+	[Primitive.Cos]: unopBatcher(Primitive.Cos),
+	[Primitive.Asin]: unopBatcher(Primitive.Asin),
+	[Primitive.Atan]: unopBatcher(Primitive.Atan),
+	[Primitive.Exp]: unopBatcher(Primitive.Exp),
+	[Primitive.Log]: unopBatcher(Primitive.Log),
+	[Primitive.Erf]: unopBatcher(Primitive.Erf),
+	[Primitive.Erfc]: unopBatcher(Primitive.Erfc),
+	[Primitive.Sqrt]: unopBatcher(Primitive.Sqrt),
 	[Primitive.Reduce](axisSize, [x], [xBdim], { op, axis }) {
 		require_backend.assertNonNull(xBdim);
 		const newAxis = axis.map((ax) => ax + (xBdim <= ax ? 1 : 0));
@@ -3604,10 +3768,25 @@ const vmapRules = {
 		});
 		return [[z], [0]];
 	},
-	[Primitive.Compare](axisSize, args, dims, { op }) {
-		return broadcastBatcher((x, y) => compare(x, y, op))(axisSize, args, dims, {});
+	[Primitive.Compare]: broadcastBatcher(Primitive.Compare),
+	[Primitive.Where]: broadcastBatcher(Primitive.Where),
+	[Primitive.Concatenate](axisSize, xs, xBdims, { axis }) {
+		const minBdim = Math.min(...xBdims.filter((d) => d !== null));
+		xs = xs.map((x, i) => moveBatchAxis(axisSize, xBdims[i], minBdim, x));
+		const newAxis = axis + (minBdim <= axis ? 1 : 0);
+		return [[concatenate$1(xs, newAxis)], [minBdim]];
+	},
+	[Primitive.Split](axisSize, [x], [xBdim], { axis, sizes }) {
+		require_backend.assertNonNull(xBdim);
+		const newAxis = axis + (xBdim <= axis ? 1 : 0);
+		const outs = split$2(x, newAxis, sizes);
+		return [outs, require_backend.rep(outs.length, xBdim)];
+	},
+	[Primitive.RandomBits](axisSize, [k0, k1], [bdim0, bdim1], { shape: shape$1, mode }) {
+		k0 = moveBatchAxis(axisSize, bdim0, 0, k0);
+		k1 = moveBatchAxis(axisSize, bdim1, 0, k1);
+		return [[randomBits(k0, k1, [axisSize, ...shape$1], mode)], [0]];
 	},
-	[Primitive.Where]: broadcastBatcher(where$1),
 	[Primitive.Gather](axisSize, [x, ...indices], [xBdim, ...indicesBdim], { axis, outDim }) {
 		if (indicesBdim.every((d) => d === null)) {
 			require_backend.assertNonNull(xBdim);
@@ -3669,18 +3848,8 @@ const vmapRules = {
 		const newWidth = width.toSpliced(xBdim, 0, [0, 0]);
 		return [[pad$1(x, newWidth)], [xBdim]];
 	},
-	[Primitive.Sort](axisSize, [x], [xBdim]) {
-		require_backend.assertNonNull(xBdim);
-		if (xBdim !== x.ndim - 1) return [[sort$1(x)], [xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [[sort$1(x)], [0]];
-	},
-	[Primitive.Argsort](axisSize, [x], [xBdim]) {
-		require_backend.assertNonNull(xBdim);
-		if (xBdim !== x.ndim - 1) return [argsort$1(x), [xBdim, xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [argsort$1(x), [0, 0]];
-	},
+	[Primitive.Sort]: lastDimsBatcher(Primitive.Sort, 1),
+	[Primitive.Argsort]: lastDimsBatcher(Primitive.Argsort, 1, 2),
 	[Primitive.TriangularSolve](axisSize, [a, b], [aBdim, bBdim], { unitDiagonal }) {
 		if (aBdim === null) {
 			b = moveBatchAxis(axisSize, bBdim, -3, b);
@@ -3704,12 +3873,8 @@ const vmapRules = {
 		const x = bind1(Primitive.TriangularSolve, [a, b], { unitDiagonal });
 		return [[x], [0]];
 	},
-	[Primitive.Cholesky](axisSize, [x], [xBdim]) {
-		require_backend.assertNonNull(xBdim);
-		if (xBdim < x.ndim - 2) return [[cholesky$2(x)], [xBdim]];
-		x = moveBatchAxis(axisSize, xBdim, 0, x);
-		return [[cholesky$2(x)], [0]];
-	},
+	[Primitive.Cholesky]: lastDimsBatcher(Primitive.Cholesky, 2),
+	[Primitive.LU]: lastDimsBatcher(Primitive.LU, 2, 3),
 	[Primitive.Jit](axisSize, args, dims, { name, jaxpr }) {
 		const newJaxpr = vmapJaxpr(jaxpr, axisSize, dims);
 		const outs = bind(Primitive.Jit, [...newJaxpr.consts.map((c) => c.ref), ...args], {
@@ -3860,6 +4025,16 @@ function batchMatmulT(a, b) {
 function mT(a) {
 	return moveaxis(a, -2, -1);
 }
+/**
+* Slice `a` along a single axis, passing an empty slice spec (`[]`) for every
+* other axis.
+*
+* @param a - Array to slice.
+* @param axis - Axis to slice; validated with `checkAxis` against `a.ndim`.
+* @param p - Slice spec used for `axis`, passed to `a.slice` as-is.
+*/
+function sliceAxis(a, axis, p) {
+	const perAxis = Array.from({ length: a.shape.length }, () => []);
+	const target = require_backend.checkAxis(axis, a.ndim);
+	perAxis[target] = p;
+	return a.slice(...perAxis);
+}
+/**
+* Pad `a` along a single axis, using a `[0, 0]` pad width for every other axis.
+*
+* @param a - Array to pad.
+* @param axis - Axis to pad; validated with `checkAxis` against `a.ndim`.
+* @param p - Pad width used for `axis`, passed to `pad` as-is.
+*/
+function padAxis(a, axis, p) {
+	const widths = Array.from({ length: a.shape.length }, () => [0, 0]);
+	const target = require_backend.checkAxis(axis, a.ndim);
+	widths[target] = p;
+	return pad$1(a, widths);
+}
 const jvpRules = {
 	[Primitive.Add]: linearTangentsJvp(Primitive.Add),
 	[Primitive.Mul]: bilinearTangentsJvp(Primitive.Mul),
@@ -3958,6 +4133,8 @@ const jvpRules = {
 		dcond.dispose();
 		return [[where$1(cond.ref, x, y)], [where$1(cond, dx, dy)]];
 	},
+	[Primitive.Concatenate]: linearTangentsJvp(Primitive.Concatenate),
+	[Primitive.Split]: linearTangentsJvp(Primitive.Split),
 	[Primitive.RandomBits]: zeroTangentsJvp(Primitive.RandomBits),
 	[Primitive.Gather]([x, ...indices], [dx, ..._], { axis, outDim }) {
 		const indicesRef = indices.map((t) => t.ref);
@@ -3992,6 +4169,38 @@ const jvpRules = {
 		const dL = batchMatmulT(L.ref, triu(ST.ref, 1).add(triu(ST)).mul(.5));
 		return [[L], [dL]];
 	},
+	[Primitive.LU]([a], [da]) {
+		const [luMatrix, pivots, permutation] = lu$1(a);
+		const [m, n] = a.shape.slice(-2);
+		const k = Math.min(m, n);
+		const luSliceL = sliceAxis(luMatrix.ref, -1, [0, k]);
+		const lLower = tril(luSliceL, -1);
+		const lPadded = m > k ? padAxis(lLower, -1, [0, m - k]) : lLower;
+		const L = lPadded.add(eye(m));
+		const luSliceU = sliceAxis(luMatrix.ref, -2, [0, k]);
+		const uUpper = triu(luSliceU);
+		const uPadded = n > k ? padAxis(uUpper, -2, [0, n - k]) : uUpper;
+		const uEye = n > k ? padAxis(padAxis(eye(n - k), -1, [k, 0]), -2, [k, 0]) : zerosLike$1(uPadded.ref);
+		const U = uPadded.add(uEye);
+		const P = permutation.ref.reshape([...permutation.shape, 1]).equal(arange(m)).astype(da.dtype);
+		const pda = batchMatmulT(P, mT(da));
+		const la = mT(triangularSolve$1(L.ref, mT(pda), {
+			lower: true,
+			unitDiagonal: true
+		}));
+		const lau = triangularSolve$1(mT(U.ref), la, { lower: true });
+		const lDot = batchMatmulT(L, mT(tril(lau.ref, -1)));
+		const uDot = batchMatmulT(triu(lau), mT(U));
+		return [[
+			luMatrix,
+			pivots,
+			permutation
+		], [
+			lDot.add(uDot),
+			zerosLike$1(pivots.ref),
+			zerosLike$1(permutation.ref)
+		]];
+	},
 	[Primitive.Jit](primals, tangents, { name, jaxpr }) {
 		const newJaxpr = jvpJaxpr(jaxpr);
 		const outs = bind(Primitive.Jit, [
@@ -4032,17 +4241,39 @@ function jvpFlat(f, primals, tangents) {
 		_usingCtx$1.d();
 	}
 }
-function jvp$1(f, primals, tangents) {
+function jvp$1(f, primals, tangents, { hasAux = false } = {}) {
 	const [primalsFlat, inTree] = flatten(primals);
 	const [tangentsFlat, inTree2] = flatten(tangents);
 	if (!inTree.equals(inTree2)) throw new TreeMismatchError("jvp", inTree, inTree2);
-	const [flatFun, outTree] = flattenFun(f, inTree);
+	let flatFun, outTree, aux;
+	if (hasAux) [flatFun, outTree, aux] = flattenFunWithAux(f, inTree);
+	else [flatFun, outTree] = flattenFun(f, inTree);
 	const [primalsOutFlat, tangentsOutFlat] = jvpFlat(flatFun, primalsFlat, tangentsFlat);
 	if (outTree.value === void 0) throw new Error("outTree was not set in jvp");
 	const primalsOut = unflatten(outTree.value, primalsOutFlat);
 	const tangentsOut = unflatten(outTree.value, tangentsOutFlat);
+	if (hasAux) return [
+		primalsOut,
+		tangentsOut,
+		lowerAux(aux.value)
+	];
 	return [primalsOut, tangentsOut];
 }
+/**
+* Lowering for auxiliary data returned in `hasAux: true` methods.
+*
+* Maps over the `aux` tree and, for every `Tracer` leaf, repeatedly lowers it
+* until its trace level is no longer above the current trace level. Leaves that
+* are not tracers are returned unchanged.
+*
+* @param aux - Auxiliary value (tree) to lower.
+* @returns The tree produced by `map` with each leaf lowered.
+* @throws Error if `fullLower()` fails to reduce a tracer's trace level.
+*/
+function lowerAux(aux) {
+	const level = currentTraceLevel();
+	return map((x) => {
+		// JVP tracers are unwrapped to their primal (the tangent is disposed);
+		// any other tracer is lowered with `fullLower()`, which must make progress.
+		if (x instanceof Tracer) while (x._trace.main.level > level) if (x instanceof JVPTracer) {
+			x.tangent.dispose();
+			x = x.primal;
+		} else {
+			const y = x.fullLower();
+			// Guard against looping forever when lowering does not reduce the level.
+			if (y._trace.main.level >= x._trace.main.level) throw new Error("internal: lowerAux did not reduce trace level");
+			x = y;
+		}
+		return x;
+	}, aux);
+}
 //#endregion
 //#region src/frontend/linearize.ts
@@ -4113,9 +4344,11 @@ function linearizeFlat(f, primalsIn) {
 		dispose$1
 	];
 }
-function linearize$1(f, ...primalsIn) {
+function linearize$1(f, primalsIn, { hasAux = false } = {}) {
 	const [primalsInFlat, inTree] = flatten(primalsIn);
-	const [fFlat, outTree] = flattenFun(f, inTree);
+	let fFlat, outTree, aux;
+	if (hasAux) [fFlat, outTree, aux] = flattenFunWithAux(f, inTree);
+	else [fFlat, outTree] = flattenFun(f, inTree);
 	const [primalsOutFlat, fLinFlat, dispose$1] = linearizeFlat(fFlat, primalsInFlat.map(pureArray));
 	if (outTree.value === void 0) throw new Error("outTree was not set in linearize");
 	const primalsOut = unflatten(outTree.value, primalsOutFlat);
@@ -4126,6 +4359,11 @@ function linearize$1(f, ...primalsIn) {
 		return unflatten(outTree.value, tangentsOutFlat);
 	});
 	fLin.dispose = dispose$1;
+	if (hasAux) return [
+		primalsOut,
+		fLin,
+		lowerAux(aux.value)
+	];
 	return [primalsOut, fLin];
 }
 var PartialEvalTracer = class extends Tracer {
@@ -4529,6 +4767,15 @@ const transposeRules = {
 		cond.dispose();
 		return cts;
 	},
+	[Primitive.Concatenate]([ct], inputs, { axis }) {
+		if (inputs.some((x) => !(x instanceof UndefPrimal))) throw new NonlinearError(Primitive.Concatenate);
+		const sizes = inputs.map((x) => x.aval.shape[axis]);
+		return split$2(ct, axis, sizes);
+	},
+	[Primitive.Split](cts, [x], { axis }) {
+		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Split);
+		return [concatenate$1(cts, axis)];
+	},
 	[Primitive.Gather]([ct], [x, ...indices], { axis, outDim }) {
 		if (!(x instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
 		if (indices.some((i) => i instanceof UndefPrimal)) throw new NonlinearError(Primitive.Gather);
@@ -4617,9 +4864,11 @@ function vjpFlat(f, primalsIn) {
 		dispose$1
 	];
 }
-function vjp$1(f, ...primalsIn) {
+function vjp$1(f, primalsIn, { hasAux = false } = {}) {
 	const [primalsInFlat, inTree] = flatten(primalsIn);
-	const [fFlat, outTree] = flattenFun(f, inTree);
+	let fFlat, outTree, aux;
+	if (hasAux) [fFlat, outTree, aux] = flattenFunWithAux(f, inTree);
+	else [fFlat, outTree] = flattenFun(f, inTree);
 	const [primalsOutFlat, fVjpFlat, dispose$1] = vjpFlat(fFlat, primalsInFlat.map(pureArray));
 	if (outTree.value === void 0) throw new Error("outTree was not set in vjp");
 	const primalsOut = unflatten(outTree.value, primalsOutFlat);
@@ -4630,26 +4879,43 @@ function vjp$1(f, ...primalsIn) {
 		return unflatten(inTree, cotangentsInFlat);
 	});
 	fVjp.dispose = dispose$1;
+	if (hasAux) return [
+		primalsOut,
+		fVjp,
+		lowerAux(aux.value)
+	];
 	return [primalsOut, fVjp];
 }
-function grad$1(f) {
-	const valueAndGradFn = valueAndGrad$1(f);
+function grad$1(f, opts) {
+	const valueAndGradFn = valueAndGrad$1(f, opts);
 	return (...x) => {
-		const [y, dx] = valueAndGradFn(...x);
-		y.dispose();
-		return dx;
+		if (opts?.hasAux) {
+			const [[y, aux], dx] = valueAndGradFn(...x);
+			y.dispose();
+			return [dx, aux];
+		} else {
+			const [y, dx] = valueAndGradFn(...x);
+			y.dispose();
+			return dx;
+		}
 	};
 }
-function valueAndGrad$1(f) {
+function valueAndGrad$1(f, opts) {
+	const argnums = opts?.argnums ?? 0;
+	const hasAux = opts?.hasAux ?? false;
+	require_backend.checkInts(argnums);
+	const argnumsSet = new Set(typeof argnums === "number" ? [argnums] : argnums);
 	return (...x) => {
 		if (x.length === 0) throw new Error("grad requires at least one argument to differentiate");
-		const [y, fVjp] = vjp$1(f, x[0], ...x.slice(1).map(stopGradient));
+		for (let i = 0; i < x.length; i++) if (!argnumsSet.has(i)) x[i] = map(stopGradient, x[i]);
+		const [y, fVjp, aux] = vjp$1(f, x, { hasAux });
 		if (!(y instanceof Tracer) || ndim$1(y) !== 0) throw new TypeError("grad requires a scalar output");
 		if (!require_backend.isFloatDtype(y.dtype)) throw new TypeError("grad only supports floating-point dtypes");
-		const [ct, ...rest] = fVjp(onesLike$1(y.ref));
-		for (const r of rest) dispose(r);
+		const cts = fVjp(onesLike$1(y.ref));
 		fVjp.dispose();
-		return [y, ct];
+		for (let i = 0; i < cts.length; i++) if (!argnumsSet.has(i)) dispose(cts[i]);
+		const grads = typeof argnums === "number" ? cts[argnums] : argnums.map((i) => cts[i]);
+		return hasAux ? [[y, aux], grads] : [y, grads];
 	};
 }
 function jacrev$1(f) {
@@ -4657,7 +4923,7 @@ function jacrev$1(f) {
 		if (x.shape.length !== 1) throw new TypeError("jacrev only supports 1D inputs");
 		const [size$1] = x.shape;
 		const pullback = (ct) => {
-			const [y, fVjp] = vjp$1(f, x);
+			const [y, fVjp] = vjp$1(f, [x]);
 			y.dispose();
 			const [ret] = fVjp(ct);
 			fVjp.dispose();
@@ -4666,6 +4932,9 @@ function jacrev$1(f) {
 		return vmap$1(pullback, [1])(eye(size$1, void 0, { dtype: x.dtype }));
 	};
 }
+/**
+* Build a function that evaluates the Hessian of `f`, obtained by applying
+* `jacfwd` to the gradient function `grad(f)`.
+*/
+function hessian$1(f) {
+	const gradient = grad$1(f);
+	return jacfwd$1(gradient);
+}
 //#endregion
 //#region src/library/numpy/einsum.ts
@@ -4804,8 +5073,8 @@ function computeSizeMap({ shapes, lhsIndices, rhsIndex }) {
 			const idx = lhsIndex[j];
 			const dim = shape$1[j];
 			const existing = sizeMap.get(idx);
-			if (existing === void 0) sizeMap.set(idx, dim);
-			else if (existing !== dim) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
+			if (existing === void 0 || existing === 1) sizeMap.set(idx, dim);
+			else if (existing !== dim && dim !== 1) throw new Error(`Inconsistent size for index ${idx} in einsum: ${existing} vs ${dim}`);
 		}
 	}
 	for (const [idx, size$1] of sizeMap) if (!Number.isInteger(idx) || idx < 0) throw new Error(`Invalid index ${idx} in einsum expression, must be non-negative integer`);
@@ -4961,27 +5230,53 @@ function ifft(a, axis = -1) {
 //#region src/library/numpy-linalg.ts
 var numpy_linalg_exports = {};
 __export(numpy_linalg_exports, {
-	cholesky: () => cholesky$1,
+	cholesky: () => cholesky,
+	det: () => det,
 	diagonal: () => diagonal,
+	inv: () => inv,
 	lstsq: () => lstsq,
 	matmul: () => matmul,
+	matrixPower: () => matrixPower,
 	matrixTranspose: () => matrixTranspose,
 	outer: () => outer,
+	slogdet: () => slogdet,
+	solve: () => solve,
 	tensordot: () => tensordot,
 	trace: () => trace,
 	vecdot: () => vecdot
 });
+/**
+* Validate that the last two dimensions of `a` are equal and return that size.
+*
+* @param name - Caller name, used as the prefix of the error message.
+* @param a - Array-like with `ndim`, `shape` and `aval`.
+* @returns The size of the last dimension.
+* @throws Error if `a` has fewer than 2 dimensions or its last two dimensions differ.
+*/
+function checkSquare(name, a) {
+	const rank = a.ndim;
+	const lastDim = a.shape[rank - 1];
+	const isSquare = rank >= 2 && lastDim === a.shape[rank - 2];
+	if (!isSquare) throw new Error(`${name}: input must be at least 2D square matrix, got ${a.aval}`);
+	return lastDim;
+}
 /**
 * Compute the Cholesky decomposition of a (batched) positive-definite matrix.
 *
 * This is like `jax.lax.linalg.cholesky()`, except with an option to symmetrize
 * the input matrix, which is on by default.
 */
-function cholesky$1(a, { upper = false, symmetrizeInput = true } = {}) {
+function cholesky(a, { upper = false, symmetrizeInput = true } = {}) {
 	a = fudgeArray(a);
-	if (a.ndim < 2 || a.shape[a.ndim - 1] !== a.shape[a.ndim - 2]) throw new Error(`cholesky: input must be at least 2D square matrix, got ${a.aval}`);
+	checkSquare("cholesky", a);
 	if (symmetrizeInput) a = a.ref.add(matrixTranspose(a)).mul(.5);
-	return cholesky(a, { upper });
+	return cholesky$1(a, { upper });
+}
+/**
+* Compute the determinant of a square matrix (batched).
+*
+* Uses the LU decomposition: the result is the product of the diagonal of the
+* LU matrix multiplied by a sign derived from the pivots.
+*
+* @param a - Input whose last two dimensions must be square (checked by `checkSquare`).
+* @returns The product of the LU diagonal along the last axis, times the pivot sign.
+*/
+function det(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("det", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	// The permutation output is not needed here; release it.
+	permutation.dispose();
+	// Count positions where pivots[i] != i (presumably one row interchange each), modulo 2.
+	const parity = pivots.notEqual(arange(n)).astype(int32).sum(-1).mod(2);
+	// Map parity 0 -> +1 and parity 1 -> -1.
+	const sign$1 = parity.mul(-2).add(1);
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	return prod$1(diag$1, -1).mul(sign$1);
+}
+/**
+* Compute the inverse of a square matrix (batched).
+*
+* Implemented as `solve(a, I)`, where `I` is the `n x n` identity from `eye(n)`.
+*/
+function inv(a) {
+	a = fudgeArray(a);
+	const identityMatrix = eye(checkSquare("inv", a));
+	return solve(a, identityMatrix);
 }
 /**
 * Return the least-squares solution to a linear equation.
@@ -5005,7 +5300,7 @@ function lstsq(a, b) {
 	const at = matrixTranspose(a.ref);
 	if (m <= n) {
 		const aat = matmul(a, at.ref);
-		const l = cholesky$1(aat, { symmetrizeInput: false });
+		const l = cholesky(aat, { symmetrizeInput: false });
 		const lb = triangularSolve(l.ref, b, {
 			leftSide: true,
 			lower: true
@@ -5017,7 +5312,7 @@ function lstsq(a, b) {
 		return matmul(at, llb.ref);
 	} else {
 		const ata = matmul(at.ref, a);
-		const l = cholesky$1(ata, { symmetrizeInput: false });
+		const l = cholesky(ata, { symmetrizeInput: false });
 		const atb = matmul(at, b);
 		const lb = triangularSolve(l.ref, atb, {
 			leftSide: true,
@@ -5030,6 +5325,169 @@ function lstsq(a, b) {
 		return llb;
 	}
 }
+/**
+* Raise a square matrix to an integer power, via repeated squarings.
+*
+* @param a - Input whose last two dimensions must be square (checked by `checkSquare`).
+* @param n - Integer exponent. `0` returns `eye(m)` broadcast to `a.shape`; a negative
+* value first replaces `a` with `inv(a)` and uses `-n`.
+* @returns The matrix power.
+* @throws Error if `n` is not an integer, or if `a` fails the square-matrix check.
+*/
+function matrixPower(a, n) {
+	if (!Number.isInteger(n)) throw new Error(`matrixPower: exponent must be an integer, got ${n}`);
+	a = fudgeArray(a);
+	const m = checkSquare("matrixPower", a);
+	if (n === 0) {
+		a.dispose();
+		return broadcastTo(eye(m), a.shape);
+	}
+	if (n < 0) {
+		a = inv(a);
+		n = -n;
+	}
+	let result = null;
+	// a2k holds a^(2^k); it is squared once per iteration after the first.
+	let a2k = a;
+	// Walk the bits of n from least significant; multiply in a2k when the bit is set.
+	for (let k = 0; n; k++) {
+		if (k > 0) a2k = matmul(a2k.ref, a2k);
+		if (n % 2 === 1) result = result === null ? a2k.ref : matmul(result, a2k.ref);
+		n = Math.floor(n / 2);
+	}
+	a2k.dispose();
+	return result;
+}
+/**
+* Return sign and natural logarithm of the determinant of `a`.
+*
+* Computed from the LU decomposition: the log-magnitude is the sum of
+* `log(|diag|)` of the LU matrix, and the sign combines the pivot parity with
+* the number of negative diagonal entries.
+*
+* @param a - Input whose last two dimensions must be square (checked by `checkSquare`).
+* @returns Tuple `[sign, logabsdet]`; `sign` is computed as `1 - 2 * parity`.
+*/
+function slogdet(a) {
+	a = fudgeArray(a);
+	const n = checkSquare("slogdet", a);
+	const [lu$2, pivots, permutation] = lu(a);
+	// The permutation output is not needed here; release it.
+	permutation.dispose();
+	// Count positions where pivots[i] != i (presumably one row interchange each).
+	let parity = pivots.notEqual(arange(n)).astype(int32).sum(-1);
+	const diag$1 = lu$2.diagonal(0, -1, -2);
+	// Each negative diagonal entry also flips the sign of the determinant.
+	parity = parity.add(diag$1.ref.less(0).astype(int32).sum(-1)).mod(2);
+	const logabsdet = log(absolute(diag$1)).sum(-1);
+	// Map parity 0 -> +1 and parity 1 -> -1.
+	const sign$1 = parity.mul(-2).add(1);
+	return [sign$1, logabsdet];
+}
+/**
+* Solve a linear system of equations.
+*
+* This solves a (batched) linear system of equations `a @ x = b` for `x` given
+* `a` and `b`. If `a` is singular, this will return `nan` or `inf` values.
+*
+* The system is solved through the LU decomposition of `a`: the rows of `b` are
+* permuted, then two triangular solves are applied (unit lower-triangular,
+* followed by upper-triangular).
+*
+* @param a - Coefficient matrix of shape `(..., N, N)`.
+* @param b - Values of shape `(N,)` or `(..., N, M)`.
+* @returns Solution `x` of shape `(..., N)` or `(..., N, M)`.
+* @throws Error if `a` is not square, `b` is a scalar, or the second-to-last
+* dimension of `b` does not equal `N`.
+*/
+function solve(a, b) {
+	a = fudgeArray(a);
+	b = fudgeArray(b);
+	const n = checkSquare("solve", a);
+	if (b.ndim === 0) throw new Error(`solve: b cannot be scalar`);
+	// A 1D `b` is treated as a single column and squeezed back at the end.
+	const bIs1d = b.ndim === 1;
+	if (bIs1d) b = b.reshape([...b.shape, 1]);
+	if (b.shape[b.ndim - 2] !== n) throw new Error(`solve: leading dimension of b must match size of a, got a=${a.aval}, b=${b.aval}`);
+	const m = b.shape[b.ndim - 1];
+	// Broadcast the leading (batch) dimensions of `a` and `b` against each other.
+	const batchDims = require_backend.generalBroadcast(a.shape.slice(0, -2), b.shape.slice(0, -2));
+	a = broadcastTo(a, [
+		...batchDims,
+		n,
+		n
+	]);
+	b = broadcastTo(b, [
+		...batchDims,
+		n,
+		m
+	]);
+	const [lu$2, pivots, permutation] = lu(a);
+	pivots.dispose();
+	// One-hot matrix with P[i, j] = (j == permutation[i]), so `P @ b` reorders the rows of `b`.
+	const P = arange(n).equal(permutation.reshape([...permutation.shape, 1])).astype(b.dtype);
+	// Forward substitution with the unit-diagonal lower triangle of the LU matrix.
+	const LPb = triangularSolve(lu$2.ref, matmul(P, b), {
+		leftSide: true,
+		lower: true,
+		unitDiagonal: true
+	});
+	// Back substitution with the upper triangle of the LU matrix.
+	// NOTE(review): `LPb.ref` is passed and `LPb` is not disposed afterwards in this
+	// function — confirm this matches the intended reference counting.
+	let x = triangularSolve(lu$2, LPb.ref, {
+		leftSide: true,
+		lower: false
+	});
+	if (bIs1d) x = squeeze(x, -1);
+	return x;
+}
+//#endregion
+//#region src/library/numpy/dtype-info.ts
+/**
+* Machine limits for floating-point types.
+*
+* @param dtype - A floating-point dtype (`Float16`, `Float32` or `Float64`).
+* @returns A frozen object describing the format: bit width, machine epsilon
+* (`eps`, `epsneg` and their exponents `machep`, `negep`), finite range (`min`,
+* `max`), exponent limits, mantissa/exponent bit counts, decimal precision and
+* the smallest normal/subnormal values.
+* @throws Error if `dtype` is not a floating-point type, or is not one of the supported ones.
+*/
+function finfo(dtype) {
+	if (!require_backend.isFloatDtype(dtype)) throw new Error(`finfo: received ${dtype}, must be a floating-point type`);
+	switch (dtype) {
+		case require_backend.DType.Float16: return Object.freeze({
+			bits: 16,
+			dtype: require_backend.DType.Float16,
+			eps: 2 ** -10,
+			epsneg: 2 ** -11,
+			machep: -10,
+			max: 65504,
+			maxexp: 16,
+			min: -65504,
+			minexp: -14,
+			// negep is the exponent of epsneg, i.e. epsneg === 2 ** negep.
+			negep: -11,
+			nexp: 5,
+			nmant: 10,
+			precision: 3,
+			resolution: .001,
+			smallestNormal: 2 ** -14,
+			smallestSubnormal: 2 ** -24
+		});
+		case require_backend.DType.Float32: return Object.freeze({
+			bits: 32,
+			dtype: require_backend.DType.Float32,
+			eps: 2 ** -23,
+			epsneg: 2 ** -24,
+			machep: -23,
+			max: 34028234663852886e22,
+			maxexp: 128,
+			min: -34028234663852886e22,
+			minexp: -126,
+			negep: -24,
+			nexp: 8,
+			nmant: 23,
+			precision: 6,
+			resolution: 1e-6,
+			smallestNormal: 2 ** -126,
+			smallestSubnormal: 2 ** -149
+		});
+		case require_backend.DType.Float64: return Object.freeze({
+			bits: 64,
+			dtype: require_backend.DType.Float64,
+			eps: 2 ** -52,
+			epsneg: 2 ** -53,
+			machep: -52,
+			max: Number.MAX_VALUE,
+			maxexp: 1024,
+			min: -Number.MAX_VALUE,
+			minexp: -1022,
+			negep: -53,
+			nexp: 11,
+			nmant: 52,
+			precision: 15,
+			resolution: 1e-15,
+			smallestNormal: 2 ** -1022,
+			smallestSubnormal: 2 ** -1074
+		});
+		default: throw new Error(`finfo: unsupported dtype ${dtype}`);
+	}
+}
+/**
+* Machine limits for integer types.
+*
+* @param dtype - An integer dtype (`Int32` or `Uint32`).
+* @returns A frozen object with the bit width, the dtype, and its `max` / `min` values.
+* @throws Error for any other dtype.
+*/
+function iinfo(dtype) {
+	if (dtype === require_backend.DType.Int32) return Object.freeze({
+		bits: 32,
+		dtype: require_backend.DType.Int32,
+		max: 2147483647,
+		min: -2147483648
+	});
+	if (dtype === require_backend.DType.Uint32) return Object.freeze({
+		bits: 32,
+		dtype: require_backend.DType.Uint32,
+		max: 4294967295,
+		min: 0
+	});
+	throw new Error(`iinfo: unsupported dtype ${dtype}`);
+}
 //#endregion
 //#region src/library/numpy.ts
@@ -5085,6 +5543,7 @@ __export(numpy_exports, {
 	diag: () => diag,
 	diagonal: () => diagonal,
 	divide: () => trueDivide,
+	divmod: () => divmod,
 	dot: () => dot$1,
 	dstack: () => dstack,
 	e: () => e,
@@ -5097,6 +5556,7 @@ __export(numpy_exports, {
 	expm1: () => expm1,
 	eye: () => eye,
 	fft: () => numpy_fft_exports,
+	finfo: () => finfo,
 	flip: () => flip,
 	fliplr: () => fliplr,
 	flipud: () => flipud,
@@ -5104,6 +5564,7 @@ __export(numpy_exports, {
 	float32: () => float32,
 	float64: () => float64,
 	floor: () => floor,
+	floorDivide: () => floorDivide,
 	fmod: () => fmod,
 	frexp: () => frexp,
 	full: () => full,
@@ -5116,6 +5577,7 @@ __export(numpy_exports, {
 	hstack: () => hstack,
 	hypot: () => hypot,
 	identity: () => identity$1,
+	iinfo: () => iinfo,
 	inf: () => inf,
 	inner: () => inner,
 	int32: () => int32,
@@ -5133,6 +5595,7 @@ __export(numpy_exports, {
 	log10: () => log10,
 	log1p: () => log1p,
 	log2: () => log2,
+	logspace: () => logspace,
 	matmul: () => matmul,
 	matrixTranspose: () => matrixTranspose,
 	max: () => max,
@@ -5169,9 +5632,11 @@ __export(numpy_exports, {
 	shape: () => shape,
 	sign: () => sign,
 	sin: () => sin,
+	sinc: () => sinc,
 	sinh: () => sinh,
 	size: () => size,
 	sort: () => sort,
+	split: () => split$1,
 	sqrt: () => sqrt,
 	square: () => square,
 	squeeze: () => squeeze,
@@ -5179,6 +5644,8 @@ __export(numpy_exports, {
 	std: () => std,
 	subtract: () => subtract,
 	sum: () => sum,
+	swapaxes: () => swapaxes,
+	take: () => take,
 	tan: () => tan,
 	tanh: () => tanh,
 	tensordot: () => tensordot,
@@ -5437,6 +5904,45 @@ function flip(x, axis = null) {
 	return flip$1(x, axis);
 }
 /**
+* Split an array into multiple sub-arrays along an axis.
+*
+* @param a - The input array to split.
+* @param indicesOrSections - If an integer, it indicates the number of equal
+* sections to create along the specified axis. If a list of integers, it
+* specifies the indices at which to split the array.
+* @param axis - The axis along which to split the array. Default is 0.
+*/
+function split$1(a, indicesOrSections, axis = 0) {
+	a = fudgeArray(a);
+	axis = require_backend.checkAxis(axis, a.ndim);
+	const size$1 = a.shape[axis];
+	let sizes;
+	if (typeof indicesOrSections === "number") {
+		// Equal sections: the axis length must divide evenly.
+		if (size$1 % indicesOrSections !== 0) throw new Error(`Array of size ${size$1} cannot be split into ${indicesOrSections} equal parts`);
+		const partSize = size$1 / indicesOrSections;
+		sizes = require_backend.rep(indicesOrSections, partSize);
+	} else {
+		// Split points -> consecutive chunk lengths. An empty list of split points
+		// yields a single chunk covering the whole axis.
+		sizes = [];
+		let previous = 0;
+		for (const index of indicesOrSections) {
+			sizes.push(index - previous);
+			previous = index;
+		}
+		sizes.push(size$1 - previous);
+	}
+	const results = [];
+	// Emit at most 8 pieces per `split$2` call (presumably a limit of the
+	// underlying primitive — confirm): 7 finished chunks plus one remainder that
+	// is split again on the next iteration.
+	for (let i = 0; i < sizes.length; i += 7) {
+		if (i + 8 >= sizes.length) {
+			// The remaining chunks fit into a single call.
+			results.push(...split$2(a, axis, sizes.slice(i)));
+			break;
+		}
+		const groupSizes = [...sizes.slice(i, i + 7), sizes.slice(i + 7).reduce((x, y) => x + y, 0)];
+		const outs = split$2(a, axis, groupSizes);
+		results.push(...outs.slice(0, -1));
+		a = outs[outs.length - 1];
+	}
+	return results;
+}
+/**
 * Join a sequence of arrays along an existing axis.
 *
 * The arrays must have the same shape, except in the dimension corresponding to
@@ -5448,13 +5954,11 @@ function concatenate(xs, axis = 0) {
 	if (xs.length === 0) throw new Error("Need at least one array to concatenate");
 	const shapes = xs.map(shape);
 	axis = require_backend.checkAxis(axis, shapes[0].length);
-	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays with shapes ${JSON.stringify(shapes)} along axis ${axis}`);
-	const makePadAxis = (start, end) => shapes[0].map((_, i) => i === axis ? [start, end] : [0, 0]);
+	for (let i = 1; i < shapes.length; i++) if (shapes[i].length !== shapes[0].length || !shapes[i].every((d, j) => j === axis || d === shapes[0][j])) throw new Error(`Cannot concatenate arrays ${xs[0].aval} and ${xs[i].aval} along axis ${axis}`);
 	let result = xs[0];
-	for (let i = 1; i < xs.length; i++) {
-		const len1 = result.shape[axis];
-		const len2 = shapes[i][axis];
-		result = pad(result, makePadAxis(0, len2)).add(pad(xs[i], makePadAxis(len1, 0)));
+	for (let i = 1; i < xs.length; i += 7) {
+		const group = xs.slice(i, i + 7);
+		result = concatenate$1([result, ...group], axis);
 	}
 	return result;
 }
@@ -5539,6 +6043,17 @@ function flipud(x) {
 function fliplr(x) {
 	return flip(x, 1);
 }
+/**
+* Interchange two axes of an array.
+*
+* Both axes are validated with `checkAxis`. When they resolve to the same axis
+* the (converted) input is returned as-is; otherwise the array is transposed
+* with a permutation that swaps the two positions.
+*/
+function swapaxes(a, axis1, axis2) {
+	a = fudgeArray(a);
+	const first = require_backend.checkAxis(axis1, a.ndim);
+	const second = require_backend.checkAxis(axis2, a.ndim);
+	if (first === second) return a;
+	const order = require_backend.range(a.ndim);
+	[order[first], order[second]] = [second, first];
+	return transpose(a, order);
+}
 /** Transpose the last two dimensions of an array. */
 function matrixTranspose(a) {
 	if (ndim(a) < 2) throw new Error(`matrixTranspose: input array must be at least 2D`);
@@ -5706,6 +6221,20 @@ function sort(a, axis = -1) {
 function argsort(a, axis = -1) {
 	return fudgeArray(a).argsort(axis);
 }
+/**
+* Take elements from an array along an axis.
+*
+* Equivalent to advanced indexing with integer indices over that axis. When
+* `axis` is `null` (the default), the array is flattened with `ravel` first and
+* indexed along axis 0.
+*/
+function take(a, indices, axis = null) {
+	const useFlattened = axis === null;
+	const source = useFlattened ? ravel(a) : a;
+	const resolvedAxis = require_backend.checkAxis(useFlattened ? 0 : axis, ndim(source));
+	return gather(source, [indices], [resolvedAxis], resolvedAxis);
+}
 /** Return if two arrays are element-wise equal within a tolerance. */
 function allclose(actual, expected, options) {
 	const { rtol = 1e-5, atol = 1e-7 } = options ?? {};
@@ -6025,6 +6554,20 @@ function tan(x) {
 	x = fudgeArray(x);
 	return sin(x.ref).div(cos(x));
 }
+/**
+* @function
+* Return the normalized sinc function.
+*
+* Defined as `sin(πx) / (πx)` for `x != 0` and `1` for `x = 0`; this is the
+* normalized form commonly used in signal processing.
+*
+* **Note:** JVP is not supported at x=0 due to discontinuous derivative. This
+* requires a custom JVP rule to handle properly (see JAX implementation).
+*/
+const sinc = jit$1(function sinc$1(x) {
+	const scaled = x.ref.mul(Math.PI);
+	// Select the constant 1 exactly where x == 0, and the quotient elsewhere.
+	const isZero = equal(x, 0);
+	const ratio = sin(scaled.ref).div(scaled);
+	return where(isZero, 1, ratio);
+});
 /** Element-wise inverse cosine function (inverse of cos). */
 function acos(x) {
 	return subtract(pi / 2, asin(x));
@@ -6077,6 +6620,25 @@ function trueDivide(x, y) {
 	return x.div(y);
 }
 /**
+* Return the largest integer smaller or equal to the division of the inputs.
+*
+* The result is always rounded towards negative infinity.
+*
+* For floating-point inputs, this is equivalent to `floor(x / y)`.
+* For integer inputs, we use `(x - remainder(x, y)) / y` to handle
+* negative values correctly (note: may overflow near int32 boundaries).
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Element-wise floor division of x by y.
+*/
+function floorDivide(x, y) {
+	x = fudgeArray(x);
+	y = fudgeArray(y);
+	const anyFloat = require_backend.isFloatDtype(x.dtype) || require_backend.isFloatDtype(y.dtype);
+	if (anyFloat) {
+		// Floating-point path: floor of the true quotient.
+		const quotient = trueDivide(x, y);
+		return floor(quotient);
+	}
+	// Integer path: remove the remainder first so the division is exact.
+	const rem = remainder(x.ref, y.ref);
+	return subtract(x, rem).div(y);
+}
+/**
 * @function
 * Calculate element-wise floating-point modulo operation.
 */
@@ -6090,6 +6652,20 @@ const fmod = jit$1(function fmod$1(x, y) {
 const remainder = jit$1(function remainder$1(x, y) {
 	// Two-step reduction: mod(mod(x, y) + y, y).
 	// NOTE(review): presumably `mod` truncates (result takes the sign of `x`), and
 	// shifting by `y` before reducing again makes the result take the sign of `y` -
 	// confirm against the definition of `mod`.
 	return mod(mod(x, y.ref).add(y.ref), y);
 });
+/**
+* Return element-wise quotient and remainder simultaneously.
+*
+* Equivalent to `[floorDivide(x, y), remainder(x, y)]`.
+*
+* @param x - Dividend array.
+* @param y - Divisor array.
+* @returns Tuple of [quotient, remainder].
+*/
+function divmod(x, y) {
+	const xArr = fudgeArray(x);
+	const yArr = fudgeArray(y);
+	return [floorDivide(xArr.ref, yArr.ref), remainder(xArr, yArr)];
+}
 /** Round input to the nearest integer towards zero. */
 function trunc(x) {
 	return idiv(x, 1);
@@ -6253,14 +6829,15 @@ function std(x, axis = null, opts) {
 	return sqrt(var_(x, axis, opts));
 }
 /** Estimate the sample covariance of a set of variables. */
-function cov(x, y) {
+function cov(x, y = null, { rowvar = true } = {}) {
 	x = fudgeArray(x);
 	if (x.ndim === 1) x = x.reshape([1, x.shape[0]]);
-	if (y !== void 0) {
+	if (y !== null) {
 		y = fudgeArray(y);
 		if (y.ndim === 1) y = y.reshape([1, y.shape[0]]);
 		x = vstack([x, y]);
 	}
+	if (!rowvar) x = x.transpose();
 	const [_M, N] = x.shape;
 	x = x.ref.sub(x.mean(1, { keepdims: true }));
 	return dot$1(x.ref, x.transpose()).div(N - 1);
@@ -6305,7 +6882,8 @@ const isfinite = jit$1(function isfinite$1(x) {
 //#region src/library/lax-linalg.ts
 var lax_linalg_exports = {};
 __export(lax_linalg_exports, {
-	cholesky: () => cholesky,
+	cholesky: () => cholesky$1,
+	lu: () => lu,
 	triangularSolve: () => triangularSolve
 });
 /**
@@ -6334,11 +6912,39 @@ __export(lax_linalg_exports, {
 * // U ≈ [[1.4142135, 0.70710677], [0, 1.2247449]]
 * ```
 */
-function cholesky(a, { upper = false } = {}) {
+function cholesky$1(a, { upper = false } = {}) {
 	const L = cholesky$2(a);
 	return upper ? moveaxis$1(L, -2, -1) : L;
 }
 /**
+* LU decomposition with partial pivoting.
+*
+* Computes the matrix decomposition: `P @ A = L @ U`, where `P` is a
+* permutation of the rows of `A`, `L` is lower-triangular with unit diagonal,
+* and `U` is upper-triangular.
+*
+* @param x - A batch of matrices with shape `[..., m, n]`.
+*
+* @returns A tuple `[lu, pivots, permutation]` where:
+* - `lu`: `L` and `U` packed into a single matrix. `U` occupies the diagonal
+*   and everything above it; the entries of `L` sit strictly below the
+*   diagonal (the unit diagonal of `L` is implied, not stored).
+* - `pivots`: an array of pivot indices with shape `[..., min(m, n)]`.
+* - `permutation`: the permutation generated by pivots with shape `[..., m]`.
+*
+* @example
+* ```ts
+* import { lax, numpy as np } from "@jax-js/jax";
+*
+* const A = np.array([[4., 3.], [6., 3.]]);
+* const [lu, pivots, permutation] = lax.linalg.lu(A);
+* // lu ≈ [[6., 3.], [0.6666667, 1.0]]
+* //   i.e. L = [[1, 0], [0.6666667, 1]] and U = [[6, 3], [0, 1]]
+* // pivots = [1, 1]
+* // permutation = [1, 0]
+* ```
+*/
+function lu(x) {
+	// Public entry point; all of the work is delegated to `lu$1`.
+	return lu$1(x);
+}
+/**
 * Solve a triangular linear system.
 *
 * Solves `a @ x = b` (if leftSide=true) or `x @ a = b` (if leftSide=false)
@@ -6376,6 +6982,7 @@ var lax_exports = {};
 __export(lax_exports, {
 	conv: () => conv,
 	convGeneralDilated: () => convGeneralDilated,
+	convTranspose: () => convTranspose,
 	convWithGeneralPadding: () => convWithGeneralPadding,
 	dot: () => dot,
 	erf: () => erf,
@@ -6384,6 +6991,7 @@ __export(lax_exports, {
 	reduceWindow: () => reduceWindow,
 	stopGradient: () => stopGradient$1
 });
+const JsArray = globalThis.Array;
 /**
 * General dot product/contraction operator.
 *
@@ -6455,7 +7063,11 @@ function padtypeToPads(inShape, filterShape, strides, dilation, padding) {
 * The semantics of this operation mimic the `jax.lax.conv_general_dilated`
 * function in JAX, which wraps XLA's general convolution operator.
 *
-* Grouped convolutions are not supported right now.
+* @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+* @param rhs - Convolution kernel; shape `[C_out, C_in / G, ...ks]`
+* @param windowStrides - Strides for each spatial dimension
+* @param padding - Padding for each spatial dimension, or a string
+*   (`"VALID"`, `"SAME"`, or `"SAME_LOWER"`)
 */
 function convGeneralDilated(lhs, rhs, windowStrides, padding, { lhsDilation, rhsDilation, featureGroupCount = 1 } = {}) {
 	if (lhs.ndim < 2) throw new Error("lhs must have at least 2 dimensions");
@@ -6515,6 +7127,60 @@ function convWithGeneralPadding(lhs, rhs, windowStrides, padding, lhsDilation, r
 function conv(lhs, rhs, windowStrides, padding) {
 	// Delegates to `convGeneralDilated` without an options object, so no
 	// dilation is passed and `featureGroupCount` keeps its default of 1.
 	return convGeneralDilated(lhs, rhs, windowStrides, padding);
 }
+/**
+* Convenience wrapper for calculating the N-d convolution "transpose".
+*
+* This function directly calculates a fractionally strided conv rather than
+* indirectly calculating the gradient (transpose) of a forward convolution.
+* It is equivalent to the JAX version, except:
+*
+* - The `use_consistent_padding` option is not available. We only have the
+*   consistent padding case (JAX version >0.8.4).
+* - The order of dimensions matches `lax.conv_general_dilated`.
+*
+* Unlike PyTorch/TensorFlow, by default we don't reverse the kernel's spatial
+* dimensions or the `(C_out, C_in)` axis order. To get this behavior, set
+* `transposeKernel` to true.
+*
+* @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+* @param rhs - Convolution kernel; shape `[C_out, C_in, ...ks]`
+* @param strides - Sequence of n integers, sets fractional stride
+* @param padding - Apply padding of `dilation * (kernel_size - 1) - padding` to
+*   each side of the input, so it acts like gradient of `conv()`
+* @param rhsDilation - Atrous dilation for the kernel
+* @param transposeKernel - Flip spatial axes and swap the input/output channels
+*   of the kernel; its shape should be `[C_in, C_out, ...ks]`
+*/
+function convTranspose(lhs, rhs, strides, padding, { rhsDilation, transposeKernel = false } = {}) {
+	const kernelShape = rhs.shape.slice(2);
+	rhsDilation = rhsDilation ?? require_backend.rep(kernelShape.length, 1);
+	const effectiveKernel = kernelShape.map((k, i) => Math.max(0, (k - 1) * rhsDilation[i] + 1));
+	const pads = effectiveKernel.map((k, i) => convTransposePadding(k, strides[i], typeof padding === "string" ? padding : padding[i]));
+	if (transposeKernel) {
+		rhs = flip$1(rhs, require_backend.range(2, rhs.ndim));
+		rhs = moveaxis(rhs, 0, 1);
+	}
+	return convGeneralDilated(lhs, rhs, require_backend.rep(lhs.ndim - 2, 1), pads, {
+		lhsDilation: strides,
+		rhsDilation
+	});
+}
+function convTransposePadding(k, s, padding) {
+	let padLen;
+	let pad1;
+	if (padding === "SAME") {
+		padLen = k + s - 2;
+		pad1 = s > k - 1 ? k - 1 : Math.ceil(padLen / 2);
+	} else if (padding === "VALID") {
+		padLen = k + s - 2 + Math.max(k - s, 0);
+		pad1 = k - 1;
+	} else if (JsArray.isArray(padding)) {
+		const pads = [k - 1 - padding[0], k - 1 - padding[1]];
+		pad1 = pads[0];
+		padLen = pads[0] + pads[1];
+	} else throw new Error(`convTranspose: Invalid padding type ${padding}`);
+	return [pad1, padLen - pad1];
+}
 /** Reduce a computation over padded windows. */
 function reduceWindow(operand, computation, windowDimensions, windowStrides) {
 	if (operand.ndim < windowDimensions.length) throw new Error(`Operand dimensions ${operand.ndim} < window ${windowDimensions.length}`);
@@ -6553,6 +7219,7 @@ function stopGradient$1(x) {
 var nn_exports = {};
 __export(nn_exports, {
 	celu: () => celu,
+	dotProductAttention: () => dotProductAttention,
 	elu: () => elu,
 	gelu: () => gelu,
 	glu: () => glu,
@@ -6869,6 +7536,95 @@ function oneHot(x, numClasses) {
 	if (require_backend.isFloatDtype(x.dtype)) throw new TypeError(`oneHot expects integers, got ${x.dtype}`);
 	return eye(numClasses, void 0, { device: x.device }).slice(x);
 }
+/**
+* Scaled dot product attention (SDPA).
+*
+* Computes `softmax((Q @ K^T) / sqrt(d) + bias) @ V`, where `Q` is the query,
+* `K` is the key, `V` is the value, and `d` is the dimensionality of each key
+* and query vector.
+*
+* Multi-query / grouped-query attention is applied when input `key` and
+* `value` tensors have fewer heads than `query`. Query heads are grouped
+* contiguously: with `G = N / K`, query head `n` attends using key/value head
+* `floor(n / G)`.
+*
+* We use the following uppercase letters to denote array shapes:
+* - `B` = batch size
+* - `S` = length of key/value sequences (source)
+* - `L` = length of query sequences
+* - `N` = number of attention heads
+* - `H` = dimensionality of each attention head
+* - `K` = number of key/value heads (for grouped-query attention)
+*
+* The batch size `B` may be omitted, which is equivalent to `B = 1`. In this
+* case it must be omitted from all inputs.
+*
+* @param query - Query array; shape `[B, L, N, H]`
+* @param key - Key array; shape `[B, S, K, H]`
+* @param value - Value array; same shape as `key`
+* @param opts.bias - Optional bias to add to the attention logits; shape
+*   `[B, N, L, S]` or broadcastable to it.
+* @param opts.mask - Optional mask to apply to the attention logits; should be
+*   a boolean array broadcastable to `[B, N, L, S]`, where `true` indicates
+*   the element should take part in attention.
+* @param opts.scale - Scaling factor override, default is `1 / sqrt(H)`.
+* @param opts.isCausal - If true, applies a causal mask.
+* @param opts.querySeqLengths - Optional sequence lengths for the queries;
+*   shape `(B,)`. Taken from the beginning of the tensor. Not yet implemented.
+* @param opts.keyValueSeqLengths - Optional sequence lengths for the keys and
+*   values; shape `(B,)`. Taken from the beginning of the tensor. Not yet
+*   implemented.
+* @param opts.localWindowSize - If specified, applies a local attention window
+*   of the given size. Can be a single number or a tuple `[left, right]`. Not
+*   yet implemented.
+*
+* @returns The result of the attention operation; shape is the same as query
+*   `[B, L, N, H]`, or `[L, N, H]` if `B` is omitted.
+* @throws Error if an unimplemented option is passed, if the ranks or shapes of
+*   the inputs are inconsistent, or if `N` is not a multiple of `K`.
+*/
+function dotProductAttention(query, key$1, value, opts = {}) {
+	if (opts.querySeqLengths !== void 0 || opts.keyValueSeqLengths !== void 0) throw new Error("Sequence length masking is not yet implemented");
+	if (opts.localWindowSize !== void 0) throw new Error("Local attention is not yet implemented");
+	query = fudgeArray(query);
+	key$1 = fudgeArray(key$1);
+	value = fudgeArray(value);
+	if (query.ndim !== 3 && query.ndim !== 4 || query.ndim !== key$1.ndim || query.ndim !== value.ndim) throw new Error(`dotProductAttention: expected all tensors to have rank 3 or 4, got Q=${query.aval}, K=${key$1.aval}, V=${value.aval}`);
+	if (!require_backend.deepEqual(key$1.shape, value.shape)) throw new Error(`dotProductAttention: key and value shapes must match, got K=${key$1.shape}, V=${value.shape}`);
+	// Rank-3 inputs get a temporary batch axis of size 1, removed again on return.
+	const isRank3 = query.ndim === 3;
+	if (isRank3) {
+		query = expandDims(query, 0);
+		key$1 = expandDims(key$1, 0);
+		value = expandDims(value, 0);
+	}
+	const [B, L, N, H] = query.shape;
+	if (key$1.shape[0] !== B || key$1.shape[3] !== H) throw new Error(`dotProductAttention: query and key shapes mismatch, got Q=${query.aval}, K=${key$1.aval}`);
+	const S = key$1.shape[1];
+	const K = key$1.shape[2];
+	if (N < K || N % K !== 0) throw new Error(`dotProductAttention: number of query heads N=${N} must be divisible by number of key/value heads K=${K} for GQA`);
+	const G = N / K;
+	if (G > 1) {
+		// Repeat each key/value head G times *consecutively* ([k0, k0, ..., k1, k1, ...])
+		// so that query head n lines up with key/value head floor(n / G). A plain
+		// tile along the head axis would instead order the heads as
+		// [k0, k1, ..., k0, k1, ...] and pair query head n with head n % K.
+		const repeatHeads = (t) => tile(expandDims(t, 3), [
+			1,
+			1,
+			1,
+			G,
+			1
+		]).reshape([
+			B,
+			S,
+			N,
+			H
+		]);
+		key$1 = repeatHeads(key$1);
+		value = repeatHeads(value);
+	}
+	const scale = opts.scale ?? 1 / Math.sqrt(H);
+	let scores = einsum("BLNH,BSNH->BNLS", query, key$1).mul(scale);
+	if (opts.bias !== void 0) scores = scores.add(opts.bias);
+	// Masked-out logits become -Infinity so they receive zero weight in the softmax.
+	if (opts.mask !== void 0) scores = where(opts.mask, scores, -Infinity);
+	if (opts.isCausal) {
+		const causalMask = tri(L, S, 0, { dtype: require_backend.DType.Bool });
+		scores = where(causalMask, scores, -Infinity);
+	}
+	const attn = softmax(scores, -1);
+	const out = einsum("BNLS,BSNH->BLNH", attn, value);
+	return isRank3 ? out.reshape([
+		L,
+		N,
+		H
+	]) : out;
+}
 //#endregion
 //#region src/library/random.ts
@@ -6881,33 +7637,41 @@ __export(random_exports, {
 	gumbel: () => gumbel,
 	key: () => key,
 	laplace: () => laplace,
+	multivariateNormal: () => multivariateNormal,
 	normal: () => normal,
 	split: () => split,
 	uniform: () => uniform
 });
-function validateKeyShape(key$1) {
+function validateKeyShape(key$1, scalar = false) {
 	if (key$1.ndim === 0) throw new Error("Key must have at least one dimension.");
 	if (key$1.shape[key$1.shape.length - 1] !== 2) throw new Error(`Invalid key shape: ${key$1.shape}. Expected last dimension to be 2.`);
+	if (scalar && key$1.shape.length > 1) throw new Error(`Expected a single PRNG key, but got a batch of keys with shape ${JSON.stringify(key$1.shape)} - use jax.vmap for batching.`);
 	return key$1.shape.slice(0, -1);
 }
+function getK01(key$1) {
+	const keyShape = validateKeyShape(key$1, true);
+	let [k0, k1] = split$2(key$1, -1, [1, 1]);
+	k0 = k0.reshape(keyShape);
+	k1 = k1.reshape(keyShape);
+	return [k0, k1];
+}
 /** Create a pseudo-random number generator (PRNG) key from 32-bit integer seed. */
 function key(seed) {
-	seed = seed >>> 0;
-	return array([0, seed], { dtype: require_backend.DType.Uint32 });
+	seed = array(seed, { dtype: require_backend.DType.Uint32 });
+	if (seed.ndim !== 0) throw new Error(`key: seed must be a scalar integer, but got shape ${seed.shape} - use jax.vmap for batching.`);
+	return stack([0, seed]);
 }
 /** Splits a PRNG key into `num` new keys by adding a leading axis. */
 function split(key$1, num = 2) {
 	const shape$1 = typeof num === "number" ? [num] : num;
 	for (const len of shape$1) if (len <= 0 || !Number.isInteger(len)) throw new Error(`Invalid split length: ${len}. Must be a positive integer.`);
-	const keyShape = validateKeyShape(key$1);
-	const k0 = key$1.ref.slice(...keyShape.map(() => null), 0);
-	const k1 = key$1.slice(...keyShape.map(() => null), 1);
+	const [k0, k1] = getK01(key$1);
 	return stack([randomBits(k0.ref, k1.ref, shape$1, 0), randomBits(k0, k1, shape$1, 1)], -1);
 }
 /** Sample uniform bits in the form of unsigned integers. */
 function bits(key$1, shape$1 = []) {
-	const keyShape = validateKeyShape(key$1);
-	return randomBits(key$1.ref.slice(...keyShape.map(() => null), 0), key$1.slice(...keyShape.map(() => null), 1), shape$1);
+	const [k0, k1] = getK01(key$1);
+	return randomBits(k0, k1, shape$1);
 }
 /**
 * @function
@@ -6981,6 +7745,32 @@ const laplace = jit$1(function laplace$1(key$1, shape$1 = []) {
 }, { staticArgnums: [1] });
 /**
 * @function
+* Sample multivariate normal random values with given mean and covariance.
+*
+* The values are returned with the given shape, along with the final dimension
+* used to represent the n-dimensional multivariate normal factors.
+*
+* This uses Cholesky decomposition on the covariance matrix.
+*
+* - `key` - PRNG key
+* - `mean` - Mean vector of shape `[..., n]`
+* - `cov` - Covariance of shape `[..., n, n]`, must be positive-definite
+* - `shape` - Result batch shape, must be broadcastable with
+*            `mean.shape[:-1]` and `cov.shape[:-2]`
+* @returns Random samples of shape `[...shape, n]`
+*/
+const multivariateNormal = jit$1(function multivariateNormal$1(key$1, mean$1, cov$1, shape$1 = []) {
+	mean$1 = fudgeArray(mean$1);
+	cov$1 = fudgeArray(cov$1);
+	const n = mean$1.shape[mean$1.ndim - 1];
+	if (cov$1.shape[cov$1.ndim - 1] !== n || cov$1.shape[cov$1.ndim - 2] !== n) throw new Error(`Invalid covariance shape: ${cov$1.shape}. Expected last two dimensions to be [${n}, ${n}].`);
+	const outputShape = broadcastShapes(shape$1, mean$1.shape.slice(0, -1), cov$1.shape.slice(0, -2)).concat(n);
+	const L = cholesky(cov$1);
+	const z = normal(key$1, outputShape);
+	return einsum("...ij,...j->...i", L, z).add(mean$1);
+}, { staticArgnums: [3] });
+/**
+* @function
 * Sample random values according to `p(x) = 1/sqrt(2pi) * exp(-x^2/2)`.
 *
 * Unlike JAX, this uses the Box-Muller transform. JAX uses the erf_inv primitive instead and
@@ -7070,17 +7860,62 @@ const linearize = linearize$1;
 /**
 * @function
 * Calculate the reverse-mode vector-Jacobian product for a function.
+*
+* The return value is a tuple of `[out, vjpFn]`, where `out` is the output of
+* `f(primals)`, and `vjpFn` is a function that takes in cotangents for each
+* output and returns the cotangents for each input.
+*
+* When `{ hasAux: true }` is passed, the function `f` is expected to return an
+* `[out, aux]` tuple, and `vjp` returns `[out, vjpFn, aux]`.
+*
+* @example
+* ```ts
+* const [y, vjpFn] = vjp(f, [x]);
+*
+* // With hasAux
+* const [y, vjpFn, aux] = vjp(f, [x], { hasAux: true });
+* ```
 */
 const vjp = vjp$1;
 /**
 * @function
 * Compute the gradient of a scalar-valued function `f` with respect to its
 * first argument.
+*
+* Pass in different `argnums` to differentiate with respect to other
+* arguments. If a tuple is provided, the return value will be a tuple of
+* gradients corresponding to each argument index.
+*
+* When `{ hasAux: true }` is passed, the function `f` is expected to return a
+* `[out, aux]` tuple, and the return value will be `[gradient, aux]`.
+*
+* @example
+* ```ts
+* const gradient = grad(f)(x);
+*
+* // With `argnums`
+* const [gradientX, gradientZ] = grad(f, { argnums: [0, 2] })(x, y, z);
+*
+* // With `hasAux`
+* const [gradient, aux] = grad(f, { hasAux: true })(x);
+* ```
 */
 const grad = grad$1;
 /**
 * @function
 * Create a function that evaluates both `f` and the gradient of `f`.
+*
+* When `{ hasAux: true }` is passed, the function `f` is expected to return an
+* `[out, aux]` tuple, and the return value will be `[[out, aux], gradient]`.
+*
+* @example
+* ```ts
+* // Without hasAux
+* const [value, gradient] = valueAndGrad(f)(x);
+*
+* // With hasAux
+* const [[value, aux], gradient] = valueAndGrad(f, { hasAux: true })(x);
+* ```
 */
 const valueAndGrad = valueAndGrad$1;
 /**
@@ -7089,6 +7924,21 @@ const valueAndGrad = valueAndGrad$1;
 */
 const jacrev = jacrev$1;
 /**
+* @function
+* Compute the Hessian matrix of a scalar-valued function.
+*
+* The Hessian is the matrix of second-order partial derivatives of a function.
+* This is implemented as `jacfwd(grad(f))`.
+*
+* @example
+* ```ts
+* const f = (x: np.Array) => np.sum(x.ref.mul(x.ref).mul(x)); // sum of x_i^3
+* const H = hessian(f)(np.array([1, 2, 3]));
+* // H[i,j] = d^2f / dx_i dx_j
+* // For this f the Hessian is diag(6 * x) = diag([6, 12, 18]).
+* ```
+*/
+const hessian = hessian$1;
+/**
 * Wait until all `Array` leaves are ready by calling `Array.blockUntilReady()`.
 *
 * This can be used to wait for the results of an intermediate computation to
@@ -7132,6 +7982,7 @@ exports.defaultDevice = require_backend.defaultDevice;
 exports.devicePut = devicePut;
 exports.devices = require_backend.devices;
 exports.grad = grad;
+exports.hessian = hessian;
 exports.init = require_backend.init;
 exports.jacfwd = jacfwd;
 exports.jacobian = jacrev;