npm - @jax-js/jax - Versions diffs - 0.1.7 → 0.1.9 - Mend

@jax-js/jax 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +30 -13
package/dist/{backend-nEolvdLv.js → backend-BId79r5b.js} +17 -6
package/dist/{backend-B3foXiV_.cjs → backend-DpI0riom.cjs} +17 -6
package/dist/index.cjs +113 -30
package/dist/index.d.cts +64 -21
package/dist/index.d.ts +64 -21
package/dist/index.js +113 -30
package/dist/{webgl-DIIbKJ0G.cjs → webgl-C5NjXc1p.cjs} +1 -1
package/dist/{webgl-DweKSWEm.js → webgl-DnGrclTz.js} +1 -1
package/dist/{webgpu-B96vzWGE.js → webgpu-AN0cG_nB.js} +25 -10
package/dist/{webgpu-BykvF26B.cjs → webgpu-CdjiJSa7.cjs} +25 -10
package/package.json +12 -1

package/README.md CHANGED Viewed

@@ -43,6 +43,23 @@ way to get started on a blank HTML page.
 </script>
 ```
+## Examples
+Cool things that the community has made with jax-js:
+- [**tanh.xyz**: Interactive ML visualizations](https://tanh.xyz/)
+And some more demos on the official website.
+- [Training neural networks on MNIST](https://jax-js.com/mnist)
+- [Voice cloning: Kyutai Pocket TTS](https://jax-js.com/tts)
+- [CLIP embeddings for books in-browser](https://jax-js.com/mobileclip)
+- [Object detection: DETR ResNet-50 (ONNX)](https://jax-js.com/detr-resnet-50)
+- [In-browser REPL](https://jax-js.com/repl)
+- [Matmul benchmark](https://jax-js.com/bench/matmul)
+- [Conv2d benchmark](https://jax-js.com/bench/conv2d)
+- [Mandelbrot set](https://jax-js.com/mandelbrot)
 ## Feature comparison
 Here's a quick, high-level comparison with other popular web ML runtimes:
@@ -338,19 +355,6 @@ well as unique optimizations such as FlashAttention variants.
 That's all for this short tutorial. Please see the generated
 [API reference](https://jax-js.com/docs) for detailed documentation.
-## Examples
-If you make something cool with jax-js, don't be a stranger! We can feature it here.
-- [Training neural networks on MNIST](https://jax-js.com/mnist)
-- [Voice cloning: Kyutai Pocket TTS](https://jax-js.com/tts)
-- [CLIP embeddings for books in-browser](https://jax-js.com/mobileclip)
-- [Object detection: DETR ResNet-50 (ONNX)](https://jax-js.com/detr-resnet-50)
-- [In-browser REPL](https://jax-js.com/repl)
-- [Matmul benchmark](https://jax-js.com/bench/matmul)
-- [Conv2d benchmark](https://jax-js.com/bench/conv2d)
-- [Mandelbrot set](https://jax-js.com/mandelbrot)
 ## Development
 _The following technical details are for contributing to jax-js and modifying its internals._
@@ -363,6 +367,19 @@ pnpm install
 pnpm run build:watch
 ```
+The `pnpm install` command automatically sets up Git hooks via
+[Husky](https://typicode.github.io/husky/). Pre-commit hooks will run ESLint and Prettier on staged
+files to ensure code quality.
+You can also run linting and formatting manually:
+```bash
+pnpm lint          # Run ESLint
+pnpm format        # Format all files with Prettier
+pnpm format:check  # Check formatting without writing
+pnpm check         # Run TypeScript type checking
+```
 Then you can run tests in a headless browser using [Vitest](https://vitest.dev/).
 ```bash

package/dist/{backend-nEolvdLv.js → backend-BId79r5b.js} RENAMED Viewed

@@ -1479,9 +1479,14 @@ var Routine = class {
 };
 /** One of the valid `Routine` that can be dispatched to backend. */
 let Routines = /* @__PURE__ */ function(Routines$1) {
-	/** Stable sorting algorithm along the last axis. */
+	/**
+	* Sort along the last axis.
+	*
+	* This may be _unstable_ but it often doesn't matter, sorting numbers is
+	* bitwise unique up to signed zeros and NaNs.
+	*/
 	Routines$1["Sort"] = "Sort";
-	/** Returns `int32` indices of the stably sorted array. */
+	/** Stable sorting, returns `int32` indices and values of the sorted array. */
 	Routines$1["Argsort"] = "Argsort";
 	/**
 	* Solve a triangular system of equations.
@@ -1545,7 +1550,13 @@ function runArgsort(type, [x], [y, yi]) {
 		const out = y.subarray(offset, offset + n);
 		const outi = yi.subarray(offset, offset + n);
 		for (let i = 0; i < n; i++) outi[i] = i;
-		outi.sort((a, b) => ar[a] - ar[b]);
+		outi.sort((a, b) => {
+			const x$1 = ar[a];
+			const y$1 = ar[b];
+			if (isNaN(x$1)) return isNaN(y$1) ? 0 : 1;
+			if (isNaN(y$1)) return -1;
+			return x$1 === y$1 ? 0 : x$1 < y$1 ? -1 : 1;
+		});
 		for (let i = 0; i < n; i++) out[i] = ar[outi[i]];
 	}
 }
@@ -2321,7 +2332,7 @@ function tuneWebgpu(kernel) {
 	if (!/Mobi|Android/i.test(navigator.userAgent) && dim.reduce < dim.unroll && (prod(dim.st.shape.slice(dim.unroll)) <= 4 || dim.unroll === dim.upcast && prod(dim.st.shape.slice(dim.upcast)) < 64)) {
 		const s = dim.st.shape[dim.unroll - 1];
 		if (0 < s && s <= 32) dim.applyUnroll(dim.reduce, s);
-		else for (const splits of [8, 4]) if (s % splits === 0) {
+		else for (const splits of [4, 2]) if (s % splits === 0) {
 			dim.applyUnroll(dim.unroll - 1, splits);
 			break;
 		}
@@ -4252,7 +4263,7 @@ async function createBackend(device) {
 		if (!navigator.gpu) return null;
 		const adapter = await navigator.gpu.requestAdapter({ powerPreference: "high-performance" });
 		if (!adapter) return null;
-		const { WebGPUBackend } = await import("./webgpu-B96vzWGE.js");
+		const { WebGPUBackend } = await import("./webgpu-AN0cG_nB.js");
 		const importantLimits = [
 			"maxBufferSize",
 			"maxComputeInvocationsPerWorkgroup",
@@ -4290,7 +4301,7 @@ async function createBackend(device) {
 		});
 		if (!gl) return null;
 		if (!gl.getExtension("EXT_color_buffer_float")) return null;
-		const { WebGLBackend } = await import("./webgl-DweKSWEm.js");
+		const { WebGLBackend } = await import("./webgl-DnGrclTz.js");
 		return new WebGLBackend(gl);
 	} else throw new Error(`Backend not found: ${device}`);
 }

package/dist/{backend-B3foXiV_.cjs → backend-DpI0riom.cjs} RENAMED Viewed

@@ -1480,9 +1480,14 @@ var Routine = class {
 };
 /** One of the valid `Routine` that can be dispatched to backend. */
 let Routines = /* @__PURE__ */ function(Routines$1) {
-	/** Stable sorting algorithm along the last axis. */
+	/**
+	* Sort along the last axis.
+	*
+	* This may be _unstable_ but it often doesn't matter, sorting numbers is
+	* bitwise unique up to signed zeros and NaNs.
+	*/
 	Routines$1["Sort"] = "Sort";
-	/** Returns `int32` indices of the stably sorted array. */
+	/** Stable sorting, returns `int32` indices and values of the sorted array. */
 	Routines$1["Argsort"] = "Argsort";
 	/**
 	* Solve a triangular system of equations.
@@ -1546,7 +1551,13 @@ function runArgsort(type, [x], [y, yi]) {
 		const out = y.subarray(offset, offset + n);
 		const outi = yi.subarray(offset, offset + n);
 		for (let i = 0; i < n; i++) outi[i] = i;
-		outi.sort((a, b) => ar[a] - ar[b]);
+		outi.sort((a, b) => {
+			const x$1 = ar[a];
+			const y$1 = ar[b];
+			if (isNaN(x$1)) return isNaN(y$1) ? 0 : 1;
+			if (isNaN(y$1)) return -1;
+			return x$1 === y$1 ? 0 : x$1 < y$1 ? -1 : 1;
+		});
 		for (let i = 0; i < n; i++) out[i] = ar[outi[i]];
 	}
 }
@@ -2322,7 +2333,7 @@ function tuneWebgpu(kernel) {
 	if (!/Mobi|Android/i.test(navigator.userAgent) && dim.reduce < dim.unroll && (prod(dim.st.shape.slice(dim.unroll)) <= 4 || dim.unroll === dim.upcast && prod(dim.st.shape.slice(dim.upcast)) < 64)) {
 		const s = dim.st.shape[dim.unroll - 1];
 		if (0 < s && s <= 32) dim.applyUnroll(dim.reduce, s);
-		else for (const splits of [8, 4]) if (s % splits === 0) {
+		else for (const splits of [4, 2]) if (s % splits === 0) {
 			dim.applyUnroll(dim.unroll - 1, splits);
 			break;
 		}
@@ -4253,7 +4264,7 @@ async function createBackend(device) {
 		if (!navigator.gpu) return null;
 		const adapter = await navigator.gpu.requestAdapter({ powerPreference: "high-performance" });
 		if (!adapter) return null;
-		const { WebGPUBackend } = await Promise.resolve().then(() => require("./webgpu-BykvF26B.cjs"));
+		const { WebGPUBackend } = await Promise.resolve().then(() => require("./webgpu-CdjiJSa7.cjs"));
 		const importantLimits = [
 			"maxBufferSize",
 			"maxComputeInvocationsPerWorkgroup",
@@ -4291,7 +4302,7 @@ async function createBackend(device) {
 		});
 		if (!gl) return null;
 		if (!gl.getExtension("EXT_color_buffer_float")) return null;
-		const { WebGLBackend } = await Promise.resolve().then(() => require("./webgl-DIIbKJ0G.cjs"));
+		const { WebGLBackend } = await Promise.resolve().then(() => require("./webgl-C5NjXc1p.cjs"));
 		return new WebGLBackend(gl);
 	} else throw new Error(`Backend not found: ${device}`);
 }

package/dist/index.cjs CHANGED Viewed

@@ -30,7 +30,7 @@ var __toESM = (mod$1, isNodeMode, target) => (target = mod$1 != null ? __create(
 }) : target, mod$1));
 //#endregion
-const require_backend = require('./backend-B3foXiV_.cjs');
+const require_backend = require('./backend-DpI0riom.cjs');
 //#region src/frontend/convolution.ts
 /**
@@ -920,18 +920,25 @@ var Tracer = class Tracer {
 		return sort$1(this.transpose(perm)).transpose(require_backend.invertPermutation(perm));
 	}
 	/**
-	* Return the indices that would sort an array. This may not be a stable
-	* sorting algorithm; it need not preserve order of indices in ties.
+	* Return the indices that would sort an array. Unlike `sort`, this is
+	* guaranteed to be a stable sorting algorithm; it always returns the smaller
+	* index first in event of ties.
 	*
 	* See `jax.numpy.argsort` for full docs.
 	*/
 	argsort(axis = -1) {
 		axis = require_backend.checkAxis(axis, this.ndim);
-		if (axis === this.ndim - 1) return argsort$1(this)[1];
+		if (axis === this.ndim - 1) {
+			const [y$1, yi$1] = argsort$1(this);
+			y$1.dispose();
+			return yi$1;
+		}
 		const perm = require_backend.range(this.ndim);
 		perm.splice(axis, 1);
 		perm.push(axis);
-		return argsort$1(this.transpose(perm))[1].transpose(require_backend.invertPermutation(perm));
+		const [y, yi] = argsort$1(this.transpose(perm));
+		y.dispose();
+		return yi.transpose(require_backend.invertPermutation(perm));
 	}
 	/**
 	* Slice an array along one or more axes.
@@ -3416,32 +3423,26 @@ function fullInternal(aval, fillValue, device) {
 		committed: device != void 0
 	});
 }
-function zerosLike$1(val, dtype) {
-	return fullLike(val, 0, dtype);
+function zerosLike$1(val, opts) {
+	return fullLike(val, 0, opts);
 }
-function onesLike$1(val, dtype) {
-	return fullLike(val, 1, dtype);
+function onesLike$1(val, opts) {
+	return fullLike(val, 1, opts);
 }
-function fullLike(val, fillValue, dtype) {
+function fullLike(val, fillValue, { dtype, shape: shape$1, device } = {}) {
 	const aval = getAval(val);
 	if (val instanceof Tracer) val.dispose();
 	if (fillValue instanceof Tracer) throw new Error("numpy.fullLike() with array argument not implemented yet");
-	const sa = new ShapedArray(aval.shape, dtype ?? aval.dtype, aval.weakType);
-	return fullInternal(sa, fillValue);
+	const sa = new ShapedArray(shape$1 ?? aval.shape, dtype ?? aval.dtype, aval.weakType && dtype === void 0);
+	return fullInternal(sa, fillValue, device);
 }
 /** Return a new array of given shape and type, filled with zeros. */
-function zeros(shape$1, { dtype, device } = {}) {
-	return full(shape$1, 0, {
-		dtype,
-		device
-	});
+function zeros(shape$1, opts) {
+	return full(shape$1, 0, opts);
 }
 /** Return a new array of given shape and type, filled with ones. */
-function ones(shape$1, { dtype, device } = {}) {
-	return full(shape$1, 1, {
-		dtype,
-		device
-	});
+function ones(shape$1, opts) {
+	return full(shape$1, 1, opts);
 }
 /** Return a new array of given shape and type, filled with `fill_value`. */
 function full(shape$1, fillValue, { dtype, device } = {}) {
@@ -5329,9 +5330,10 @@ function lstsq(a, b) {
 		});
 		const llb = triangularSolve(l, lb, {
 			leftSide: true,
+			lower: true,
 			transposeA: true
 		});
-		return matmul(at, llb.ref);
+		return matmul(at, llb);
 	} else {
 		const ata = matmul(at.ref, a);
 		const l = cholesky(ata, { symmetrizeInput: false });
@@ -5342,6 +5344,7 @@ function lstsq(a, b) {
 		});
 		const llb = triangularSolve(l, lb, {
 			leftSide: true,
+			lower: true,
 			transposeA: true
 		});
 		return llb;
@@ -5421,7 +5424,7 @@ function solve(a, b) {
 		lower: true,
 		unitDiagonal: true
 	});
-	let x = triangularSolve(lu$2, LPb.ref, {
+	let x = triangularSolve(lu$2, LPb, {
 		leftSide: true,
 		lower: false
 	});
@@ -6232,8 +6235,9 @@ function sort(a, axis = -1) {
 	return fudgeArray(a).sort(axis);
 }
 /**
-* Return indices that would sort an array. This may be an unstable sorting
-* algorithm; it need not preserve order of indices in ties.
+* Return indices that would sort an array. Unlike `sort`, this is guaranteed to
+* be a stable sorting algorithm; it always returns the smaller index first in
+* event of ties.
 *
 * Returns an array of `int32` indices.
 *
@@ -6535,7 +6539,7 @@ function absolute(x) {
 /** Return an element-wise indication of sign of the input. */
 function sign(x) {
 	x = fudgeArray(x);
-	return where(notEqual(x.ref, 0), where(less(x.ref, 0), -1, 1), 0);
+	return where(notEqual(x.ref, 0), where(less(x, 0), -1, 1), 0);
 }
 /** @function Return element-wise positive values of the input (no-op). */
 const positive = fudgeArray;
@@ -7003,7 +7007,10 @@ function triangularSolve(a, b, { leftSide = false, lower = false, transposeA = f
 	b = fudgeArray(b);
 	if (!leftSide) transposeA = !transposeA;
 	else b = moveaxis$1(b, -2, -1);
-	if (transposeA) a = moveaxis$1(a, -2, -1);
+	if (transposeA) {
+		a = moveaxis$1(a, -2, -1);
+		lower = !lower;
+	}
 	let x = triangularSolve$1(a, b, {
 		lower,
 		unitDiagonal
@@ -7025,7 +7032,8 @@ __export(lax_exports, {
 	erfc: () => erfc,
 	linalg: () => lax_linalg_exports,
 	reduceWindow: () => reduceWindow,
-	stopGradient: () => stopGradient$1
+	stopGradient: () => stopGradient$1,
+	topK: () => topK
 });
 const JsArray = globalThis.Array;
 /**
@@ -7249,6 +7257,39 @@ function erfc(x) {
 function stopGradient$1(x) {
 	return stopGradient(x);
 }
+/**
+* Returns top `k` values and their indices along the specified axis of operand.
+*
+* This is a _stable_ algorithm: If two elements are equal, the lower-index
+* element appears first.
+*
+* @returns A tuple of `(values, indices)`, where `values` and `indices` have
+* the same shape as `x`, except along `axis` where they have size `k`.
+*/
+function topK(x, k, axis = -1) {
+	x = fudgeArray(x);
+	axis = require_backend.checkAxis(axis, x.ndim);
+	const size$1 = x.shape[axis];
+	if (k < 0 || k > size$1) throw new Error(`topK: k must be in the range [0, ${size$1}], got ${k}`);
+	if (k === 0) {
+		const outShape = x.shape.slice();
+		outShape[axis] = 0;
+		const y$1 = zerosLike$1(x.ref, { shape: outShape });
+		const yi$1 = zerosLike$1(x, {
+			dtype: require_backend.DType.Int32,
+			shape: outShape
+		});
+		return [y$1, yi$1];
+	}
+	x = flip$1(x, [axis]);
+	x = moveaxis(x, axis, -1);
+	const [y, yi] = argsort$1(x);
+	const extract = (a) => {
+		a = a.slice(...require_backend.rep(a.ndim - 1, []), [-k]);
+		return flip$1(moveaxis(a, -1, axis), [axis]);
+	};
+	return [extract(y), extract(yi.neg().add(size$1 - 1))];
+}
 //#endregion
 //#region src/library/nn.ts
@@ -7440,7 +7481,7 @@ const gelu = jit$1(function gelu$1(x, opts) {
 	if (opts?.approximate ?? true) {
 		const SQRT_2_OVER_PI = Math.sqrt(2 / Math.PI);
 		return x.ref.mul(.5).mul(tanh(x.ref.mul(x.ref.mul(x).mul(.044715).add(1)).mul(SQRT_2_OVER_PI)).add(1));
-	} else return x.ref.mul(.5).mul(erfc$1(negative(x.ref.mul(Math.SQRT1_2))));
+	} else return x.ref.mul(.5).mul(erfc$1(negative(x.mul(Math.SQRT1_2))));
 }, { staticArgnums: [1] });
 /**
 * Gated linear unit (GLU) activation function.
@@ -7698,6 +7739,7 @@ var random_exports = {};
 __export(random_exports, {
 	bernoulli: () => bernoulli,
 	bits: () => bits,
+	categorical: () => categorical,
 	cauchy: () => cauchy,
 	exponential: () => exponential,
 	gumbel: () => gumbel,
@@ -7769,6 +7811,47 @@ function bernoulli(key$1, p = .5, shape$1 = []) {
 }
 /**
 * @function
+* Sample random values from categorical distributions.
+*
+* Uses the Gumbel max trick for sampling with replacement, or the Gumbel top-k
+* trick for sampling without replacement.
+*
+* Note: Sampling without replacement currently uses argsort and slices the last
+* k elements. This should be replaced with a more efficient topK implementation.
+*
+* - `key` - PRNG key
+* - `logits` - Unnormalized log probabilities of the categorical distribution(s).
+*   `softmax(logits, axis)` gives the corresponding probabilities.
+* - `axis` - Axis along which logits belong to the same categorical distribution.
+* - `shape` - Result batch shape. Must be broadcast-compatible with
+*   `logits.shape` with `axis` removed. Default is `logits.shape` with `axis` removed.
+* - `replace` - If true (default), sample with replacement. If false, sample
+*   without replacement (each category can only be selected once per batch).
+* @returns A random array with int dtype and shape given by `shape` if provided,
+*   otherwise `logits.shape` with `axis` removed.
+*/
+const categorical = jit$1(function categorical$1(key$1, logits, { axis = -1, shape: shape$1, replace = true } = {}) {
+	logits = fudgeArray(logits);
+	axis = require_backend.checkAxis(axis, logits.ndim);
+	const numCategories = logits.shape[axis];
+	const batchShape = logits.shape.toSpliced(axis, 1);
+	if (shape$1 === void 0) shape$1 = batchShape;
+	else if (!require_backend.deepEqual(require_backend.generalBroadcast(shape$1, batchShape), shape$1)) throw new Error(`Shape ${shape$1} is not broadcast-compatible with batch shape ${batchShape}.`);
+	const shapePrefix = shape$1.slice(0, shape$1.length - batchShape.length);
+	if (replace) {
+		const noise = gumbel(key$1, [...shapePrefix, ...logits.shape]);
+		return argmax(noise.add(logits), axis + shapePrefix.length);
+	} else {
+		const k = shapePrefix.reduce((a, b) => a * b, 1);
+		if (k > numCategories) throw new Error(`Number of samples without replacement (${k}) cannot exceed number of categories (${numCategories}).`);
+		const noise = gumbel(key$1, logits.shape);
+		const [values, indices] = topK(noise.add(logits), k, axis);
+		values.dispose();
+		return indices.reshape(shape$1);
+	}
+}, { staticArgnums: [2] });
+/**
+* @function
 * Sample from a Cauchy distribution with location 0 and scale 1.
 *
 * Uses inverse transform sampling: `x = tan(π * (u - 0.5))` where u ~ Uniform(0, 1).

package/dist/index.d.cts CHANGED Viewed

@@ -436,9 +436,14 @@ declare class Routine {
 }
 /** One of the valid `Routine` that can be dispatched to backend. */
 declare enum Routines {
-  /** Stable sorting algorithm along the last axis. */
+  /**
+   * Sort along the last axis.
+   *
+   * This may be _unstable_ but it often doesn't matter, sorting numbers is
+   * bitwise unique up to signed zeros and NaNs.
+   */
   Sort = "Sort",
-  /** Returns `int32` indices of the stably sorted array. */
+  /** Stable sorting, returns `int32` indices and values of the sorted array. */
   Argsort = "Argsort",
   /**
    * Solve a triangular system of equations.
@@ -750,9 +755,9 @@ declare enum Primitive {
   Shrink = "shrink",
   Pad = "pad",
   Sort = "sort",
-  // sort(x, axis=-1)
+  // sort(x, axis=-1), unstable
   Argsort = "argsort",
-  // argsort(x, axis=-1)
+  // argsort(x, axis=-1), stable
   TriangularSolve = "triangular_solve",
   // A is upper triangular, A @ X.T = B.T
   Cholesky = "cholesky",
@@ -1029,8 +1034,9 @@ declare abstract class Tracer {
    */
   sort(axis?: number): this;
   /**
-   * Return the indices that would sort an array. This may not be a stable
-   * sorting algorithm; it need not preserve order of indices in ties.
+   * Return the indices that would sort an array. Unlike `sort`, this is
+   * guaranteed to be a stable sorting algorithm; it always returns the smaller
+   * index first in event of ties.
    *
    * See `jax.numpy.argsort` for full docs.
    */
@@ -1112,6 +1118,12 @@ type DTypeAndDevice = {
   dtype?: DType;
   device?: Device;
 };
+/** @inline */
+type DTypeShapeAndDevice = {
+  dtype?: DType;
+  shape?: number[];
+  device?: Device;
+};
 type ArrayConstructorArgs = {
   source: AluExp | Slot;
   st: ShapeTracker;
@@ -1221,15 +1233,9 @@ declare function array(values: Array | DataArray | RecursiveArray<number> | Recu
 type ImplRule<P extends Primitive> = (tracers: Array[], params: PrimitiveParams<P>) => Array[];
 declare const implRules: { [P in Primitive]: ImplRule<P> };
 /** Return a new array of given shape and type, filled with zeros. */
-declare function zeros(shape: number[], {
-  dtype,
-  device
-}?: DTypeAndDevice): Array;
+declare function zeros(shape: number[], opts?: DTypeAndDevice): Array;
 /** Return a new array of given shape and type, filled with ones. */
-declare function ones(shape: number[], {
-  dtype,
-  device
-}?: DTypeAndDevice): Array;
+declare function ones(shape: number[], opts?: DTypeAndDevice): Array;
 /** Return a new array of given shape and type, filled with `fill_value`. */
 declare function full(shape: number[], fillValue: number | boolean | Array, {
   dtype,
@@ -1421,7 +1427,7 @@ declare function triangularSolve(a: ArrayLike, b: ArrayLike, {
   unitDiagonal?: boolean;
 }): Array;
 declare namespace lax_d_exports {
-  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot$1 as dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
+  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot$1 as dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient, topK };
 }
 /**
  * Dimension numbers for general `dot()` primitive.
@@ -1527,6 +1533,16 @@ declare function erfc(x: ArrayLike): Array;
  * forward or reverse-mode automatic differentiation.
  */
 declare function stopGradient(x: ArrayLike): Array;
+/**
+ * Returns top `k` values and their indices along the specified axis of operand.
+ *
+ * This is a _stable_ algorithm: If two elements are equal, the lower-index
+ * element appears first.
+ *
+ * @returns A tuple of `(values, indices)`, where `values` and `indices` have
+ * the same shape as `x`, except along `axis` where they have size `k`.
+ */
+declare function topK(x: ArrayLike, k: number, axis?: number): [Array, Array];
 declare namespace numpy_fft_d_exports {
   export { ComplexPair, fft, ifft };
 }
@@ -1752,17 +1768,17 @@ declare const shape$1: (x: ArrayLike) => number[];
  * @function
  * Return an array of zeros with the same shape and type as a given array.
  */
-declare const zerosLike: (a: ArrayLike, dtype?: DType) => Array;
+declare const zerosLike: (a: ArrayLike, opts?: DTypeShapeAndDevice) => Array;
 /**
  * @function
  * Return an array of ones with the same shape and type as a given array.
  */
-declare const onesLike: (a: ArrayLike, dtype?: DType) => Array;
+declare const onesLike: (a: ArrayLike, opts?: DTypeShapeAndDevice) => Array;
 /**
  * @function
  * Return a full array with the same shape and type as a given array.
  */
-declare const fullLike: (a: ArrayLike, fillValue: number | boolean | Array, dtype?: DType) => Array;
+declare const fullLike: (a: ArrayLike, fillValue: number | boolean | Array, opts?: DTypeShapeAndDevice) => Array;
 /**
  * Return the number of elements in an array, optionally along an axis.
  * Does not consume array reference.
@@ -1951,8 +1967,9 @@ declare function trace(a: ArrayLike, offset?: number, axis1?: number, axis2?: nu
  */
 declare function sort(a: ArrayLike, axis?: number): Array;
 /**
- * Return indices that would sort an array. This may be an unstable sorting
- * algorithm; it need not preserve order of indices in ties.
+ * Return indices that would sort an array. Unlike `sort`, this is guaranteed to
+ * be a stable sorting algorithm; it always returns the smaller index first in
+ * event of ties.
  *
  * Returns an array of `int32` indices.
  *
@@ -2564,7 +2581,7 @@ declare function dotProductAttention(query: ArrayLike, key: ArrayLike, value: Ar
   localWindowSize?: number | [number, number];
 }): Array;
 declare namespace random_d_exports {
-  export { bernoulli, bits, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
+  export { bernoulli, bits, categorical, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
 }
 /** Create a pseudo-random number generator (PRNG) key from 32-bit integer seed. */
 declare function key(seed: ArrayLike): Array;
@@ -2587,6 +2604,32 @@ declare const uniform: OwnedFunction<(key: ArrayLike, shape?: number[] | undefin
  * and must be broadcastable to `shape`.
  */
 declare function bernoulli(key: Array, p?: ArrayLike, shape?: number[]): Array;
+/**
+ * @function
+ * Sample random values from categorical distributions.
+ *
+ * Uses the Gumbel max trick for sampling with replacement, or the Gumbel top-k
+ * trick for sampling without replacement.
+ *
+ * Note: Sampling without replacement currently uses argsort and slices the last
+ * k elements. This should be replaced with a more efficient topK implementation.
+ *
+ * - `key` - PRNG key
+ * - `logits` - Unnormalized log probabilities of the categorical distribution(s).
+ *   `softmax(logits, axis)` gives the corresponding probabilities.
+ * - `axis` - Axis along which logits belong to the same categorical distribution.
+ * - `shape` - Result batch shape. Must be broadcast-compatible with
+ *   `logits.shape` with `axis` removed. Default is `logits.shape` with `axis` removed.
+ * - `replace` - If true (default), sample with replacement. If false, sample
+ *   without replacement (each category can only be selected once per batch).
+ * @returns A random array with int dtype and shape given by `shape` if provided,
+ *   otherwise `logits.shape` with `axis` removed.
+ */
+declare const categorical: OwnedFunction<(key: ArrayLike, logits: ArrayLike, args_2?: {
+  axis?: number | undefined;
+  shape?: number[] | undefined;
+  replace?: boolean | undefined;
+} | undefined) => Array>;
 /**
  * @function
  * Sample from a Cauchy distribution with location 0 and scale 1.

package/dist/index.d.ts CHANGED Viewed

@@ -433,9 +433,14 @@ declare class Routine {
 }
 /** One of the valid `Routine` that can be dispatched to backend. */
 declare enum Routines {
-  /** Stable sorting algorithm along the last axis. */
+  /**
+   * Sort along the last axis.
+   *
+   * This may be _unstable_ but it often doesn't matter, sorting numbers is
+   * bitwise unique up to signed zeros and NaNs.
+   */
   Sort = "Sort",
-  /** Returns `int32` indices of the stably sorted array. */
+  /** Stable sorting, returns `int32` indices and values of the sorted array. */
   Argsort = "Argsort",
   /**
    * Solve a triangular system of equations.
@@ -747,9 +752,9 @@ declare enum Primitive {
   Shrink = "shrink",
   Pad = "pad",
   Sort = "sort",
-  // sort(x, axis=-1)
+  // sort(x, axis=-1), unstable
   Argsort = "argsort",
-  // argsort(x, axis=-1)
+  // argsort(x, axis=-1), stable
   TriangularSolve = "triangular_solve",
   // A is upper triangular, A @ X.T = B.T
   Cholesky = "cholesky",
@@ -1026,8 +1031,9 @@ declare abstract class Tracer {
    */
   sort(axis?: number): this;
   /**
-   * Return the indices that would sort an array. This may not be a stable
-   * sorting algorithm; it need not preserve order of indices in ties.
+   * Return the indices that would sort an array. Unlike `sort`, this is
+   * guaranteed to be a stable sorting algorithm; it always returns the smaller
+   * index first in event of ties.
    *
    * See `jax.numpy.argsort` for full docs.
    */
@@ -1109,6 +1115,12 @@ type DTypeAndDevice = {
   dtype?: DType;
   device?: Device;
 };
+/** @inline */
+type DTypeShapeAndDevice = {
+  dtype?: DType;
+  shape?: number[];
+  device?: Device;
+};
 type ArrayConstructorArgs = {
   source: AluExp | Slot;
   st: ShapeTracker;
@@ -1218,15 +1230,9 @@ declare function array(values: Array | DataArray | RecursiveArray<number> | Recu
 type ImplRule<P extends Primitive> = (tracers: Array[], params: PrimitiveParams<P>) => Array[];
 declare const implRules: { [P in Primitive]: ImplRule<P> };
 /** Return a new array of given shape and type, filled with zeros. */
-declare function zeros(shape: number[], {
-  dtype,
-  device
-}?: DTypeAndDevice): Array;
+declare function zeros(shape: number[], opts?: DTypeAndDevice): Array;
 /** Return a new array of given shape and type, filled with ones. */
-declare function ones(shape: number[], {
-  dtype,
-  device
-}?: DTypeAndDevice): Array;
+declare function ones(shape: number[], opts?: DTypeAndDevice): Array;
 /** Return a new array of given shape and type, filled with `fill_value`. */
 declare function full(shape: number[], fillValue: number | boolean | Array, {
   dtype,
@@ -1418,7 +1424,7 @@ declare function triangularSolve(a: ArrayLike, b: ArrayLike, {
   unitDiagonal?: boolean;
 }): Array;
 declare namespace lax_d_exports {
-  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot$1 as dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
+  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot$1 as dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient, topK };
 }
 /**
  * Dimension numbers for general `dot()` primitive.
@@ -1524,6 +1530,16 @@ declare function erfc(x: ArrayLike): Array;
  * forward or reverse-mode automatic differentiation.
  */
 declare function stopGradient(x: ArrayLike): Array;
+/**
+ * Returns top `k` values and their indices along the specified axis of operand.
+ *
+ * This is a _stable_ algorithm: If two elements are equal, the lower-index
+ * element appears first.
+ *
+ * @returns A tuple of `(values, indices)`, where `values` and `indices` have
+ * the same shape as `x`, except along `axis` where they have size `k`.
+ */
+declare function topK(x: ArrayLike, k: number, axis?: number): [Array, Array];
 declare namespace numpy_fft_d_exports {
   export { ComplexPair, fft, ifft };
 }
@@ -1749,17 +1765,17 @@ declare const shape$1: (x: ArrayLike) => number[];
  * @function
  * Return an array of zeros with the same shape and type as a given array.
  */
-declare const zerosLike: (a: ArrayLike, dtype?: DType) => Array;
+declare const zerosLike: (a: ArrayLike, opts?: DTypeShapeAndDevice) => Array;
 /**
  * @function
  * Return an array of ones with the same shape and type as a given array.
  */
-declare const onesLike: (a: ArrayLike, dtype?: DType) => Array;
+declare const onesLike: (a: ArrayLike, opts?: DTypeShapeAndDevice) => Array;
 /**
  * @function
  * Return a full array with the same shape and type as a given array.
  */
-declare const fullLike: (a: ArrayLike, fillValue: number | boolean | Array, dtype?: DType) => Array;
+declare const fullLike: (a: ArrayLike, fillValue: number | boolean | Array, opts?: DTypeShapeAndDevice) => Array;
 /**
  * Return the number of elements in an array, optionally along an axis.
  * Does not consume array reference.
@@ -1948,8 +1964,9 @@ declare function trace(a: ArrayLike, offset?: number, axis1?: number, axis2?: nu
  */
 declare function sort(a: ArrayLike, axis?: number): Array;
 /**
- * Return indices that would sort an array. This may be an unstable sorting
- * algorithm; it need not preserve order of indices in ties.
+ * Return indices that would sort an array. Unlike `sort`, this is guaranteed to
+ * be a stable sorting algorithm; it always returns the smaller index first in
+ * event of ties.
  *
  * Returns an array of `int32` indices.
  *
@@ -2561,7 +2578,7 @@ declare function dotProductAttention(query: ArrayLike, key: ArrayLike, value: Ar
   localWindowSize?: number | [number, number];
 }): Array;
 declare namespace random_d_exports {
-  export { bernoulli, bits, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
+  export { bernoulli, bits, categorical, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
 }
 /** Create a pseudo-random number generator (PRNG) key from 32-bit integer seed. */
 declare function key(seed: ArrayLike): Array;
@@ -2584,6 +2601,32 @@ declare const uniform: OwnedFunction<(key: ArrayLike, shape?: number[] | undefin
  * and must be broadcastable to `shape`.
  */
 declare function bernoulli(key: Array, p?: ArrayLike, shape?: number[]): Array;
+/**
+ * @function
+ * Sample random values from categorical distributions.
+ *
+ * Uses the Gumbel max trick for sampling with replacement, or the Gumbel top-k
+ * trick for sampling without replacement.
+ *
+ * Note: Sampling without replacement goes through `topK`, which currently runs a
+ * full argsort and keeps the last k elements; a dedicated kernel would be faster.
+ *
+ * - `key` - PRNG key
+ * - `logits` - Unnormalized log probabilities of the categorical distribution(s).
+ *   `softmax(logits, axis)` gives the corresponding probabilities.
+ * - `axis` - Axis along which logits belong to the same categorical distribution.
+ * - `shape` - Result batch shape. Must be broadcast-compatible with
+ *   `logits.shape` with `axis` removed. Default is `logits.shape` with `axis` removed.
+ * - `replace` - If true (default), sample with replacement. If false, sample
+ *   without replacement (each category can only be selected once per batch).
+ * @returns A random array with int dtype and shape given by `shape` if provided,
+ *   otherwise `logits.shape` with `axis` removed.
+ */
+declare const categorical: OwnedFunction<(key: ArrayLike, logits: ArrayLike, args_2?: {
+  axis?: number | undefined;
+  shape?: number[] | undefined;
+  replace?: boolean | undefined;
+} | undefined) => Array>;
 /**
  * @function
  * Sample from a Cauchy distribution with location 0 and scale 1.

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { __export } from "./chunk-Cl8Af3a2.js";
-import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, checkInts, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-nEolvdLv.js";
+import { AluExp, AluGroup, AluOp, AluVar, DEBUG, DType, FpHash, Kernel, PPrint, Reduction, Routine, Routines, ShapeTracker, accessorAluExp, accessorGlobal, assertNonNull, byteWidth, checkAxis, checkInts, deepEqual, defaultDevice, devices, dtypedArray, dtypedJsArray, generalBroadcast, getBackend, init, invertPermutation, isFloatDtype, isNumberPair, isPermutation, normalizeAxis, partitionList, prod, promoteTypes, range, recursiveFlatten, rep, runWithCache, setDebug, toposort, unravelAlu, unzip2, zip, zipn } from "./backend-BId79r5b.js";
 //#region src/frontend/convolution.ts
 /**
@@ -889,18 +889,25 @@ var Tracer = class Tracer {
 		return sort$1(this.transpose(perm)).transpose(invertPermutation(perm));
 	}
 	/**
-	* Return the indices that would sort an array. This may not be a stable
-	* sorting algorithm; it need not preserve order of indices in ties.
+	* Return the indices that would sort an array. Unlike `sort`, this is
+	* guaranteed to be a stable sorting algorithm; it always returns the smaller
+	* index first in event of ties.
 	*
 	* See `jax.numpy.argsort` for full docs.
 	*/
 	argsort(axis = -1) {
 		axis = checkAxis(axis, this.ndim);
-		if (axis === this.ndim - 1) return argsort$1(this)[1];
+		if (axis === this.ndim - 1) {
+			const [y$1, yi$1] = argsort$1(this);
+			y$1.dispose();
+			return yi$1;
+		}
 		const perm = range(this.ndim);
 		perm.splice(axis, 1);
 		perm.push(axis);
-		return argsort$1(this.transpose(perm))[1].transpose(invertPermutation(perm));
+		const [y, yi] = argsort$1(this.transpose(perm));
+		y.dispose();
+		return yi.transpose(invertPermutation(perm));
 	}
 	/**
 	* Slice an array along one or more axes.
@@ -3381,32 +3388,26 @@ function fullInternal(aval, fillValue, device) {
 		committed: device != void 0
 	});
 }
-function zerosLike$1(val, dtype) {
-	return fullLike(val, 0, dtype);
+function zerosLike$1(val, opts) {
+	return fullLike(val, 0, opts);
 }
-function onesLike$1(val, dtype) {
-	return fullLike(val, 1, dtype);
+function onesLike$1(val, opts) {
+	return fullLike(val, 1, opts);
 }
-function fullLike(val, fillValue, dtype) {
+function fullLike(val, fillValue, { dtype, shape: shape$1, device } = {}) {
 	const aval = getAval(val);
 	if (val instanceof Tracer) val.dispose();
 	if (fillValue instanceof Tracer) throw new Error("numpy.fullLike() with array argument not implemented yet");
-	const sa = new ShapedArray(aval.shape, dtype ?? aval.dtype, aval.weakType);
-	return fullInternal(sa, fillValue);
+	const sa = new ShapedArray(shape$1 ?? aval.shape, dtype ?? aval.dtype, aval.weakType && dtype === void 0);
+	return fullInternal(sa, fillValue, device);
 }
 /** Return a new array of given shape and type, filled with zeros. */
-function zeros(shape$1, { dtype, device } = {}) {
-	return full(shape$1, 0, {
-		dtype,
-		device
-	});
+function zeros(shape$1, opts) {
+	return full(shape$1, 0, opts);
 }
 /** Return a new array of given shape and type, filled with ones. */
-function ones(shape$1, { dtype, device } = {}) {
-	return full(shape$1, 1, {
-		dtype,
-		device
-	});
+function ones(shape$1, opts) {
+	return full(shape$1, 1, opts);
 }
 /** Return a new array of given shape and type, filled with `fill_value`. */
 function full(shape$1, fillValue, { dtype, device } = {}) {
@@ -5292,9 +5293,10 @@ function lstsq(a, b) {
 		});
 		const llb = triangularSolve(l, lb, {
 			leftSide: true,
+			lower: true,
 			transposeA: true
 		});
-		return matmul(at, llb.ref);
+		return matmul(at, llb);
 	} else {
 		const ata = matmul(at.ref, a);
 		const l = cholesky(ata, { symmetrizeInput: false });
@@ -5305,6 +5307,7 @@ function lstsq(a, b) {
 		});
 		const llb = triangularSolve(l, lb, {
 			leftSide: true,
+			lower: true,
 			transposeA: true
 		});
 		return llb;
@@ -5384,7 +5387,7 @@ function solve(a, b) {
 		lower: true,
 		unitDiagonal: true
 	});
-	let x = triangularSolve(lu$2, LPb.ref, {
+	let x = triangularSolve(lu$2, LPb, {
 		leftSide: true,
 		lower: false
 	});
@@ -6195,8 +6198,9 @@ function sort(a, axis = -1) {
 	return fudgeArray(a).sort(axis);
 }
 /**
-* Return indices that would sort an array. This may be an unstable sorting
-* algorithm; it need not preserve order of indices in ties.
+* Return indices that would sort an array. Unlike `sort`, this is guaranteed to
+* be a stable sorting algorithm; it always returns the smaller index first in
+* event of ties.
 *
 * Returns an array of `int32` indices.
 *
@@ -6498,7 +6502,7 @@ function absolute(x) {
 /** Return an element-wise indication of sign of the input. */
 function sign(x) {
 	x = fudgeArray(x);
-	return where(notEqual(x.ref, 0), where(less(x.ref, 0), -1, 1), 0);
+	return where(notEqual(x.ref, 0), where(less(x, 0), -1, 1), 0);
 }
 /** @function Return element-wise positive values of the input (no-op). */
 const positive = fudgeArray;
@@ -6966,7 +6970,10 @@ function triangularSolve(a, b, { leftSide = false, lower = false, transposeA = f
 	b = fudgeArray(b);
 	if (!leftSide) transposeA = !transposeA;
 	else b = moveaxis$1(b, -2, -1);
-	if (transposeA) a = moveaxis$1(a, -2, -1);
+	if (transposeA) {
+		a = moveaxis$1(a, -2, -1);
+		lower = !lower;
+	}
 	let x = triangularSolve$1(a, b, {
 		lower,
 		unitDiagonal
@@ -6988,7 +6995,8 @@ __export(lax_exports, {
 	erfc: () => erfc,
 	linalg: () => lax_linalg_exports,
 	reduceWindow: () => reduceWindow,
-	stopGradient: () => stopGradient$1
+	stopGradient: () => stopGradient$1,
+	topK: () => topK
 });
 const JsArray = globalThis.Array;
 /**
@@ -7212,6 +7220,39 @@ function erfc(x) {
 function stopGradient$1(x) {
 	return stopGradient(x);
 }
+/**
+* Returns top `k` values and their indices along the specified axis of operand.
+*
+* This is a _stable_ algorithm: If two elements are equal, the lower-index
+* element appears first.
+*
+* @param x - Input array.
+* @param k - Number of entries to keep; an integer in `[0, x.shape[axis]]`.
+* @param axis - Axis to select along (default `-1`).
+* @returns A tuple of `(values, indices)`, where `values` and `indices` have
+* the same shape as `x`, except along `axis` where they have size `k`.
+* @throws Error if `k` is not an integer in the valid range.
+*/
+function topK(x, k, axis = -1) {
+	x = fudgeArray(x);
+	axis = checkAxis(axis, x.ndim);
+	const size$1 = x.shape[axis];
+	// NaN and fractional k would slip through the range comparisons alone, so test for an integer first.
+	if (!Number.isInteger(k) || k < 0 || k > size$1) throw new Error(`topK: k must be an integer in the range [0, ${size$1}], got ${k}`);
+	if (k === 0) {
+		// Nothing to select: return empty values and empty int32 indices.
+		const outShape = x.shape.slice();
+		outShape[axis] = 0;
+		return [zerosLike$1(x.ref, { shape: outShape }), zerosLike$1(x, { dtype: DType.Int32, shape: outShape })];
+	}
+	// Flipping before the stable argsort (and back afterwards) makes ties come out lower-index first.
+	x = moveaxis(flip$1(x, [axis]), axis, -1);
+	const [y, yi] = argsort$1(x);
+	// Keep the last k entries of the sorted axis, then restore the axis position and flip the order.
+	const extract = (a) => flip$1(moveaxis(a.slice(...rep(a.ndim - 1, []), [-k]), -1, axis), [axis]);
+	// `yi` indexes the flipped array; `size - 1 - yi` is the position in the original.
+	return [extract(y), extract(yi.neg().add(size$1 - 1))];
+}
 //#endregion
 //#region src/library/nn.ts
@@ -7403,7 +7444,7 @@ const gelu = jit$1(function gelu$1(x, opts) {
 	if (opts?.approximate ?? true) {
 		const SQRT_2_OVER_PI = Math.sqrt(2 / Math.PI);
 		return x.ref.mul(.5).mul(tanh(x.ref.mul(x.ref.mul(x).mul(.044715).add(1)).mul(SQRT_2_OVER_PI)).add(1));
-	} else return x.ref.mul(.5).mul(erfc$1(negative(x.ref.mul(Math.SQRT1_2))));
+	} else return x.ref.mul(.5).mul(erfc$1(negative(x.mul(Math.SQRT1_2))));
 }, { staticArgnums: [1] });
 /**
 * Gated linear unit (GLU) activation function.
@@ -7661,6 +7702,7 @@ var random_exports = {};
 __export(random_exports, {
 	bernoulli: () => bernoulli,
 	bits: () => bits,
+	categorical: () => categorical,
 	cauchy: () => cauchy,
 	exponential: () => exponential,
 	gumbel: () => gumbel,
@@ -7732,6 +7774,47 @@ function bernoulli(key$1, p = .5, shape$1 = []) {
 }
 /**
 * @function
+* Sample random values from categorical distributions.
+*
+* Uses the Gumbel max trick for sampling with replacement, or the Gumbel top-k
+* trick for sampling without replacement.
+*
+* Note: Sampling without replacement goes through `topK`, which currently runs a
+* full argsort and keeps the last k elements; a dedicated kernel would be faster.
+*
+* - `key` - PRNG key
+* - `logits` - Unnormalized log probabilities of the categorical distribution(s).
+*   `softmax(logits, axis)` gives the corresponding probabilities.
+* - `axis` - Axis along which logits belong to the same categorical distribution.
+* - `shape` - Result batch shape. Must be broadcast-compatible with
+*   `logits.shape` with `axis` removed. Default is `logits.shape` with `axis` removed.
+* - `replace` - If true (default), sample with replacement. If false, sample
+*   without replacement (each category can only be selected once per batch).
+* @returns A random array with int dtype and shape given by `shape` if provided,
+*   otherwise `logits.shape` with `axis` removed.
+*/
+const categorical = jit$1(function categorical$1(key$1, logits, { axis = -1, shape: shape$1, replace = true } = {}) {
+	logits = fudgeArray(logits);
+	axis = checkAxis(axis, logits.ndim);
+	const numCategories = logits.shape[axis];
+	const batchShape = logits.shape.toSpliced(axis, 1);
+	if (shape$1 === void 0) shape$1 = batchShape;
+	else if (!deepEqual(generalBroadcast(shape$1, batchShape), shape$1)) throw new Error(`Shape ${shape$1} is not broadcast-compatible with batch shape ${batchShape}.`);
+	const shapePrefix = shape$1.slice(0, shape$1.length - batchShape.length);
+	if (replace) {
+		const noise = gumbel(key$1, [...shapePrefix, ...logits.shape]);
+		return argmax(noise.add(logits), axis + shapePrefix.length);
+	}
+	const k = shapePrefix.reduce((a, b) => a * b, 1);
+	if (k > numCategories) throw new Error(`Number of samples without replacement (${k}) cannot exceed number of categories (${numCategories}).`);
+	const perturbed = gumbel(key$1, logits.shape).add(logits);
+	const [values, indices] = topK(perturbed, k, axis);
+	values.dispose();
+	// topK leaves the k draws on `axis`; bring them to the front so the reshape maps them onto `shapePrefix`.
+	return moveaxis(indices, axis, 0).reshape(shape$1);
+}, { staticArgnums: [2] });
+/**
+* @function
 * Sample from a Cauchy distribution with location 0 and scale 1.
 *
 * Uses inverse transform sampling: `x = tan(π * (u - 0.5))` where u ~ Uniform(0, 1).

package/dist/{webgl-DIIbKJ0G.cjs → webgl-C5NjXc1p.cjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-const require_backend = require('./backend-B3foXiV_.cjs');
+const require_backend = require('./backend-DpI0riom.cjs');
 //#region src/backend/webgl/builtins.ts
 const threefrySrc = `

package/dist/{webgl-DweKSWEm.js → webgl-DnGrclTz.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { AluGroup, AluOp, DEBUG, DType, Executable, SlotError, UnsupportedOpError, UnsupportedRoutineError, isFloatDtype, range, strip1, tuneNullopt } from "./backend-nEolvdLv.js";
+import { AluGroup, AluOp, DEBUG, DType, Executable, SlotError, UnsupportedOpError, UnsupportedRoutineError, isFloatDtype, range, strip1, tuneNullopt } from "./backend-BId79r5b.js";
 //#region src/backend/webgl/builtins.ts
 const threefrySrc = `

package/dist/{webgpu-B96vzWGE.js → webgpu-AN0cG_nB.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { AluExp, AluGroup, AluOp, DEBUG, DType, Executable, FpHash, Routines, SlotError, UnsupportedOpError, UnsupportedRoutineError, findPow2, isFloatDtype, mapSetUnion, prod, range, strip1, tuneWebgpu } from "./backend-nEolvdLv.js";
+import { AluExp, AluGroup, AluOp, DEBUG, DType, Executable, FpHash, Routines, SlotError, UnsupportedOpError, UnsupportedRoutineError, findPow2, isFloatDtype, mapSetUnion, prod, range, strip1, tuneWebgpu } from "./backend-BId79r5b.js";
 //#region src/backend/webgpu/builtins.ts
 const threefrySrc = `
@@ -247,6 +247,10 @@ function bitonicSortUniform(pass) {
 *   `2^(step+1)` with multiple workgroups. This doesn't use shared memory.
 *
 * The total number of passes is roughly `log2(n / workgroupSize)^2 / 2`.
+*
+* If `outputIndices` is true, the shader also tracks the original indices of
+* the sorted elements (argsort) and outputs them to a separate buffer. This
+* also makes the sorting algorithm stable.
 */
 function bitonicSortShader(device, dtype, n, batches, outputIndices) {
 	const ty = dtypeToWgsl(dtype, true);
@@ -286,14 +290,21 @@ ${isFloatDtype(dtype) ? `
 fn compare_and_swap(i: u32, j: u32) {
   let val_i = shared_vals[i];
   let val_j = shared_vals[j];
-  if (compare(val_j, val_i)) {
+${outputIndices ? `
+  if (
+    compare(val_j, val_i) ||
+    (!compare(val_i, val_j) && shared_idx[j] < shared_idx[i])
+  ) {
     shared_vals[i] = val_j;
     shared_vals[j] = val_i;
-${outputIndices ? `
     let tmp_idx = shared_idx[i];
     shared_idx[i] = shared_idx[j];
-    shared_idx[j] = tmp_idx;` : ""}
-  }
+    shared_idx[j] = tmp_idx;
+  }` : `
+  if (compare(val_j, val_i)) {
+    shared_vals[i] = val_j;
+    shared_vals[j] = val_i;
+  }`}
 }
 @compute @workgroup_size(${workgroupSize})
@@ -370,13 +381,17 @@ ${outputIndices ? `
     if (j < ${n}u) {
       let val_i = output[base + i];
       let val_j = output[base + j];
-      if (compare(val_j, val_i)) {
+${outputIndices ? `
+      let idx_i = output_idx[base + i];
+      let idx_j = output_idx[base + j];
+      if (compare(val_j, val_i) || (!compare(val_i, val_j) && idx_j < idx_i)) {
         output[base + i] = val_j;
         output[base + j] = val_i;
-${outputIndices ? `
-        let tmp_idx = output_idx[base + i];
-        output_idx[base + i] = output_idx[base + j];
-        output_idx[base + j] = tmp_idx;` : ""}
+        output_idx[base + i] = idx_j;
+        output_idx[base + j] = idx_i;` : `
+      if (compare(val_j, val_i)) {
+        output[base + i] = val_j;
+        output[base + j] = val_i;`}
       }
     }
   }

package/dist/{webgpu-BykvF26B.cjs → webgpu-CdjiJSa7.cjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-const require_backend = require('./backend-B3foXiV_.cjs');
+const require_backend = require('./backend-DpI0riom.cjs');
 //#region src/backend/webgpu/builtins.ts
 const threefrySrc = `
@@ -247,6 +247,10 @@ function bitonicSortUniform(pass) {
 *   `2^(step+1)` with multiple workgroups. This doesn't use shared memory.
 *
 * The total number of passes is roughly `log2(n / workgroupSize)^2 / 2`.
+*
+* If `outputIndices` is true, the shader also tracks the original indices of
+* the sorted elements (argsort) and outputs them to a separate buffer. This
+* also makes the sorting algorithm stable.
 */
 function bitonicSortShader(device, dtype, n, batches, outputIndices) {
 	const ty = dtypeToWgsl(dtype, true);
@@ -286,14 +290,21 @@ ${require_backend.isFloatDtype(dtype) ? `
 fn compare_and_swap(i: u32, j: u32) {
   let val_i = shared_vals[i];
   let val_j = shared_vals[j];
-  if (compare(val_j, val_i)) {
+${outputIndices ? `
+  if (
+    compare(val_j, val_i) ||
+    (!compare(val_i, val_j) && shared_idx[j] < shared_idx[i])
+  ) {
     shared_vals[i] = val_j;
     shared_vals[j] = val_i;
-${outputIndices ? `
     let tmp_idx = shared_idx[i];
     shared_idx[i] = shared_idx[j];
-    shared_idx[j] = tmp_idx;` : ""}
-  }
+    shared_idx[j] = tmp_idx;
+  }` : `
+  if (compare(val_j, val_i)) {
+    shared_vals[i] = val_j;
+    shared_vals[j] = val_i;
+  }`}
 }
 @compute @workgroup_size(${workgroupSize})
@@ -370,13 +381,17 @@ ${outputIndices ? `
     if (j < ${n}u) {
       let val_i = output[base + i];
       let val_j = output[base + j];
-      if (compare(val_j, val_i)) {
+${outputIndices ? `
+      let idx_i = output_idx[base + i];
+      let idx_j = output_idx[base + j];
+      if (compare(val_j, val_i) || (!compare(val_i, val_j) && idx_j < idx_i)) {
         output[base + i] = val_j;
         output[base + j] = val_i;
-${outputIndices ? `
-        let tmp_idx = output_idx[base + i];
-        output_idx[base + i] = output_idx[base + j];
-        output_idx[base + j] = tmp_idx;` : ""}
+        output_idx[base + i] = idx_j;
+        output_idx[base + j] = idx_i;` : `
+      if (compare(val_j, val_i)) {
+        output[base + i] = val_j;
+        output[base + j] = val_i;`}
       }
     }
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jax-js/jax",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "description": "Numerical computing and ML in the browser",
   "keywords": [
     "machine learning",
@@ -44,6 +44,8 @@
     "eslint": "^9.31.0",
     "eslint-plugin-import": "^2.32.0",
     "globals": "^16.0.0",
+    "husky": "^9.1.7",
+    "lint-staged": "^16.2.7",
     "playwright": "~1.52.0",
     "prettier": "^3.6.2",
     "prettier-plugin-svelte": "^3.4.0",
@@ -74,6 +76,15 @@
     ],
     "proseWrap": "always"
   },
+  "lint-staged": {
+    "*.{ts,tsx,js,jsx}": [
+      "eslint --fix",
+      "prettier --write"
+    ],
+    "*.{json,md,yml,yaml,css,svelte,html}": [
+      "prettier --write"
+    ]
+  },
   "scripts": {
     "build": "tsdown",
     "build:watch": "TSDOWN_WATCH_MODE=1 tsdown",