npm - @jax-js/jax - Versions diffs - 0.1.5 → 0.1.6 - Mend

@jax-js/jax 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/{backend-DziQSaoQ.cjs → backend-D7s-Retx.cjs} +23 -4
package/dist/{backend-DaqL-MNz.js → backend-Dx6Ob2D1.js} +18 -5
package/dist/index.cjs +365 -110
package/dist/index.d.cts +192 -13
package/dist/index.d.ts +192 -13
package/dist/index.js +365 -111
package/dist/{webgl-RSuZKvgc.js → webgl-CLLvzJlO.js} +1 -1
package/dist/{webgl-ClIYb8jP.cjs → webgl-CyfzNW8T.cjs} +1 -1
package/dist/{webgpu-Dh7k9io0.js → webgpu-C-VfevQW.js} +1 -1
package/dist/{webgpu-Db2JrNBr.cjs → webgpu-rraa6dfz.cjs} +1 -1
package/package.json +1 -1

package/dist/index.d.cts CHANGED Viewed

@@ -666,7 +666,7 @@ type IInfo = Readonly<{
 /** Machine limits for integer types. */
 declare function iinfo(dtype: DType): IInfo;
 declare namespace numpy_d_exports {
-  export { Array, ArrayLike, DType, absolute as abs, absolute, acos, arccosh as acosh, add, all, allclose, any, arange, acos as arccos, arccosh, asin as arcsin, arcsinh, atan as arctan, atan2 as arctan2, arctanh, argmax, argmin, argsort, array, asin, arcsinh as asinh, astype, atan, atan2, arctanh as atanh, bool, broadcastArrays, broadcastShapes, broadcastTo, cbrt, ceil, clip, columnStack, concatenate, convolve, corrcoef, correlate, cos, cosh, cov, cumsum, cumsum as cumulativeSum, deg2rad, degrees, diag, diagonal, trueDivide as divide, divmod, dot$1 as dot, dstack, e, einsum, equal, eulerGamma, exp, exp2, expandDims, expm1, eye, numpy_fft_d_exports as fft, finfo, flip, fliplr, flipud, float16, float32, float64, floor, floorDivide, fmod, frexp, full, fullLike, greater, greaterEqual, hamming, hann, heaviside, hstack, hypot, identity$1 as identity, iinfo, inf, inner, int32, isfinite, isinf, isnan, isneginf, isposinf, ldexp, less, lessEqual, numpy_linalg_d_exports as linalg, linspace, log, log10, log1p, log2, logspace, matmul, matrixTranspose, max, maximum, mean, meshgrid, min, minimum, moveaxis, multiply, nan, ndim, negative, notEqual, ones, onesLike, outer, pad, transpose as permuteDims, pi, positive, power as pow, power, prod, promoteTypes, ptp, rad2deg, radians, ravel, reciprocal, remainder, repeat, reshape, shape$1 as shape, sign, sin, sinc, sinh, size, sort, split$1 as split, sqrt, square, squeeze, stack, std, subtract, sum, take, tan, tanh, tensordot, tile, trace, transpose, tri, tril, triu, trueDivide, trunc, uint32, var_, vdot, vecdot, vstack, where, zeros, zerosLike };
+  export { Array, ArrayLike, DType, absolute as abs, absolute, acos, arccosh as acosh, add, all, allclose, any, arange, acos as arccos, arccosh, asin as arcsin, arcsinh, atan as arctan, atan2 as arctan2, arctanh, argmax, argmin, argsort, array, asin, arcsinh as asinh, astype, atan, atan2, arctanh as atanh, bool, broadcastArrays, broadcastShapes, broadcastTo, cbrt, ceil, clip, columnStack, concatenate, convolve, corrcoef, correlate, cos, cosh, cov, cumsum, cumsum as cumulativeSum, deg2rad, degrees, diag, diagonal, trueDivide as divide, divmod, dot$1 as dot, dstack, e, einsum, equal, eulerGamma, exp, exp2, expandDims, expm1, eye, numpy_fft_d_exports as fft, finfo, flip, fliplr, flipud, float16, float32, float64, floor, floorDivide, fmod, frexp, full, fullLike, greater, greaterEqual, hamming, hann, heaviside, hstack, hypot, identity$1 as identity, iinfo, inf, inner, int32, isfinite, isinf, isnan, isneginf, isposinf, ldexp, less, lessEqual, numpy_linalg_d_exports as linalg, linspace, log, log10, log1p, log2, logspace, matmul, matrixTranspose, max, maximum, mean, meshgrid, min, minimum, moveaxis, multiply, nan, ndim, negative, notEqual, ones, onesLike, outer, pad, transpose as permuteDims, pi, positive, power as pow, power, prod, promoteTypes, ptp, rad2deg, radians, ravel, reciprocal, remainder, repeat, reshape, shape$1 as shape, sign, sin, sinc, sinh, size, sort, split$1 as split, sqrt, square, squeeze, stack, std, subtract, sum, swapaxes, take, tan, tanh, tensordot, tile, trace, transpose, tri, tril, triu, trueDivide, trunc, uint32, var_, vdot, vecdot, vstack, where, zeros, zerosLike };
 }
 declare const float32 = DType.Float32;
 declare const int32 = DType.Int32;
@@ -886,6 +886,8 @@ declare function columnStack(xs: ArrayLike[]): Array;
 declare function flipud(x: ArrayLike): Array;
 /** Flip an array horizontally (axis=1). */
 declare function fliplr(x: ArrayLike): Array;
+/** Interchange two axes of an array. */
+declare function swapaxes(a: ArrayLike, axis1: number, axis2: number): Array;
 /** Transpose the last two dimensions of an array. */
 declare function matrixTranspose(a: ArrayLike): Array;
 /** Return a 1-D flattened array containing the elements of the input. */
@@ -1590,14 +1592,14 @@ interface PrimitiveParamsImpl extends Record<Primitive, Record<string, any>> {
   [Primitive.Pad]: {
     width: Pair[];
   };
+  [Primitive.TriangularSolve]: {
+    unitDiagonal: boolean;
+  };
   [Primitive.Jit]: {
     name: string;
     jaxpr: Jaxpr;
     numConsts: number;
   };
-  [Primitive.TriangularSolve]: {
-    unitDiagonal: boolean;
-  };
 }
 /** Type of parameters taken by each primitive. */
 type PrimitiveParams<T extends Primitive> = T extends keyof PrimitiveParamsImpl ? PrimitiveParamsImpl[T] : Record<string, never>;
@@ -2076,6 +2078,24 @@ declare function logspace(start: number, stop: number, num?: number, endpoint?:
   dtype,
   device
 }?: DTypeAndDevice): Array;
+//#endregion
+//#region src/frontend/linearize.d.ts
+/** @inline */
+type GradOpts = {
+  /**
+   * Integer or sequence of integers. Specifies which positional argument(s) to
+   * differentiate with respect to.
+   *
+   * Defaults to `0` (the first argument).
+   */
+  argnums?: number | number[];
+  /**
+   * The input function returns a pair of `[out, aux]` including an auxiliary
+   * value. This `aux` is not differentiated, but is returned alongside the
+   * gradient when evaluating the function.
+   */
+  hasAux?: boolean;
+};
 declare namespace lax_linalg_d_exports {
   export { cholesky, lu, triangularSolve };
 }
@@ -2166,7 +2186,7 @@ declare function triangularSolve(a: ArrayLike, b: ArrayLike, {
   unitDiagonal?: boolean;
 }): Array;
 declare namespace lax_d_exports {
-  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convWithGeneralPadding, dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
+  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
 }
 /**
  * Dimension numbers for general `dot()` primitive.
@@ -2204,7 +2224,11 @@ type PaddingType = "VALID" | "SAME" | "SAME_LOWER" | Pair[];
  * The semantics of this operation mimic the `jax.lax.conv_general_dilated`
  * function in JAX, which wraps XLA's general convolution operator.
  *
- * Grouped convolutions are not supported right now.
+ * @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+ * @param rhs - Convolution kernel; shape `[C_out, C_in / G, ...ks]`
+ * @param windowStrides - Strides for each spatial dimension
+ * @param padding - Padding for each spatial dimension, or a string
+ *   (`"VALID"`, `"SAME"`, or `"SAME_LOWER"`)
  */
 declare function convGeneralDilated(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType, {
   lhsDilation,
@@ -2219,6 +2243,37 @@ declare function convGeneralDilated(lhs: Array, rhs: Array, windowStrides: numbe
 declare function convWithGeneralPadding(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType, lhsDilation?: number[], rhsDilation?: number[]): Array;
 /** Convenience wrapper around `convGeneralDilated`. */
 declare function conv(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType): Array;
+/**
+ * Convenience wrapper for calculating the N-d convolution "transpose".
+ *
+ * This function directly calculates a fractionally strided conv rather than
+ * indirectly calculating the gradient (transpose) of a forward convolution.
+ * It is equivalent to the JAX version, except:
+ *
+ * - The `use_consistent_padding` option is not available. We only have the
+ *   consistent padding case (JAX version >0.8.4).
+ * - The order of dimensions matches `lax.conv_general_dilated`.
+ *
+ * Unlike PyTorch/TensorFlow, by default we don't reverse the kernel's spatial
+ * dimensions or the `(C_out, C_in)` axis order. To get this behavior, set
+ * `transposeKernel` to true.
+ *
+ * @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+ * @param rhs - Convolution kernel; shape `[C_out, C_in, ...ks]`
+ * @param strides - Sequence of n integers, sets fractional stride
+ * @param padding - Apply padding of `dilation * (kernel_size - 1) - padding` to
+ *   each side of the input, so it acts like gradient of `conv()`
+ * @param rhsDilation - Atrous dilation for the kernel
+ * @param transposeKernel - Flip spatial axes and swap the input/output channels
+ *   of the kernel; its shape should be `[C_in, C_out, ...ks]`
+ */
+declare function convTranspose(lhs: Array, rhs: Array, strides: number[], padding: PaddingType, {
+  rhsDilation,
+  transposeKernel
+}?: {
+  rhsDilation?: number[];
+  transposeKernel?: boolean;
+}): Array;
 /** Reduce a computation over padded windows. */
 declare function reduceWindow(operand: Array, computation: (x: Array) => Array, windowDimensions: number[], windowStrides?: number[]): Array;
 /** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
@@ -2238,7 +2293,7 @@ declare function erfc(x: ArrayLike): Array;
  */
 declare function stopGradient(x: ArrayLike): Array;
 declare namespace nn_d_exports {
-  export { celu, elu, gelu, glu, hardSigmoid, hardSilu, hardSilu as hardSwish, hardTanh, identity, leakyRelu, logSigmoid, logSoftmax, logmeanexp, logsumexp, mish, oneHot, relu, relu6, selu, sigmoid, silu, softSign, softmax, softplus, sparsePlus, sparseSigmoid, squareplus, standardize, silu as swish };
+  export { celu, dotProductAttention, elu, gelu, glu, hardSigmoid, hardSilu, hardSilu as hardSwish, hardTanh, identity, leakyRelu, logSigmoid, logSoftmax, logmeanexp, logsumexp, mish, oneHot, relu, relu6, selu, sigmoid, silu, softSign, softmax, softplus, sparsePlus, sparseSigmoid, squareplus, standardize, silu as swish };
 }
 /**
  * Rectified Linear Unit (ReLU) activation function:
@@ -2435,6 +2490,56 @@ declare function standardize(x: ArrayLike, axis?: Axis, opts?: {
  * ```
  */
 declare function oneHot(x: Array, numClasses: number): Array;
+/**
+ * Scaled dot product attention (SDPA).
+ *
+ * Computes `softmax((Q @ K^T) / sqrt(d) + bias) @ V`, where `Q` is the query,
+ * `K` is the key, `V` is the value, and `d` is the dimensionality of each key
+ * and query vector.
+ *
+ * Multi-query attention is applied when input `key` and `value` tensors have
+ * fewer heads than `query`.
+ *
+ * We use the following uppercase letters to denote array shapes:
+ * - `B` = batch size
+ * - `S` = length of key/value sequences (source)
+ * - `L` = length of query sequences
+ * - `N` = number of attention heads
+ * - `H` = dimensionality of each attention head
+ * - `K` = number of key/value heads (for grouped-query attention)
+ *
+ * The batch size `B` may be omitted, which is equivalent to `B = 1`. In this
+ * case it must be omitted from all inputs.
+ *
+ * @param query - Query array; shape `[B, L, N, H]`
+ * @param key - Key array; shape `[B, S, K, H]`
+ * @param value - Value array; same shape as `key`
+ * @param opts.bias - Optional bias to add to the attention logits; shape
+ *   `[B, N, L, S]` or broadcastable to it.
+ * @param opts.mask - Optional mask to apply to the attention logits; should be
+ *   a boolean array broadcastable to `[B, N, L, S]`, where `true` indicates
+ *   the element should take part in attention.
+ * @param opts.scale - Scaling factor override, default is `1 / sqrt(H)`.
+ * @param opts.isCausal - If true, applies a causal mask.
+ * @param opts.querySeqLengths - Optional sequence lengths for the queries;
+ *   shape `(B,)`. Taken from the beginning of the tensor.
+ * @param opts.keyValueSeqLengths - Optional sequence lengths for the keys and
+ *   values; shape `(B,)`. Taken from the beginning of the tensor.
+ * @param opts.localWindowSize - If specified, applies a local attention window
+ *   of the given size. Can be a single number or a tuple `[left, right]`.
+ *
+ * @returns The result of the attention operation; shape is the same as query
+ *   `[B, L, N, H]`, or `[L, N, H]` if `B` is omitted.
+ */
+declare function dotProductAttention(query: ArrayLike, key: ArrayLike, value: ArrayLike, opts?: {
+  bias?: ArrayLike;
+  mask?: ArrayLike;
+  scale?: number;
+  isCausal?: boolean;
+  querySeqLengths?: ArrayLike;
+  keyValueSeqLengths?: ArrayLike;
+  localWindowSize?: number | [number, number];
+}): Array;
 declare namespace random_d_exports {
   export { bernoulli, bits, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
 }
@@ -2526,7 +2631,9 @@ declare const logit: OwnedFunction<(x: ArrayLike) => Array>;
  * @function
  * Compute the forward-mode Jacobian-vector product for a function.
  */
-declare const jvp: <F extends (...args: any[]) => JsTree<Array>>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, ReturnType<F>];
+declare const jvp: <F extends (...args: any[]) => JsTree<Array>, HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, tangents: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, Out, Aux] : never : [ReturnType<F>, ReturnType<F>];
 /**
  * @function
  * Vectorize an operation on a batched axis for one or more inputs.
@@ -2568,28 +2675,100 @@ declare const jit: <F extends (...args: any[]) => JsTree<Array>>(f: F, opts?: Ji
  * Produce a local linear approximation to a function at a point using jvp() and
  * partial evaluation.
  */
-declare const linearize: <F extends (...args: any[]) => JsTree<Array>>(f: F, ...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, (...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>];
+declare const linearize: <F extends (...args: any[]) => JsTree<Array>, HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, OwnedFunction<(...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => Out>, Aux] : never : [ReturnType<F>, OwnedFunction<(...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>>];
 /**
  * @function
  * Calculate the reverse-mode vector-Jacobian product for a function.
+ *
+ * The return value is a tuple of `[out, vjpFn]`, where `out` is the output of
+ * `f(primals)`, and `vjpFn` is a function that takes in cotangents for each
+ * output and returns the cotangents for each input.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and `vjp` returns `[out, vjpFn, aux]`.
+ *
+ * @example
+ * ```ts
+ * const [y, vjpFn] = vjp(f, [x]);
+ *
+ * // With hasAux
+ * const [y, vjpFn, aux] = vjp(f, [x], { hasAux: true });
+ * ```
  */
-declare const vjp: <F extends (...args: any[]) => JsTree<Array>>(f: F, ...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, (cotangents: MapJsTree<ReturnType<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>];
+declare const vjp: <F extends (...args: any[]) => JsTree<Array>, const HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, OwnedFunction<(cotangents: MapJsTree<Out, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>>, Aux] : never : [ReturnType<F>, OwnedFunction<(cotangents: MapJsTree<ReturnType<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>>];
+/** @inline */
+type GradOutputType<I, F extends (...args: any[]) => any> = MapJsTree<I extends undefined ? Parameters<F>[0] : I extends number ? Parameters<F>[I] : I extends number[] ? { [K in keyof I]: I[K] extends number ? Parameters<F>[I[K]] : never } : never, ArrayLike, Array>;
 /**
  * @function
  * Compute the gradient of a scalar-valued function `f` with respect to its
  * first argument.
+ *
+ * Pass in different `argnums` to differentiate with respect to other
+ * arguments. If a tuple is provided, the return value will be a tuple of
+ * gradients corresponding to each argument index.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and the return value will be `[gradient, aux]`.
+ *
+ * @example
+ * ```ts
+ * const gradient = grad(f)(x);
+ *
+ * // With `argnums`
+ * const [gradientX, gradientZ] = grad(f, { argnums: [0, 2] })(x, y, z);
+ *
+ * // With `hasAux`
+ * const [gradient, aux] = grad(f, { hasAux: true })(x);
+ * ```
  */
-declare const grad: <F extends (...args: any[]) => JsTree<Array>>(f: F) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>[0], ArrayLike, Array>;
+declare const grad: <F extends (...args: any[]) => JsTree<Array>, const I extends undefined | number | number[] = undefined, const HA extends boolean = false>(f: F, opts?: Omit<GradOpts, "argnums" | "hasAux"> & {
+  argnums?: I;
+  hasAux?: HA;
+}) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => HA extends true ? ReturnType<F> extends [any, infer Aux] ? [GradOutputType<I, F>, Aux] : never : GradOutputType<I, F>;
 /**
  * @function
  * Create a function that evaluates both `f` and the gradient of `f`.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and the return value will be `[[out, aux], gradient]`.
+ *
+ * @example
+ * ```ts
+ * // Without hasAux
+ * const [value, gradient] = valueAndGrad(f)(x);
+ *
+ * // With hasAux
+ * const [[value, aux], gradient] = valueAndGrad(f, { hasAux: true })(x);
+ * ```
  */
-declare const valueAndGrad: <F extends (...args: any[]) => JsTree<Array>>(f: F) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, MapJsTree<Parameters<F>[0], ArrayLike, Array>];
+declare const valueAndGrad: <F extends (...args: any[]) => JsTree<Array>, const I extends undefined | number | number[] = undefined, const HA extends boolean = false>(f: F, opts?: Omit<GradOpts, "argnums"> & {
+  argnums?: I;
+  hasAux?: HA;
+}) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, GradOutputType<I, F>];
 /**
  * @function
  * Compute the Jacobian evaluated row-by-row by reverse-mode AD.
  */
 declare const jacrev: typeof jacfwd;
+/**
+ * @function
+ * Compute the Hessian matrix of a scalar-valued function.
+ *
+ * The Hessian is the matrix of second-order partial derivatives of a function.
+ * This is implemented as `jacfwd(grad(f))`.
+ *
+ * @example
+ * ```ts
+ * const f = (x: np.Array) => np.sum(x.ref.mul(x.ref).mul(x)); // x^3
+ * const H = hessian(f)(np.array([1, 2, 3]));
+ * // H[i,j] = d^2f / dx_i dx_j
+ * ```
+ */
+declare const hessian: <F extends (x: Array) => Array>(f: F) => (...args: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>;
 /**
  * Wait until all `Array` leaves are ready by calling `Array.blockUntilReady()`.
  *
@@ -2612,4 +2791,4 @@ declare function blockUntilReady<T extends JsTree<any>>(x: T): Promise<T>;
  */
 declare function devicePut<T extends JsTree<any>>(x: T, device?: Device): Promise<MapJsTree<T, number | boolean, Array>>;
 //#endregion
-export { Array, ClosedJaxpr, DType, type Device, Jaxpr, type JsTree, type JsTreeDef, type OwnedFunction, blockUntilReady, defaultDevice, devicePut, devices, grad, init, jacfwd, jacrev as jacobian, jacrev, jit, jvp, lax_d_exports as lax, linearize, makeJaxpr, nn_d_exports as nn, numpy_d_exports as numpy, random_d_exports as random, scipy_special_d_exports as scipySpecial, setDebug, tree_d_exports as tree, valueAndGrad, vjp, vmap };
+export { Array, ClosedJaxpr, DType, type Device, Jaxpr, type JsTree, type JsTreeDef, type OwnedFunction, blockUntilReady, defaultDevice, devicePut, devices, grad, hessian, init, jacfwd, jacrev as jacobian, jacrev, jit, jvp, lax_d_exports as lax, linearize, makeJaxpr, nn_d_exports as nn, numpy_d_exports as numpy, random_d_exports as random, scipy_special_d_exports as scipySpecial, setDebug, tree_d_exports as tree, valueAndGrad, vjp, vmap };

package/dist/index.d.ts CHANGED Viewed

@@ -663,7 +663,7 @@ type IInfo = Readonly<{
 /** Machine limits for integer types. */
 declare function iinfo(dtype: DType): IInfo;
 declare namespace numpy_d_exports {
-  export { Array, ArrayLike, DType, absolute as abs, absolute, acos, arccosh as acosh, add, all, allclose, any, arange, acos as arccos, arccosh, asin as arcsin, arcsinh, atan as arctan, atan2 as arctan2, arctanh, argmax, argmin, argsort, array, asin, arcsinh as asinh, astype, atan, atan2, arctanh as atanh, bool, broadcastArrays, broadcastShapes, broadcastTo, cbrt, ceil, clip, columnStack, concatenate, convolve, corrcoef, correlate, cos, cosh, cov, cumsum, cumsum as cumulativeSum, deg2rad, degrees, diag, diagonal, trueDivide as divide, divmod, dot$1 as dot, dstack, e, einsum, equal, eulerGamma, exp, exp2, expandDims, expm1, eye, numpy_fft_d_exports as fft, finfo, flip, fliplr, flipud, float16, float32, float64, floor, floorDivide, fmod, frexp, full, fullLike, greater, greaterEqual, hamming, hann, heaviside, hstack, hypot, identity$1 as identity, iinfo, inf, inner, int32, isfinite, isinf, isnan, isneginf, isposinf, ldexp, less, lessEqual, numpy_linalg_d_exports as linalg, linspace, log, log10, log1p, log2, logspace, matmul, matrixTranspose, max, maximum, mean, meshgrid, min, minimum, moveaxis, multiply, nan, ndim, negative, notEqual, ones, onesLike, outer, pad, transpose as permuteDims, pi, positive, power as pow, power, prod, promoteTypes, ptp, rad2deg, radians, ravel, reciprocal, remainder, repeat, reshape, shape$1 as shape, sign, sin, sinc, sinh, size, sort, split$1 as split, sqrt, square, squeeze, stack, std, subtract, sum, take, tan, tanh, tensordot, tile, trace, transpose, tri, tril, triu, trueDivide, trunc, uint32, var_, vdot, vecdot, vstack, where, zeros, zerosLike };
+  export { Array, ArrayLike, DType, absolute as abs, absolute, acos, arccosh as acosh, add, all, allclose, any, arange, acos as arccos, arccosh, asin as arcsin, arcsinh, atan as arctan, atan2 as arctan2, arctanh, argmax, argmin, argsort, array, asin, arcsinh as asinh, astype, atan, atan2, arctanh as atanh, bool, broadcastArrays, broadcastShapes, broadcastTo, cbrt, ceil, clip, columnStack, concatenate, convolve, corrcoef, correlate, cos, cosh, cov, cumsum, cumsum as cumulativeSum, deg2rad, degrees, diag, diagonal, trueDivide as divide, divmod, dot$1 as dot, dstack, e, einsum, equal, eulerGamma, exp, exp2, expandDims, expm1, eye, numpy_fft_d_exports as fft, finfo, flip, fliplr, flipud, float16, float32, float64, floor, floorDivide, fmod, frexp, full, fullLike, greater, greaterEqual, hamming, hann, heaviside, hstack, hypot, identity$1 as identity, iinfo, inf, inner, int32, isfinite, isinf, isnan, isneginf, isposinf, ldexp, less, lessEqual, numpy_linalg_d_exports as linalg, linspace, log, log10, log1p, log2, logspace, matmul, matrixTranspose, max, maximum, mean, meshgrid, min, minimum, moveaxis, multiply, nan, ndim, negative, notEqual, ones, onesLike, outer, pad, transpose as permuteDims, pi, positive, power as pow, power, prod, promoteTypes, ptp, rad2deg, radians, ravel, reciprocal, remainder, repeat, reshape, shape$1 as shape, sign, sin, sinc, sinh, size, sort, split$1 as split, sqrt, square, squeeze, stack, std, subtract, sum, swapaxes, take, tan, tanh, tensordot, tile, trace, transpose, tri, tril, triu, trueDivide, trunc, uint32, var_, vdot, vecdot, vstack, where, zeros, zerosLike };
 }
 declare const float32 = DType.Float32;
 declare const int32 = DType.Int32;
@@ -883,6 +883,8 @@ declare function columnStack(xs: ArrayLike[]): Array;
 declare function flipud(x: ArrayLike): Array;
 /** Flip an array horizontally (axis=1). */
 declare function fliplr(x: ArrayLike): Array;
+/** Interchange two axes of an array. */
+declare function swapaxes(a: ArrayLike, axis1: number, axis2: number): Array;
 /** Transpose the last two dimensions of an array. */
 declare function matrixTranspose(a: ArrayLike): Array;
 /** Return a 1-D flattened array containing the elements of the input. */
@@ -1587,14 +1589,14 @@ interface PrimitiveParamsImpl extends Record<Primitive, Record<string, any>> {
   [Primitive.Pad]: {
     width: Pair[];
   };
+  [Primitive.TriangularSolve]: {
+    unitDiagonal: boolean;
+  };
   [Primitive.Jit]: {
     name: string;
     jaxpr: Jaxpr;
     numConsts: number;
   };
-  [Primitive.TriangularSolve]: {
-    unitDiagonal: boolean;
-  };
 }
 /** Type of parameters taken by each primitive. */
 type PrimitiveParams<T extends Primitive> = T extends keyof PrimitiveParamsImpl ? PrimitiveParamsImpl[T] : Record<string, never>;
@@ -2073,6 +2075,24 @@ declare function logspace(start: number, stop: number, num?: number, endpoint?:
   dtype,
   device
 }?: DTypeAndDevice): Array;
+//#endregion
+//#region src/frontend/linearize.d.ts
+/** @inline */
+type GradOpts = {
+  /**
+   * Integer or sequence of integers. Specifies which positional argument(s) to
+   * differentiate with respect to.
+   *
+   * Defaults to `0` (the first argument).
+   */
+  argnums?: number | number[];
+  /**
+   * The input function returns a pair of `[out, aux]` including an auxiliary
+   * value. This `aux` is not differentiated, but is returned alongside the
+   * gradient when evaluating the function.
+   */
+  hasAux?: boolean;
+};
 declare namespace lax_linalg_d_exports {
   export { cholesky, lu, triangularSolve };
 }
@@ -2163,7 +2183,7 @@ declare function triangularSolve(a: ArrayLike, b: ArrayLike, {
   unitDiagonal?: boolean;
 }): Array;
 declare namespace lax_d_exports {
-  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convWithGeneralPadding, dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
+  export { DotDimensionNumbers, PaddingType, conv, convGeneralDilated, convTranspose, convWithGeneralPadding, dot, erf, erfc, lax_linalg_d_exports as linalg, reduceWindow, stopGradient };
 }
 /**
  * Dimension numbers for general `dot()` primitive.
@@ -2201,7 +2221,11 @@ type PaddingType = "VALID" | "SAME" | "SAME_LOWER" | Pair[];
  * The semantics of this operation mimic the `jax.lax.conv_general_dilated`
  * function in JAX, which wraps XLA's general convolution operator.
  *
- * Grouped convolutions are not supported right now.
+ * @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+ * @param rhs - Convolution kernel; shape `[C_out, C_in / G, ...ks]`
+ * @param windowStrides - Strides for each spatial dimension
+ * @param padding - Padding for each spatial dimension, or a string
+ *   (`"VALID"`, `"SAME"`, or `"SAME_LOWER"`)
  */
 declare function convGeneralDilated(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType, {
   lhsDilation,
@@ -2216,6 +2240,37 @@ declare function convGeneralDilated(lhs: Array, rhs: Array, windowStrides: numbe
 declare function convWithGeneralPadding(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType, lhsDilation?: number[], rhsDilation?: number[]): Array;
 /** Convenience wrapper around `convGeneralDilated`. */
 declare function conv(lhs: Array, rhs: Array, windowStrides: number[], padding: PaddingType): Array;
+/**
+ * Convenience wrapper for calculating the N-d convolution "transpose".
+ *
+ * This function directly calculates a fractionally strided conv rather than
+ * indirectly calculating the gradient (transpose) of a forward convolution.
+ * It is equivalent to the JAX version, except:
+ *
+ * - The `use_consistent_padding` option is not available. We only have the
+ *   consistent padding case (JAX version >0.8.4).
+ * - The order of dimensions matches `lax.conv_general_dilated`.
+ *
+ * Unlike PyTorch/TensorFlow, by default we don't reverse the kernel's spatial
+ * dimensions or the `(C_out, C_in)` axis order. To get this behavior, set
+ * `transposeKernel` to true.
+ *
+ * @param lhs - Input tensor; shape `[N, C_in, ...xs]`
+ * @param rhs - Convolution kernel; shape `[C_out, C_in, ...ks]`
+ * @param strides - Sequence of n integers, sets fractional stride
+ * @param padding - Apply padding of `dilation * (kernel_size - 1) - padding` to
+ *   each side of the input, so it acts like gradient of `conv()`
+ * @param rhsDilation - Atrous dilation for the kernel
+ * @param transposeKernel - Flip spatial axes and swap the input/output channels
+ *   of the kernel; its shape should be `[C_in, C_out, ...ks]`
+ */
+declare function convTranspose(lhs: Array, rhs: Array, strides: number[], padding: PaddingType, {
+  rhsDilation,
+  transposeKernel
+}?: {
+  rhsDilation?: number[];
+  transposeKernel?: boolean;
+}): Array;
 /** Reduce a computation over padded windows. */
 declare function reduceWindow(operand: Array, computation: (x: Array) => Array, windowDimensions: number[], windowStrides?: number[]): Array;
 /** The error function: `erf(x) = 2/sqrt(pi) * int[0..x] exp(-t^2) dt`. */
@@ -2235,7 +2290,7 @@ declare function erfc(x: ArrayLike): Array;
  */
 declare function stopGradient(x: ArrayLike): Array;
 declare namespace nn_d_exports {
-  export { celu, elu, gelu, glu, hardSigmoid, hardSilu, hardSilu as hardSwish, hardTanh, identity, leakyRelu, logSigmoid, logSoftmax, logmeanexp, logsumexp, mish, oneHot, relu, relu6, selu, sigmoid, silu, softSign, softmax, softplus, sparsePlus, sparseSigmoid, squareplus, standardize, silu as swish };
+  export { celu, dotProductAttention, elu, gelu, glu, hardSigmoid, hardSilu, hardSilu as hardSwish, hardTanh, identity, leakyRelu, logSigmoid, logSoftmax, logmeanexp, logsumexp, mish, oneHot, relu, relu6, selu, sigmoid, silu, softSign, softmax, softplus, sparsePlus, sparseSigmoid, squareplus, standardize, silu as swish };
 }
 /**
  * Rectified Linear Unit (ReLU) activation function:
@@ -2432,6 +2487,56 @@ declare function standardize(x: ArrayLike, axis?: Axis, opts?: {
  * ```
  */
 declare function oneHot(x: Array, numClasses: number): Array;
+/**
+ * Scaled dot product attention (SDPA).
+ *
+ * Computes `softmax((Q @ K^T) / sqrt(d) + bias) @ V`, where `Q` is the query,
+ * `K` is the key, `V` is the value, and `d` is the dimensionality of each key
+ * and query vector.
+ *
+ * Multi-query attention is applied when input `key` and `value` tensors have
+ * fewer heads than `query`.
+ *
+ * We use the following uppercase letters to denote array shapes:
+ * - `B` = batch size
+ * - `S` = length of key/value sequences (source)
+ * - `L` = length of query sequences
+ * - `N` = number of attention heads
+ * - `H` = dimensionality of each attention head
+ * - `K` = number of key/value heads (for grouped-query attention)
+ *
+ * The batch size `B` may be omitted, which is equivalent to `B = 1`. In this
+ * case it must be omitted from all inputs.
+ *
+ * @param query - Query array; shape `[B, L, N, H]`
+ * @param key - Key array; shape `[B, S, K, H]`
+ * @param value - Value array; same shape as `key`
+ * @param opts.bias - Optional bias to add to the attention logits; shape
+ *   `[B, N, L, S]` or broadcastable to it.
+ * @param opts.mask - Optional mask to apply to the attention logits; should be
+ *   a boolean array broadcastable to `[B, N, L, S]`, where `true` indicates
+ *   the element should take part in attention.
+ * @param opts.scale - Scaling factor override, default is `1 / sqrt(H)`.
+ * @param opts.isCausal - If true, applies a causal mask.
+ * @param opts.querySeqLengths - Optional sequence lengths for the queries;
+ *   shape `(B,)`. Taken from the beginning of the tensor.
+ * @param opts.keyValueSeqLengths - Optional sequence lengths for the keys and
+ *   values; shape `(B,)`. Taken from the beginning of the tensor.
+ * @param opts.localWindowSize - If specified, applies a local attention window
+ *   of the given size. Can be a single number or a tuple `[left, right]`.
+ *
+ * @returns The result of the attention operation; shape is the same as query
+ *   `[B, L, N, H]`, or `[L, N, H]` if `B` is omitted.
+ */
+declare function dotProductAttention(query: ArrayLike, key: ArrayLike, value: ArrayLike, opts?: {
+  bias?: ArrayLike;
+  mask?: ArrayLike;
+  scale?: number;
+  isCausal?: boolean;
+  querySeqLengths?: ArrayLike;
+  keyValueSeqLengths?: ArrayLike;
+  localWindowSize?: number | [number, number];
+}): Array;
 declare namespace random_d_exports {
   export { bernoulli, bits, cauchy, exponential, gumbel, key, laplace, multivariateNormal, normal, split, uniform };
 }
@@ -2523,7 +2628,9 @@ declare const logit: OwnedFunction<(x: ArrayLike) => Array>;
  * @function
  * Compute the forward-mode Jacobian-vector product for a function.
  */
-declare const jvp: <F extends (...args: any[]) => JsTree<Array>>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, ReturnType<F>];
+declare const jvp: <F extends (...args: any[]) => JsTree<Array>, HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, tangents: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, Out, Aux] : never : [ReturnType<F>, ReturnType<F>];
 /**
  * @function
  * Vectorize an operation on a batched axis for one or more inputs.
@@ -2565,28 +2672,100 @@ declare const jit: <F extends (...args: any[]) => JsTree<Array>>(f: F, opts?: Ji
  * Produce a local linear approximation to a function at a point using jvp() and
  * partial evaluation.
  */
-declare const linearize: <F extends (...args: any[]) => JsTree<Array>>(f: F, ...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, (...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>];
+declare const linearize: <F extends (...args: any[]) => JsTree<Array>, HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, OwnedFunction<(...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => Out>, Aux] : never : [ReturnType<F>, OwnedFunction<(...tangents: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>>];
 /**
  * @function
  * Calculate the reverse-mode vector-Jacobian product for a function.
+ *
+ * The return value is a tuple of `[out, vjpFn]`, where `out` is the output of
+ * `f(primals)`, and `vjpFn` is a function that takes in cotangents for each
+ * output and returns the cotangents for each input.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and `vjp` returns `[out, vjpFn, aux]`.
+ *
+ * @example
+ * ```ts
+ * const [y, vjpFn] = vjp(f, [x]);
+ *
+ * // With hasAux
+ * const [y, vjpFn, aux] = vjp(f, [x], { hasAux: true });
+ * ```
  */
-declare const vjp: <F extends (...args: any[]) => JsTree<Array>>(f: F, ...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, (cotangents: MapJsTree<ReturnType<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>];
+declare const vjp: <F extends (...args: any[]) => JsTree<Array>, const HA extends boolean = false>(f: F, primals: MapJsTree<Parameters<F>, Array, ArrayLike>, opts?: {
+  hasAux?: HA;
+}) => HA extends true ? ReturnType<F> extends [infer Out, infer Aux] ? [Out, OwnedFunction<(cotangents: MapJsTree<Out, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>>, Aux] : never : [ReturnType<F>, OwnedFunction<(cotangents: MapJsTree<ReturnType<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>, ArrayLike, Array>>];
+/** @inline */
+type GradOutputType<I, F extends (...args: any[]) => any> = MapJsTree<I extends undefined ? Parameters<F>[0] : I extends number ? Parameters<F>[I] : I extends number[] ? { [K in keyof I]: I[K] extends number ? Parameters<F>[I[K]] : never } : never, ArrayLike, Array>;
 /**
  * @function
  * Compute the gradient of a scalar-valued function `f` with respect to its
  * first argument.
+ *
+ * Pass in different `argnums` to differentiate with respect to other
+ * arguments. If a tuple is provided, the return value will be a tuple of
+ * gradients corresponding to each argument index.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and the return value will be `[gradient, aux]`.
+ *
+ * @example
+ * ```ts
+ * const gradient = grad(f)(x);
+ *
+ * // With `argnums`
+ * const [gradientX, gradientZ] = grad(f, { argnums: [0, 2] })(x, y, z);
+ *
+ * // With `hasAux`
+ * const [gradient, aux] = grad(f, { hasAux: true })(x);
+ * ```
  */
-declare const grad: <F extends (...args: any[]) => JsTree<Array>>(f: F) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => MapJsTree<Parameters<F>[0], ArrayLike, Array>;
+declare const grad: <F extends (...args: any[]) => JsTree<Array>, const I extends undefined | number | number[] = undefined, const HA extends boolean = false>(f: F, opts?: Omit<GradOpts, "argnums" | "hasAux"> & {
+  argnums?: I;
+  hasAux?: HA;
+}) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => HA extends true ? ReturnType<F> extends [any, infer Aux] ? [GradOutputType<I, F>, Aux] : never : GradOutputType<I, F>;
 /**
  * @function
  * Create a function that evaluates both `f` and the gradient of `f`.
+ *
+ * When `{ hasAux: true }` is passed, the function `f` is expected to return an
+ * `[out, aux]` tuple, and the return value will be `[[out, aux], gradient]`.
+ *
+ * @example
+ * ```ts
+ * // Without hasAux
+ * const [value, gradient] = valueAndGrad(f)(x);
+ *
+ * // With hasAux
+ * const [[value, aux], gradient] = valueAndGrad(f, { hasAux: true })(x);
+ * ```
  */
-declare const valueAndGrad: <F extends (...args: any[]) => JsTree<Array>>(f: F) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, MapJsTree<Parameters<F>[0], ArrayLike, Array>];
+declare const valueAndGrad: <F extends (...args: any[]) => JsTree<Array>, const I extends undefined | number | number[] = undefined, const HA extends boolean = false>(f: F, opts?: Omit<GradOpts, "argnums"> & {
+  argnums?: I;
+  hasAux?: HA;
+}) => (...primals: MapJsTree<Parameters<F>, Array, ArrayLike>) => [ReturnType<F>, GradOutputType<I, F>];
 /**
  * @function
  * Compute the Jacobian evaluated row-by-row by reverse-mode AD.
  */
 declare const jacrev: typeof jacfwd;
+/**
+ * @function
+ * Compute the Hessian matrix of a scalar-valued function.
+ *
+ * The Hessian is the matrix of second-order partial derivatives of a function.
+ * This is implemented as `jacfwd(grad(f))`.
+ *
+ * @example
+ * ```ts
+ * const f = (x: np.Array) => np.sum(x.ref.mul(x.ref).mul(x)); // sum(x^3)
+ * const H = hessian(f)(np.array([1, 2, 3]));
+ * // H[i,j] = d^2f / dx_i dx_j
+ * ```
+ */
+declare const hessian: <F extends (x: Array) => Array>(f: F) => (...args: MapJsTree<Parameters<F>, Array, ArrayLike>) => ReturnType<F>;
 /**
  * Wait until all `Array` leaves are ready by calling `Array.blockUntilReady()`.
  *
@@ -2609,4 +2788,4 @@ declare function blockUntilReady<T extends JsTree<any>>(x: T): Promise<T>;
  */
 declare function devicePut<T extends JsTree<any>>(x: T, device?: Device): Promise<MapJsTree<T, number | boolean, Array>>;
 //#endregion
-export { Array, ClosedJaxpr, DType, type Device, Jaxpr, type JsTree, type JsTreeDef, type OwnedFunction, blockUntilReady, defaultDevice, devicePut, devices, grad, init, jacfwd, jacrev as jacobian, jacrev, jit, jvp, lax_d_exports as lax, linearize, makeJaxpr, nn_d_exports as nn, numpy_d_exports as numpy, random_d_exports as random, scipy_special_d_exports as scipySpecial, setDebug, tree_d_exports as tree, valueAndGrad, vjp, vmap };
+export { Array, ClosedJaxpr, DType, type Device, Jaxpr, type JsTree, type JsTreeDef, type OwnedFunction, blockUntilReady, defaultDevice, devicePut, devices, grad, hessian, init, jacfwd, jacrev as jacobian, jacrev, jit, jvp, lax_d_exports as lax, linearize, makeJaxpr, nn_d_exports as nn, numpy_d_exports as numpy, random_d_exports as random, scipy_special_d_exports as scipySpecial, setDebug, tree_d_exports as tree, valueAndGrad, vjp, vmap };