npm - tensorgrad - Versions diffs - 0.0.17 → 0.0.18 - Mend

tensorgrad 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js.map CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../src/ir.ts", "../src/shape.ts", "../src/trace.ts", "../src/capture.ts", "../src/ops.ts", "../src/grad.ts", "../src/adam.ts", "../src/buffers.ts", "../src/codegen.ts", "../src/runtime.ts", "../src/module.ts", "../src/worker-protocol.ts", "../src/worker-proxy.ts", "../src/compile.ts", "../src/nn.ts"],
-  "sourcesContent": ["// Intermediate representation for tensor computations.\n//\n// A `Graph` is a flat array of `OpNode`s in topological (= construction) order.\n// A `Tensor` is an opaque handle: shape + dtype + a pointer back to the OpNode\n// that produced it (or `null` for graph leaves \u2014 params and external inputs).\n//\n// This is the data structure everything else operates on:\n//   - tracing builds it (src/trace.ts)\n//   - autograd walks it in reverse to add backward nodes (src/grad.ts, later)\n//   - codegen reads it to emit WGSL kernels and a dispatch plan (src/codegen.ts, later)\n//\n// Design intent: keep this file boring. No tracing logic, no shape inference,\n// no codegen \u2014 those live in their own modules and consume `Graph` / `OpNode`.\n\nexport type Dtype = 'f32' | 'i32' | 'bool'\nexport type Shape = readonly number[]\n\n// A Tensor is just metadata + a unique id. The actual storage doesn't exist\n// until the graph is compiled and run on a device.\nexport interface Tensor {\n  readonly id: number\n  readonly shape: Shape\n  readonly dtype: Dtype\n  // null for leaves (params, external inputs); otherwise the index into Graph.ops.\n  readonly source: number | null\n  // Captured at op-call time so shape errors blame the user's frame, not the\n  // library's. Lazy: only formatted on demand.\n  readonly site: CallSite | null\n}\n\nexport interface CallSite {\n  readonly opName: string\n  // Full Error stack at the point of op invocation. Format on demand.\n  readonly stack: string\n}\n\n// Discriminated union over every op the IR knows about. Adding an op means:\n//   1. add a variant here,\n//   2. add a shape rule in src/shape.ts,\n//   3. add a transpose rule in src/grad.ts (later),\n//   4. add a kernel template in src/codegen.ts (later).\n// The kinds intentionally match the surface API in src/ops.ts one-to-one.\nexport type OpNode =\n  // ---- Leaves ----------------------------------------------------------------\n  // A trainable parameter, supplied by the caller as a Float32Array at runtime.\n  | { kind: 'param_input'; out: number; name: string }\n  // A non-trainable input (tokens, targets, constants). Bound at runtime.\n  | { kind: 'tensor_input'; out: number; name: string }\n  // Persistent state buffer (e.g. Adam's m/v). Allocated and zero-initialized\n  // at compile time; survives across step() calls. Updated via writebacks\n  // declared in the compile result.\n  | { kind: 'state_input'; out: number; name: string; initValue: number }\n\n  // ---- Element-wise --------------------------------------------------------\n  | { kind: 'add'; out: number; a: number; b: number }\n  | { kind: 'sub'; out: number; a: number; b: number }\n  | { kind: 'mul'; out: number; a: number; b: number }\n  | { kind: 'div'; out: number; a: number; b: number }\n  | { kind: 'mul_scalar'; out: number; a: number; scalar: number }\n  | { kind: 'add_scalar'; out: number; a: number; scalar: number }\n\n  // ---- Unary ---------------------------------------------------------------\n  | { kind: 'sqrt'; out: number; a: number }\n  | { kind: 'rsqrt'; out: number; a: number }\n  | { kind: 'log'; out: number; a: number }\n  | { kind: 'exp'; out: number; a: number }\n  | { kind: 'relu'; out: number; a: number }\n\n  // ---- Reductions (over last axis only; reshape if you need other axes) ----\n  | { kind: 'mean_last'; out: number; a: number }   // keepdims=true\n  | { kind: 'sum_last'; out: number; a: number }    // keepdims=false\n\n  // ---- Shape ---------------------------------------------------------------\n  | { kind: 'reshape'; out: number; a: number; newShape: Shape }\n  | { kind: 'transpose'; out: number; a: number; perm: readonly number[] }\n\n  // ---- Linear algebra -----------------------------------------------------\n  // matmul: a [..., M, K] \u00B7 b [K, N] -> [..., M, N]. b is unbatched.\n  // (Batched-on-both-sides matmul, e.g. for attention scores, is a separate kind\n  //  to keep autograd transpose rules simple.)\n  | { kind: 'matmul'; out: number; a: number; b: number }\n  // matmul_batched: a [..., M, K] \u00B7 b [..., K, N] -> [..., M, N]. Used by attention.\n  | { kind: 'matmul_batched'; out: number; a: number; b: number }\n\n  // ---- Indexing / casting --------------------------------------------------\n  | { kind: 'one_hot'; out: number; indices: number; depth: number; dtype: Dtype }\n  | { kind: 'arange'; out: number; n: number; dtype: Dtype }\n\n  // ---- ML primitives (fused for cleaner autograd) -------------------------\n  | { kind: 'softmax_causal_last'; out: number; a: number }\n  | { kind: 'log_softmax_last'; out: number; a: number }\n  // Sets cells where (i >= j) on the last two axes; for masking attention scores\n  // *before* softmax. Lower-triangle entries pass through; upper-triangle entries\n  // become `fillValue` (typically -inf or a large negative number).\n  | { kind: 'where_causal'; out: number; a: number; fillValue: number }\n\n  // ---- Comparisons + selection -------------------------------------------\n  // Element-wise comparison; result is bool (lowered to u32 in storage).\n  // Supports the same trailing-axis broadcast as element-wise binops.\n  | { kind: 'less'; out: number; a: number; b: number }\n  | { kind: 'greater'; out: number; a: number; b: number }\n  // Element-wise select: out[i] = cond[i] ? a[i] : b[i]. cond must be bool.\n  // a, b, cond all broadcast-compatible to out's shape.\n  | { kind: 'where'; out: number; cond: number; a: number; b: number }\n\n  // ---- Optimizer-fused ops (Adam) ----------------------------------------\n  // Each is a single kernel doing the full per-element math, baking in the\n  // hyperparameter constant. Used by appendAdam() to avoid decomposing the\n  // update into ~12 element-wise dispatches per param.\n  | { kind: 'adam_update_m'; out: number; m: number; g: number; b1: number }\n  | { kind: 'adam_update_v'; out: number; v: number; g: number; b2: number }\n  // adam_update_p: p_new = decayShrink * p - lrt[0] * m_new / (sqrt(v_new) + eps).\n  // `lrt` is a scalar tensor (provided as a tensor_input updated per step) that\n  // already includes Adam's bias-correction factor: lrt = lr * sqrt(1-b2^t) / (1-b1^t).\n  // `decayShrink` is the decoupled-weight-decay factor (Loshchilov & Hutter,\n  // \"AdamW\"): 1 - lr * weightDecay when the param is being decayed, 1 otherwise.\n  // It can be either a compile-time literal (number) for fixed-lr training, or a\n  // tensor id pointing at a scalar input that the runtime updates per step (used\n  // when the user supplies an lr schedule via `adam: { lr: (step) => ... }`).\n  | {\n      kind: 'adam_update_p'\n      out: number\n      p: number\n      mNew: number\n      vNew: number\n      lrt: number\n      eps: number\n      decayShrink: number               // literal (used when decayShrinkTensor is null)\n      decayShrinkTensor: number | null  // tensor id of a scalar input; takes precedence when set\n    }\n\n  // ---- Slicing / broadcasting / autograd infrastructure -------------------\n  // Slice [start, end) along the last axis. Output shape: input shape with\n  // last axis replaced by (end - start). Used for splitting Q/K/V from a\n  // single fused QKV matmul.\n  | { kind: 'slice_last_range'; out: number; a: number; start: number; end: number }\n  // Broadcast `a` to `targetShape`. Standard right-aligned NumPy broadcast.\n  // Used by autograd to expand cotangents back over reduced/broadcast axes.\n  | { kind: 'broadcast_to'; out: number; a: number; targetShape: Shape }\n  // Inverse of broadcast_to: sum-reduce `a` to `targetShape`. Used by autograd\n  // to \"un-broadcast\" a cotangent back to the smaller operand's shape.\n  | { kind: 'sum_to_shape'; out: number; a: number; targetShape: Shape }\n  // 0-d tensor with a constant value. Used to seed loss cotangent (1.0).\n  | { kind: 'const_scalar'; out: number; value: number; dtype: Dtype }\n  // ReLU's backward: passes `dy` through where `x > 0`, else 0. Output shape = x's.\n  | { kind: 'relu_grad'; out: number; x: number; dy: number }\n\n// A Graph collects ops and tensors during tracing, then becomes the input to\n// autograd and codegen. Once tracing is done it should be treated as immutable.\nexport interface Graph {\n  readonly ops: OpNode[]\n  readonly tensors: Tensor[]\n  // Names of tensors that should be exposed as outputs of the compiled function.\n  // Set by the trace driver; for a loss function, this is `[lossTensor]`.\n  readonly outputs: number[]\n  // Tensors registered for activation readback via `capture(name, t)`.\n  // Keyed by user-supplied name; insertion order preserved. Empty when no\n  // captures registered (the common training case \u2014 zero overhead).\n  readonly captures: Map<string, number>\n}\n\nexport function makeGraph(): Graph {\n  return { ops: [], tensors: [], outputs: [], captures: new Map() }\n}\n\n// Internal: register a fresh tensor in the graph and return its id.\nexport function addTensor(g: Graph, shape: Shape, dtype: Dtype, source: number | null, site: CallSite | null): Tensor {\n  const id = g.tensors.length\n  const t: Tensor = { id, shape, dtype, source, site }\n  g.tensors.push(t)\n  return t\n}\n\n// Internal: append an op and the tensor it produces. Returns the produced tensor.\n// Generic over the specific op kind so callers don't need `as any` casts.\n// `Extract<OpNode, { kind: K }>` narrows the union to the chosen variant, then\n// `Omit` strips the parts addOp itself supplies (the kind tag and out tensor id).\nexport function addOp<K extends OpNode['kind']>(\n  g: Graph,\n  kind: K,\n  shape: Shape,\n  dtype: Dtype,\n  site: CallSite | null,\n  fields: Omit<Extract<OpNode, { kind: K }>, 'kind' | 'out'>,\n): Tensor {\n  const opIndex = g.ops.length\n  const out = addTensor(g, shape, dtype, opIndex, site)\n  const node = { kind, out: out.id, ...fields } as Extract<OpNode, { kind: K }>\n  g.ops.push(node)\n  return out\n}\n\n// Capture a call site without paying full Error formatting cost up-front.\n// The stack is materialised but parsing/trimming is deferred to error reporting.\nexport function captureSite(opName: string): CallSite {\n  // Skip our own frame plus the op wrapper's frame; user's frame is what's left.\n  const stack = (new Error()).stack ?? ''\n  return { opName, stack }\n}\n\n// Format a CallSite for inclusion in a thrown error. Strips Tensorgrad frames\n// and library internals so the user sees their code first.\nexport function formatSite(site: CallSite): string {\n  const lines = site.stack.split('\\n')\n  // Stack starts with \"Error\" line; drop it. Then drop frames from this file\n  // and from src/ops.ts so the first surviving frame is user code.\n  const userFrames: string[] = []\n  for (const line of lines.slice(1)) {\n    if (line.includes('/tensorgrad/src/') || line.includes('\\\\tensorgrad\\\\src\\\\')) continue\n    userFrames.push(line.trim())\n    if (userFrames.length >= 3) break\n  }\n  if (userFrames.length === 0) return `[${site.opName}] (no user frame found)`\n  return `[${site.opName}]\\n  ${userFrames.join('\\n  ')}`\n}\n", "// Shape inference and validation for each op kind.\n//\n// Every op in src/ops.ts validates its inputs and computes its output shape\n// through helpers here. Errors throw with the captured call-site so the\n// stack trace points at the user's line, not into the library.\n//\n// Broadcasting rules (deliberately limited):\n//   * For element-wise binops (add/sub/mul/div), we support trailing-axis\n//     broadcasting: the smaller operand's shape must be a suffix of the\n//     larger's, with axes of size 1 broadcasting to any size. Examples\n//     ALLOWED:  [B, T, D] op [D]  \u2192  [B, T, D]\n//               [B, T, D] op [1, D]  \u2192 [B, T, D]\n//               [B, T, D] op [B, T, D]  \u2192 [B, T, D]\n//     Examples REJECTED:  [B, T, D] op [B]   (suffix mismatch)\n//                         [B, T, D] op [T, D] when T != B (legal numpy, banned here)\n//   The restriction makes codegen and autograd much simpler and covers every\n//   broadcast pattern in our transformer (biases, layernorm gain/bias, masks).\n\nimport type { Shape, CallSite } from './ir.js'\nimport { formatSite } from './ir.js'\n\n// ============================================================================\n// Errors\n// ============================================================================\n\nexport class ShapeError extends Error {\n  constructor(message: string, site: CallSite | null) {\n    const formatted = site ? `${message}\\n  at ${formatSite(site)}` : message\n    super(formatted)\n    this.name = 'ShapeError'\n  }\n}\n\nfunction fail(message: string, site: CallSite | null): never {\n  throw new ShapeError(message, site)\n}\n\n// ============================================================================\n// Shape utilities\n// ============================================================================\n\nexport function shapesEqual(a: Shape, b: Shape): boolean {\n  if (a.length !== b.length) return false\n  for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false\n  return true\n}\n\nexport function shapeSize(shape: Shape): number {\n  let n = 1\n  for (const d of shape) n *= d\n  return n\n}\n\nexport function showShape(shape: Shape): string {\n  return `[${shape.join(', ')}]`\n}\n\n// Standard right-aligned NumPy-style broadcasting. Pad the shorter shape with\n// leading 1s, then per-axis: equal dims unify, size-1 dims broadcast on either\n// side, otherwise incompatible. Returns the resulting shape or null.\nexport function broadcastTrailing(a: Shape, b: Shape): Shape | null {\n  const rank = Math.max(a.length, b.length)\n  const out: number[] = new Array(rank)\n  for (let i = 0; i < rank; i++) {\n    const ai = i - (rank - a.length)\n    const bi = i - (rank - b.length)\n    const av = ai < 0 ? 1 : a[ai]!\n    const bv = bi < 0 ? 1 : b[bi]!\n    if (av === bv) out[i] = av\n    else if (av === 1) out[i] = bv\n    else if (bv === 1) out[i] = av\n    else return null\n  }\n  return out\n}\n\n// ============================================================================\n// Per-op shape rules\n// ============================================================================\n//\n// Each rule takes the input shapes and returns the output shape, or throws.\n// All rules accept a `site` for error attribution.\n\nexport function inferElementwiseBinop(\n  opName: string, aShape: Shape, bShape: Shape, site: CallSite | null,\n): Shape {\n  const result = broadcastTrailing(aShape, bShape)\n  if (!result) {\n    fail(\n      `${opName}: incompatible shapes ${showShape(aShape)} and ${showShape(bShape)}. ` +\n      `Trailing-suffix broadcasting only \u2014 the smaller shape must be a suffix of the larger, ` +\n      `with size-1 axes broadcasting to any size.`,\n      site,\n    )\n  }\n  return result\n}\n\nexport function inferUnary(_opName: string, aShape: Shape, _site: CallSite | null): Shape {\n  return aShape\n}\n\nexport function inferMeanLast(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot reduce a 0-d tensor`, site)\n  // keepdims=true: replace last axis with 1.\n  return [...aShape.slice(0, -1), 1]\n}\n\nexport function inferSumLast(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot reduce a 0-d tensor`, site)\n  // keepdims=false: drop the last axis.\n  return aShape.slice(0, -1)\n}\n\nexport function inferReshape(opName: string, aShape: Shape, newShape: Shape, site: CallSite | null): Shape {\n  // Validate -1 placeholder (at most one allowed) and total size match.\n  let inferIdx = -1\n  let knownSize = 1\n  for (let i = 0; i < newShape.length; i++) {\n    const d = newShape[i]!\n    if (d === -1) {\n      if (inferIdx !== -1) fail(`${opName}: at most one -1 dim allowed in newShape ${showShape(newShape)}`, site)\n      inferIdx = i\n    } else if (d <= 0) {\n      fail(`${opName}: invalid dim ${d} in newShape ${showShape(newShape)}`, site)\n    } else {\n      knownSize *= d\n    }\n  }\n  const totalIn = shapeSize(aShape)\n  const out = [...newShape]\n  if (inferIdx !== -1) {\n    if (totalIn % knownSize !== 0) {\n      fail(`${opName}: cannot reshape ${showShape(aShape)} (size ${totalIn}) to ${showShape(newShape)} \u2014 known dims multiply to ${knownSize}`, site)\n    }\n    out[inferIdx] = totalIn / knownSize\n  } else if (knownSize !== totalIn) {\n    fail(`${opName}: size mismatch \u2014 input ${showShape(aShape)} has ${totalIn} elements but newShape ${showShape(newShape)} has ${knownSize}`, site)\n  }\n  return out\n}\n\nexport function inferTranspose(opName: string, aShape: Shape, perm: readonly number[], site: CallSite | null): Shape {\n  if (perm.length !== aShape.length) {\n    fail(`${opName}: perm length ${perm.length} must equal input rank ${aShape.length}`, site)\n  }\n  const seen = new Set<number>()\n  for (const p of perm) {\n    if (p < 0 || p >= aShape.length) fail(`${opName}: perm index ${p} out of range for rank ${aShape.length}`, site)\n    if (seen.has(p)) fail(`${opName}: perm has duplicate index ${p}`, site)\n    seen.add(p)\n  }\n  return perm.map(p => aShape[p]!)\n}\n\n// matmul: a [..., M, K] \u00B7 b [K, N]  \u2192  [..., M, N].  b is unbatched.\nexport function inferMatmul(opName: string, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2) fail(`${opName}: lhs must have rank >= 2, got ${showShape(aShape)}`, site)\n  if (bShape.length !== 2) fail(`${opName}: rhs must have rank 2, got ${showShape(bShape)} \u2014 use matmulBatched for batched rhs`, site)\n  const M = aShape[aShape.length - 2]!\n  const Ka = aShape[aShape.length - 1]!\n  const Kb = bShape[0]!\n  const N = bShape[1]!\n  if (Ka !== Kb) fail(`${opName}: inner dims don't match \u2014 ${showShape(aShape)} \u00B7 ${showShape(bShape)} (last axis of lhs = ${Ka}, first axis of rhs = ${Kb})`, site)\n  return [...aShape.slice(0, -2), M, N]\n}\n\n// matmul_batched: a [..., M, K] \u00B7 b [..., K, N]  \u2192  [..., M, N].  Both have leading batch dims.\nexport function inferMatmulBatched(opName: string, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2 || bShape.length < 2) {\n    fail(`${opName}: both inputs must have rank >= 2, got ${showShape(aShape)} and ${showShape(bShape)}`, site)\n  }\n  if (aShape.length !== bShape.length) {\n    fail(`${opName}: ranks must match (got ${aShape.length} vs ${bShape.length}). Reshape if you need different batch dims.`, site)\n  }\n  const aBatch = aShape.slice(0, -2)\n  const bBatch = bShape.slice(0, -2)\n  for (let i = 0; i < aBatch.length; i++) {\n    if (aBatch[i] !== bBatch[i]) {\n      fail(`${opName}: batch dims must match \u2014 ${showShape(aShape)} vs ${showShape(bShape)}`, site)\n    }\n  }\n  const M = aShape[aShape.length - 2]!\n  const Ka = aShape[aShape.length - 1]!\n  const Kb = bShape[bShape.length - 2]!\n  const N = bShape[bShape.length - 1]!\n  if (Ka !== Kb) fail(`${opName}: inner dims don't match \u2014 last axis of lhs = ${Ka}, second-to-last of rhs = ${Kb}`, site)\n  return [...aBatch, M, N]\n}\n\nexport function inferOneHot(opName: string, indicesShape: Shape, depth: number, site: CallSite | null): Shape {\n  if (depth <= 0) fail(`${opName}: depth must be positive, got ${depth}`, site)\n  return [...indicesShape, depth]\n}\n\n// where_causal preserves shape but requires the last two axes to be square.\nexport function inferWhereCausal(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2) fail(`${opName}: requires rank >= 2, got ${showShape(aShape)}`, site)\n  const m = aShape[aShape.length - 2]!\n  const n = aShape[aShape.length - 1]!\n  if (m !== n) fail(`${opName}: last two axes must be equal (square mask), got ${showShape(aShape)}`, site)\n  return aShape\n}\n\nexport function inferSliceLastRange(opName: string, aShape: Shape, start: number, end: number, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot slice 0-d tensor`, site)\n  const last = aShape[aShape.length - 1]!\n  if (start < 0 || end > last || start >= end) {\n    fail(`${opName}: invalid range [${start}, ${end}) for last axis of size ${last}`, site)\n  }\n  return [...aShape.slice(0, -1), end - start]\n}\n\n// broadcast_to: validate that `aShape` can broadcast to `targetShape` under\n// right-aligned NumPy rules. Returns targetShape on success.\nexport function inferBroadcastTo(opName: string, aShape: Shape, targetShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length > targetShape.length) {\n    fail(`${opName}: source rank ${aShape.length} > target rank ${targetShape.length}`, site)\n  }\n  const offset = targetShape.length - aShape.length\n  for (let i = 0; i < aShape.length; i++) {\n    const av = aShape[i]!\n    const tv = targetShape[offset + i]!\n    if (av !== tv && av !== 1) {\n      fail(`${opName}: cannot broadcast ${showShape(aShape)} to ${showShape(targetShape)} \u2014 axis ${i} (size ${av}) doesn't match target axis ${offset + i} (size ${tv}) and isn't 1`, site)\n    }\n  }\n  return targetShape\n}\n\n// sum_to_shape: validate that `targetShape` is a valid right-aligned reduction\n// of `aShape` (i.e., aShape can have been produced by broadcasting targetShape).\nexport function inferSumToShape(opName: string, aShape: Shape, targetShape: Shape, site: CallSite | null): Shape {\n  if (targetShape.length > aShape.length) {\n    fail(`${opName}: target rank ${targetShape.length} > source rank ${aShape.length}`, site)\n  }\n  const offset = aShape.length - targetShape.length\n  for (let i = 0; i < targetShape.length; i++) {\n    const av = aShape[offset + i]!\n    const tv = targetShape[i]!\n    if (av !== tv && tv !== 1) {\n      fail(`${opName}: cannot sum-reduce ${showShape(aShape)} to ${showShape(targetShape)} \u2014 target axis ${i} (size ${tv}) must be 1 or match source`, site)\n    }\n  }\n  return targetShape\n}\n\n// Three-way broadcast for `where(cond, a, b)`. All three shapes must broadcast\n// to a common shape under standard NumPy rules.\nexport function inferWhere(opName: string, condShape: Shape, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  const ab = broadcastTrailing(aShape, bShape)\n  if (!ab) fail(`${opName}: a/b incompatible: ${showShape(aShape)} vs ${showShape(bShape)}`, site)\n  const result = broadcastTrailing(condShape, ab)\n  if (!result) fail(`${opName}: cond ${showShape(condShape)} incompatible with broadcast(a, b) ${showShape(ab)}`, site)\n  return result\n}\n\nexport function inferReluGrad(opName: string, xShape: Shape, dyShape: Shape, site: CallSite | null): Shape {\n  if (!shapesEqual(xShape, dyShape)) {\n    fail(`${opName}: x and dy must have matching shapes, got ${showShape(xShape)} and ${showShape(dyShape)}`, site)\n  }\n  return xShape\n}\n", "// Trace driver. Holds the \"current graph\" in module-local state so user code\n// can call ops without threading a graph parameter through every function.\n//\n// Usage:\n//\n//   const graph = trace(() => {\n//     const x = tensorInput('x', [B, T], 'i32')\n//     const w = paramInput('w', [V, D], 'f32')\n//     // ... user computation building tensors ...\n//     return finalLossTensor\n//   })\n//\n// `trace` is single-threaded and re-entrant only via nested calls (which share\n// the outer graph \u2014 but we don't currently have a use for nesting). Calling an\n// op outside a `trace(...)` block is an error.\n\nimport type { Graph, Tensor, Shape, Dtype } from './ir.js'\nimport { makeGraph, addOp, captureSite } from './ir.js'\n\n// Module-local: the graph being built right now, or null if no trace is active.\nlet _current: Graph | null = null\n// Module-local: whether `capture(name, t)` calls should register on the current\n// graph. True only during the user's forward trace; false during `traceInto`\n// (autograd / optimizer ops shouldn't accidentally publish gradient tensors).\nlet _captureEnabled = false\n\nexport function currentGraph(): Graph {\n  if (!_current) {\n    throw new Error(\n      'tensorgrad: ops can only be called inside trace(). ' +\n      'Did you forget to wrap your forward pass?',\n    )\n  }\n  return _current\n}\n\nexport function isCaptureEnabled(): boolean {\n  return _captureEnabled\n}\n\n// Run `fn` with a fresh graph as the current one; capture and return the graph.\n// `fn` must return the tensor (or array of tensors) to mark as graph outputs.\nexport function trace(fn: () => Tensor | Tensor[]): Graph {\n  if (_current) {\n    throw new Error('tensorgrad: nested trace() is not supported')\n  }\n  const g = makeGraph()\n  _current = g\n  _captureEnabled = true\n  try {\n    const result = fn()\n    const outputs = Array.isArray(result) ? result : [result]\n    for (const t of outputs) {\n      ;(g.outputs as number[]).push(t.id)\n    }\n  } finally {\n    _current = null\n    _captureEnabled = false\n  }\n  return g\n}\n\n// Re-enter an existing graph to append more ops. Used by autograd to add\n// backward ops to a graph that's already been traced. `fn` runs with the\n// supplied graph as the current one; any ops it calls append to that graph.\n// Capture is intentionally disabled here \u2014 backward / optimizer rules\n// shouldn't publish their internal tensors via `capture()`.\n// Returns whatever `fn` returns.\nexport function traceInto<T>(g: Graph, fn: () => T): T {\n  if (_current) {\n    throw new Error('tensorgrad: traceInto() called while another trace is active')\n  }\n  _current = g\n  // _captureEnabled stays false (default) \u2014 explicit, but not toggled.\n  try {\n    return fn()\n  } finally {\n    _current = null\n  }\n}\n\n// ---- Leaf tensor builders --------------------------------------------------\n// Inputs are added to the graph as `param_input` or `tensor_input` op nodes.\n// Their .source on the Tensor points at that node so codegen knows where to\n// bind external data.\n\n// Param/tensor inputs share a namespace (a step() call passes both as keys in\n// the same dispatch object); state inputs have their own namespace.\ntype NamedInputKind = 'param_input' | 'tensor_input' | 'state_input'\nfunction assertNameUnused(g: Graph, name: string, kinds: NamedInputKind[], label: string): void {\n  if (g.ops.some(op => kinds.includes(op.kind as NamedInputKind) && (op as { name?: string }).name === name)) {\n    throw new Error(`tensorgrad: ${label} name '${name}' already used in this trace`)\n  }\n}\n\nexport function paramInput(name: string, shape: Shape, dtype: Dtype = 'f32'): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['param_input', 'tensor_input'], 'input')\n  const site = captureSite('paramInput')\n  return addOp(g, 'param_input', shape, dtype, site, { name } as any)\n}\n\nexport function tensorInput(name: string, shape: Shape, dtype: Dtype = 'f32'): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['param_input', 'tensor_input'], 'input')\n  const site = captureSite('tensorInput')\n  return addOp(g, 'tensor_input', shape, dtype, site, { name } as any)\n}\n\n// Persistent state buffer. Allocated at compile time, zero-(or initValue-)initialized,\n// and updated across step() calls via writebacks declared by the optimizer helper.\nexport function stateInput(name: string, shape: Shape, dtype: Dtype = 'f32', initValue = 0): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['state_input'], 'state')\n  const site = captureSite('stateInput')\n  return addOp(g, 'state_input', shape, dtype, site, { name, initValue } as any)\n}\n", "// Activation capture \u2014 opt-in readback of intermediate tensors at training step.\n//\n// Usage (inside the user's forward pass):\n//\n//   import { capture } from 'tensorgrad'\n//\n//   function attentionFwd(p, x) {\n//     const scores = mul(matmulBatched(q, kT), SCALE_QK)\n//     const attn = capture(`attn.${layerIdx}`, softmaxCausalLast(scores))\n//     return matmulBatched(attn, v)\n//   }\n//\n// Pass-through return type: `capture(name, t)` returns `t` unchanged so it\n// inlines at the point of computation. Behind the scenes it registers `t.id`\n// against `name` on the current graph; runtime exposes the registered tensors\n// via `step(inputs, { withCaptures: true })`.\n//\n// Outside the user's forward trace (during `appendGrad` / `appendAdam`'s\n// `traceInto` re-entry), `capture()` is a no-op \u2014 gradient and optimizer\n// internals shouldn't accidentally publish themselves to the UI.\n\nimport type { Tensor } from './ir.js'\nimport { currentGraph, isCaptureEnabled } from './trace.js'\n\nexport function capture<T extends Tensor>(name: string, t: T): T {\n  if (!isCaptureEnabled()) return t\n  const g = currentGraph()\n  if (g.captures.has(name)) {\n    throw new Error(\n      `capture: name '${name}' already registered. Use unique names ` +\n      `(e.g. \\`attn.\\${layerIdx}\\`) when capturing across a loop.`,\n    )\n  }\n  g.captures.set(name, t.id)\n  return t\n}\n", "// User-facing op surface.\n//\n// Each function here is a thin wrapper:\n//   1. capture the call site (for error attribution)\n//   2. validate input shapes via src/shape.ts (which throws on mismatch)\n//   3. compute the output shape and dtype\n//   4. append the op to the current Graph (held in module state by src/trace.ts)\n//   5. return the produced Tensor handle\n//\n// No actual numeric work happens here. These calls just build the IR.\n\nimport type { Tensor, Shape, Dtype, OpNode } from './ir.js'\nimport { addOp, captureSite } from './ir.js'\nimport { currentGraph } from './trace.js'\nimport {\n  inferElementwiseBinop, inferUnary, inferMeanLast, inferSumLast,\n  inferReshape, inferTranspose, inferMatmul, inferMatmulBatched,\n  inferOneHot, inferWhereCausal, inferSliceLastRange,\n  inferBroadcastTo, inferSumToShape, inferReluGrad, inferWhere,\n  ShapeError, showShape,\n} from './shape.js'\n\n// ----------------------------------------------------------------------------\n// Element-wise binops (add/sub/mul/div). Trailing-suffix broadcast.\n// ----------------------------------------------------------------------------\n\n/**\n * Build an element-wise binop op (forward declaration only \u2014 appends to the\n * graph). Used by both arithmetic ops (add/sub/mul/div, output dtype = input\n * dtype) and comparisons (less/greater, output dtype = bool).\n */\nfunction binopOp(\n  name: string,\n  kind: OpNode['kind'],\n  a: Tensor, b: Tensor,\n  outDtype: Dtype = a.dtype,\n): Tensor {\n  const site = captureSite(name)\n  if (a.dtype !== b.dtype) throw new ShapeError(`${name}: dtype mismatch (${a.dtype} vs ${b.dtype})`, site)\n  const outShape = inferElementwiseBinop(name, a.shape, b.shape, site)\n  return addOp(currentGraph(), kind, outShape, outDtype, site, { a: a.id, b: b.id })\n}\n\n// Element-wise binops. Second arg can be a Tensor or a JS number; the latter\n// dispatches to scalar-fused IR ops internally. `mul(x, 2)` and `mul(x, y)`\n// both work \u2014 matches every NumPy-shaped library.\nexport function add(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? addScalar(a, b) : binopOp('add', 'add', a, b)\n}\nexport function sub(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? addScalar(a, -b) : binopOp('sub', 'sub', a, b)\n}\nexport function mul(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? mulScalar(a, b) : binopOp('mul', 'mul', a, b)\n}\nexport function div(a: Tensor, b: Tensor | number): Tensor {\n  if (typeof b === 'number') {\n    if (b === 0) throw new ShapeError(`div: scalar divisor cannot be zero`, captureSite('div'))\n    return mulScalar(a, 1 / b)\n  }\n  return binopOp('div', 'div', a, b)\n}\n\n// ----------------------------------------------------------------------------\n// Element-wise scalar binops (mul/add by JS number). Used for things like\n// `scores * (1/sqrt(d))` and `logits + 1e-5` where allocating a 0-d tensor\n// for the scalar is wasteful.\n// ----------------------------------------------------------------------------\n\nexport function mulScalar(a: Tensor, scalar: number): Tensor {\n  const site = captureSite('mulScalar')\n  return addOp(currentGraph(), 'mul_scalar', a.shape, a.dtype, site, { a: a.id, scalar })\n}\n\nexport function addScalar(a: Tensor, scalar: number): Tensor {\n  const site = captureSite('addScalar')\n  return addOp(currentGraph(), 'add_scalar', a.shape, a.dtype, site, { a: a.id, scalar })\n}\n\n// ----------------------------------------------------------------------------\n// Unary ops.\n// ----------------------------------------------------------------------------\n\nfunction unary(name: 'sqrt' | 'rsqrt' | 'log' | 'exp' | 'relu', a: Tensor): Tensor {\n  const site = captureSite(name)\n  if (a.dtype !== 'f32') throw new ShapeError(`${name}: requires f32, got ${a.dtype}`, site)\n  return addOp(currentGraph(), name, inferUnary(name, a.shape, site), 'f32', site, { a: a.id })\n}\n\nexport const sqrt  = (a: Tensor): Tensor => unary('sqrt',  a)\nexport const rsqrt = (a: Tensor): Tensor => unary('rsqrt', a)\nexport const log   = (a: Tensor): Tensor => unary('log',   a)\nexport const exp   = (a: Tensor): Tensor => unary('exp',   a)\nexport const relu  = (a: Tensor): Tensor => unary('relu',  a)\n\n// ----------------------------------------------------------------------------\n// Reductions over the last axis. To reduce along other axes, transpose first.\n// (This is intentional \u2014 keeps codegen and autograd small.)\n// ----------------------------------------------------------------------------\n\nexport function meanLast(a: Tensor): Tensor {\n  const site = captureSite('meanLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`meanLast: requires f32, got ${a.dtype}`, site)\n  const outShape = inferMeanLast('meanLast', a.shape, site)\n  return addOp(currentGraph(), 'mean_last', outShape, a.dtype, site, { a: a.id })\n}\n\nexport function sumLast(a: Tensor): Tensor {\n  const site = captureSite('sumLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`sumLast: requires f32, got ${a.dtype}`, site)\n  const outShape = inferSumLast('sumLast', a.shape, site)\n  return addOp(currentGraph(), 'sum_last', outShape, a.dtype, site, { a: a.id })\n}\n\n/** Reduce all elements to a 0-d scalar. Composes `reshape` + `sumLast`. */\nexport function sumAll(a: Tensor): Tensor {\n  return sumLast(reshape(a, [-1]))\n}\n\n// ----------------------------------------------------------------------------\n// Shape ops.\n// ----------------------------------------------------------------------------\n\nexport function reshape(a: Tensor, newShape: Shape): Tensor {\n  const site = captureSite('reshape')\n  const outShape = inferReshape('reshape', a.shape, newShape, site)\n  return addOp(currentGraph(), 'reshape', outShape, a.dtype, site, { a: a.id, newShape: outShape })\n}\n\nexport function transpose(a: Tensor, perm: readonly number[]): Tensor {\n  const site = captureSite('transpose')\n  const outShape = inferTranspose('transpose', a.shape, perm, site)\n  return addOp(currentGraph(), 'transpose', outShape, a.dtype, site, { a: a.id, perm })\n}\n\n/** Swap two axes of a tensor. Negative indices count from the end (so\n *  `swapAxes(x, -1, -2)` swaps the last two \u2014 the common attention pattern).\n *  All other axes keep their position. Implemented as `transpose` with the\n *  permutation `[0, 1, ..., axis2, ..., axis1, ..., n-1]`. */\nexport function swapAxes(a: Tensor, axis1: number, axis2: number): Tensor {\n  const r = a.shape.length\n  const norm = (axis: number): number => axis < 0 ? r + axis : axis\n  const i1 = norm(axis1)\n  const i2 = norm(axis2)\n  const site = captureSite('swapAxes')\n  if (i1 < 0 || i1 >= r || i2 < 0 || i2 >= r) {\n    throw new ShapeError(`swapAxes: axis out of range \u2014 got (${axis1}, ${axis2}) for rank-${r} tensor`, site)\n  }\n  if (i1 === i2) return a\n  const perm = Array.from({ length: r }, (_, k) => k)\n  perm[i1] = i2\n  perm[i2] = i1\n  return transpose(a, perm)\n}\n\n// ----------------------------------------------------------------------------\n// Linear algebra.\n// ----------------------------------------------------------------------------\n\nexport function matmul(a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('matmul')\n  if (a.dtype !== 'f32' || b.dtype !== 'f32') {\n    throw new ShapeError(`matmul: requires f32, got ${a.dtype} and ${b.dtype}`, site)\n  }\n  const outShape = inferMatmul('matmul', a.shape, b.shape, site)\n  return addOp(currentGraph(), 'matmul', outShape, 'f32', site, { a: a.id, b: b.id })\n}\n\nexport function matmulBatched(a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('matmulBatched')\n  if (a.dtype !== 'f32' || b.dtype !== 'f32') {\n    throw new ShapeError(`matmulBatched: requires f32, got ${a.dtype} and ${b.dtype}`, site)\n  }\n  const outShape = inferMatmulBatched('matmulBatched', a.shape, b.shape, site)\n  return addOp(currentGraph(), 'matmul_batched', outShape, 'f32', site, { a: a.id, b: b.id })\n}\n\n// ----------------------------------------------------------------------------\n// Indexing / casting.\n// ----------------------------------------------------------------------------\n\nexport function oneHot(indices: Tensor, depth: number, dtype: Dtype = 'f32'): Tensor {\n  const site = captureSite('oneHot')\n  if (indices.dtype !== 'i32') {\n    throw new ShapeError(`oneHot: indices must be i32, got ${indices.dtype}`, site)\n  }\n  const outShape = inferOneHot('oneHot', indices.shape, depth, site)\n  return addOp(currentGraph(), 'one_hot', outShape, dtype, site, { indices: indices.id, depth, dtype })\n}\n\n/** Embedding lookup: pull rows from `table` indexed by `indices`. Decomposes\n *  to `oneHot(indices, vocab) @ table` so autograd works without a dedicated\n *  scatter-with-atomic-add backward \u2014 the matmul transpose rule handles it.\n *  `table` is `[vocab, dim]`; `indices` is any shape `[...]` of i32; result\n *  is `[..., dim]`. The vocab size is taken from `table.shape[0]`. */\nexport function embedding(table: Tensor, indices: Tensor): Tensor {\n  const site = captureSite('embedding')\n  if (table.shape.length !== 2) {\n    throw new ShapeError(`embedding: table must be 2-d [vocab, dim], got ${showShape(table.shape)}`, site)\n  }\n  if (indices.dtype !== 'i32') {\n    throw new ShapeError(`embedding: indices must be i32, got ${indices.dtype}`, site)\n  }\n  return matmul(oneHot(indices, table.shape[0]!, 'f32'), table)\n}\n\n// arange(n) \u2192 [n] of values [0, 1, ..., n-1]. Used for position embeddings.\nexport function arange(n: number, dtype: Dtype = 'i32'): Tensor {\n  const site = captureSite('arange')\n  if (n <= 0 || !Number.isInteger(n)) {\n    throw new ShapeError(`arange: n must be a positive integer, got ${n}`, site)\n  }\n  return addOp(currentGraph(), 'arange', [n], dtype, site, { n, dtype })\n}\n\n// ----------------------------------------------------------------------------\n// ML primitives. Fused so autograd's transpose rule is straightforward and the\n// kernels can be hand-tuned for our specific shapes.\n// ----------------------------------------------------------------------------\n\n// Causal-masked softmax along the last axis. Shape preserved. Last two axes\n// must be square (TxT attention scores).\nexport function softmaxCausalLast(a: Tensor): Tensor {\n  const site = captureSite('softmaxCausalLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`softmaxCausalLast: requires f32, got ${a.dtype}`, site)\n  inferWhereCausal('softmaxCausalLast', a.shape, site)  // shape check (square last 2 axes)\n  return addOp(currentGraph(), 'softmax_causal_last', a.shape, 'f32', site, { a: a.id })\n}\n\n// Numerically-stable log-softmax along the last axis. Shape preserved.\nexport function logSoftmaxLast(a: Tensor): Tensor {\n  const site = captureSite('logSoftmaxLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`logSoftmaxLast: requires f32, got ${a.dtype}`, site)\n  return addOp(currentGraph(), 'log_softmax_last', a.shape, 'f32', site, { a: a.id })\n}\n\n// Pre-softmax causal mask. Sets cells where (i < j) on the last two axes to\n// `fillValue` (typically -1e30). Lower-triangle entries pass through.\n// Use this when you want the masked scores explicitly (e.g. for capture);\n// for the common case, prefer softmaxCausalLast which fuses both.\nexport function whereCausal(a: Tensor, fillValue: number): Tensor {\n  const site = captureSite('whereCausal')\n  if (a.dtype !== 'f32') throw new ShapeError(`whereCausal: requires f32, got ${a.dtype}`, site)\n  inferWhereCausal('whereCausal', a.shape, site)\n  return addOp(currentGraph(), 'where_causal', a.shape, 'f32', site, { a: a.id, fillValue })\n}\n\n// ----------------------------------------------------------------------------\n// Slicing.\n// ----------------------------------------------------------------------------\n\n// sliceLastRange(a, start, end): slice [start, end) along the last axis.\n// Used for splitting Q/K/V from a fused QKV matmul.\nexport function sliceLastRange(a: Tensor, start: number, end: number): Tensor {\n  const site = captureSite('sliceLastRange')\n  const outShape = inferSliceLastRange('sliceLastRange', a.shape, start, end, site)\n  return addOp(currentGraph(), 'slice_last_range', outShape, a.dtype, site, { a: a.id, start, end })\n}\n\n// ----------------------------------------------------------------------------\n// Broadcast / un-broadcast. Mostly used by autograd, but exposed in case user\n// code needs them (e.g. explicit broadcasting for clarity).\n// ----------------------------------------------------------------------------\n\nexport function broadcastTo(a: Tensor, targetShape: Shape): Tensor {\n  const site = captureSite('broadcastTo')\n  inferBroadcastTo('broadcastTo', a.shape, targetShape, site)\n  return addOp(currentGraph(), 'broadcast_to', targetShape, a.dtype, site, { a: a.id, targetShape })\n}\n\nexport function sumToShape(a: Tensor, targetShape: Shape): Tensor {\n  const site = captureSite('sumToShape')\n  inferSumToShape('sumToShape', a.shape, targetShape, site)\n  return addOp(currentGraph(), 'sum_to_shape', targetShape, a.dtype, site, { a: a.id, targetShape })\n}\n\n// ----------------------------------------------------------------------------\n// Constants.\n// ----------------------------------------------------------------------------\n\n// 0-d tensor with a constant value. Used by autograd to seed the loss cotangent.\nexport function constScalar(value: number, dtype: Dtype = 'f32'): Tensor {\n  const site = captureSite('constScalar')\n  return addOp(currentGraph(), 'const_scalar', [], dtype, site, { value, dtype })\n}\n\n// ----------------------------------------------------------------------------\n// Autograd-internal helpers (exposed for users writing custom transpose rules).\n// ----------------------------------------------------------------------------\n\n// ----------------------------------------------------------------------------\n// Comparisons and selection.\n// ----------------------------------------------------------------------------\n\n// Comparisons reuse the binop helper but return bool.\nexport const less    = (a: Tensor, b: Tensor): Tensor => binopOp('less',    'less',    a, b, 'bool')\nexport const greater = (a: Tensor, b: Tensor): Tensor => binopOp('greater', 'greater', a, b, 'bool')\n\n// where(cond, a, b): elementwise select. cond is bool; a and b can be any matching dtype.\nexport function where(cond: Tensor, a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('where')\n  if (cond.dtype !== 'bool') throw new ShapeError(`where: cond must be bool, got ${cond.dtype}`, site)\n  if (a.dtype !== b.dtype) throw new ShapeError(`where: a/b dtype mismatch (${a.dtype} vs ${b.dtype})`, site)\n  const outShape = inferWhere('where', cond.shape, a.shape, b.shape, site)\n  return addOp(currentGraph(), 'where', outShape, a.dtype, site, { cond: cond.id, a: a.id, b: b.id })\n}\n\n// reluGrad(x, dy) = dy where x > 0, else 0. Same shape as x. This is the\n// transpose rule for relu, exposed as an op so codegen can emit it.\nexport function reluGrad(x: Tensor, dy: Tensor): Tensor {\n  const site = captureSite('reluGrad')\n  if (x.dtype !== 'f32' || dy.dtype !== 'f32') {\n    throw new ShapeError(`reluGrad: requires f32, got ${x.dtype} and ${dy.dtype}`, site)\n  }\n  const outShape = inferReluGrad('reluGrad', x.shape, dy.shape, site)\n  return addOp(currentGraph(), 'relu_grad', outShape, 'f32', site, { x: x.id, dy: dy.id })\n}\n\n// ----------------------------------------------------------------------------\n// Adam-fused ops. Each does its full per-element update in one kernel.\n// ----------------------------------------------------------------------------\n\nexport function adamUpdateM(m: Tensor, g: Tensor, b1: number): Tensor {\n  const site = captureSite('adamUpdateM')\n  if (m.dtype !== 'f32' || g.dtype !== 'f32') throw new ShapeError(`adamUpdateM: requires f32`, site)\n  if (m.shape.length !== g.shape.length || m.shape.some((d, i) => d !== g.shape[i])) {\n    throw new ShapeError(`adamUpdateM: shape mismatch`, site)\n  }\n  return addOp(currentGraph(), 'adam_update_m', m.shape, 'f32', site, { m: m.id, g: g.id, b1 })\n}\n\nexport function adamUpdateV(v: Tensor, g: Tensor, b2: number): Tensor {\n  const site = captureSite('adamUpdateV')\n  if (v.dtype !== 'f32' || g.dtype !== 'f32') throw new ShapeError(`adamUpdateV: requires f32`, site)\n  if (v.shape.length !== g.shape.length || v.shape.some((d, i) => d !== g.shape[i])) {\n    throw new ShapeError(`adamUpdateV: shape mismatch`, site)\n  }\n  return addOp(currentGraph(), 'adam_update_v', v.shape, 'f32', site, { v: v.id, g: g.id, b2 })\n}\n\nexport function adamUpdateP(\n  p: Tensor,\n  mNew: Tensor,\n  vNew: Tensor,\n  lrt: Tensor,\n  eps: number,\n  decayShrink: number | Tensor = 1,\n): Tensor {\n  const site = captureSite('adamUpdateP')\n  if (p.dtype !== 'f32') throw new ShapeError(`adamUpdateP: requires f32`, site)\n  if (lrt.dtype !== 'f32' || lrt.shape.length !== 0) {\n    throw new ShapeError(`adamUpdateP: lrt must be a 0-d f32 scalar`, site)\n  }\n  if (p.shape.length !== mNew.shape.length || p.shape.some((d, i) => d !== mNew.shape[i])) {\n    throw new ShapeError(`adamUpdateP: p/mNew shape mismatch`, site)\n  }\n  // decayShrink is either a literal (baked into the kernel) or a 0-d scalar\n  // tensor input the runtime updates per step. The kernel binds at most one,\n  // chosen by whichever the caller provided.\n  const isTensor = typeof decayShrink === 'object'\n  if (isTensor) {\n    if (decayShrink.dtype !== 'f32' || decayShrink.shape.length !== 0) {\n      throw new ShapeError(`adamUpdateP: decayShrink tensor must be a 0-d f32 scalar`, site)\n    }\n  }\n  return addOp(currentGraph(), 'adam_update_p', p.shape, 'f32', site, {\n    p: p.id,\n    mNew: mNew.id,\n    vNew: vNew.id,\n    lrt: lrt.id,\n    eps,\n    decayShrink: isTensor ? 1 : decayShrink,\n    decayShrinkTensor: isTensor ? decayShrink.id : null,\n  })\n}\n", "// Reverse-mode autograd over a traced Graph.\n//\n// Given a graph that ends in a scalar loss tensor, this module walks the ops\n// in reverse and appends backward ops to the same graph, computing dL/dT for\n// every Tensor T that descends from a `param_input`. The final cotangents on\n// the param_input tensors are the parameter gradients.\n//\n// Cotangent accumulation: a tensor with multiple consumers ends up with\n// contributions from each. We add them as we encounter them, so by the time\n// reverse iteration reaches a tensor's producer op, its cotangent is complete.\n//\n// Why this works as \"more graph nodes\": the transpose rule for an op like\n// mul(a, b)\u2192c is `da += dc * b; db += dc * a`. The right-hand sides are\n// expressible in terms of existing forward ops (mul) plus accumulation (add).\n// We just call those op functions, which append nodes to the current graph\n// because we run inside an active trace context.\n\nimport type { Graph, OpNode, Tensor, Shape } from './ir.js'\nimport {\n  add, sub, mul, div, mulScalar,\n  matmul, matmulBatched, transpose, swapAxes, reshape,\n  exp,\n  broadcastTo, sumToShape,\n  constScalar, reluGrad,\n  sumLast, where,\n} from './ops.js'\nimport { traceInto } from './trace.js'\nimport { shapesEqual } from './shape.js'\n\n// ============================================================================\n// Public API\n// ============================================================================\n\nexport interface GradResult {\n  // The graph, augmented with backward ops.\n  readonly graph: Graph\n  // Cotangents (gradients) for each param_input, keyed by param name.\n  readonly paramGrads: Record<string, Tensor>\n  // The loss output (unchanged from input).\n  readonly loss: Tensor\n}\n\n// `appendGrad(graph)` augments `graph` (which must have already been built by\n// `trace(...)` and must have a single scalar output = the loss) with backward\n// ops. Returns gradients for every param_input.\n//\n// Internally re-enters the graph as the active trace context, so backward ops\n// emitted by transpose rules append to it. The caller doesn't need to manage\n// trace state.\nexport function appendGrad(graph: Graph): GradResult {\n  if (graph.outputs.length !== 1) {\n    throw new Error(`autograd: expected graph with exactly 1 output (the loss); got ${graph.outputs.length}`)\n  }\n  const lossId = graph.outputs[0]!\n  const lossTensor = graph.tensors[lossId]!\n  if (lossTensor.shape.length !== 0) {\n    throw new Error(\n      `autograd: loss must be a rank-0 scalar; got shape [${lossTensor.shape.join(', ')}]. ` +\n      `Reduce with sumLast / mulScalar to a scalar before calling appendGrad.`,\n    )\n  }\n\n  // Snapshot the forward portion of the graph before we start emitting backward\n  // ops, so the reverse walk only iterates over forward ops.\n  const forwardOpCount = graph.ops.length\n  const forwardOps = graph.ops.slice(0, forwardOpCount)\n\n  // cotangents: tensorId -> the Tensor representing dL/dTensor in the graph.\n  const cotangents = new Map<number, Tensor>()\n\n  return traceInto(graph, () => {\n    // Seed: dL/dLoss = 1.0\n    cotangents.set(lossId, constScalar(1.0, 'f32'))\n\n    // Reverse walk.\n    for (let i = forwardOpCount - 1; i >= 0; i--) {\n      const op = forwardOps[i]!\n      const outCotan = cotangents.get(op.out)\n      if (!outCotan) continue\n      runTransposeRule(op, outCotan, graph, cotangents)\n    }\n\n    // Collect param gradients by name. Skip non-param leaves.\n    const paramGrads: Record<string, Tensor> = {}\n    for (const op of forwardOps) {\n      if (op.kind !== 'param_input') continue\n      // (state_input and tensor_input don't produce gradients we hand back.)\n      const cotan = cotangents.get(op.out)\n      if (!cotan) {\n        // No path from this param to the loss \u2014 emit explicit zeros so the\n        // caller gets a tensor with the right shape.\n        const t = graph.tensors[op.out]!\n        paramGrads[op.name] = broadcastTo(constScalar(0.0, t.dtype), t.shape)\n      } else {\n        paramGrads[op.name] = cotan\n      }\n    }\n\n    return { graph, paramGrads, loss: lossTensor }\n  })\n}\n\n// ============================================================================\n// Cotangent accumulation\n// ============================================================================\n\n// Add `contribution` to the cotangent of tensor `inputId`. If a cotangent\n// already exists, sum them (multiple consumers); otherwise initialize.\nfunction accumulate(cotangents: Map<number, Tensor>, inputId: number, contribution: Tensor): void {\n  const existing = cotangents.get(inputId)\n  if (existing) {\n    cotangents.set(inputId, add(existing, contribution))\n  } else {\n    cotangents.set(inputId, contribution)\n  }\n}\n\n// Reduce a cotangent to match the input's shape, undoing any broadcast that\n// occurred during forward. If `fromShape == toShape`, no-op.\nfunction unbroadcast(cotan: Tensor, toShape: Shape): Tensor {\n  if (shapesEqual(cotan.shape, toShape)) return cotan\n  return sumToShape(cotan, toShape)\n}\n\n\n// ============================================================================\n// Transpose rules\n// ============================================================================\n//\n// One per OpNode kind. Each rule:\n//   * receives the forward op + its output cotangent\n//   * builds the backward expression(s) in graph terms (calling ops.ts functions)\n//   * accumulates cotangent contributions onto each input tensor\n\nfunction runTransposeRule(\n  op: OpNode,\n  outCotan: Tensor,\n  graph: Graph,\n  cotangents: Map<number, Tensor>,\n): void {\n  const tensorOf = (id: number) => graph.tensors[id]!\n\n  switch (op.kind) {\n    // ---- Leaves: no inputs to accumulate into. -----------------------------\n    case 'param_input':\n    case 'tensor_input':\n    case 'state_input':\n    case 'arange':\n    case 'const_scalar':\n      return\n\n    // ---- Element-wise binops (with broadcast) ------------------------------\n    // c = a op b; reduce cotan back to each operand's shape.\n    case 'add': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(outCotan, a.shape))\n      accumulate(cotangents, op.b, unbroadcast(outCotan, b.shape))\n      return\n    }\n    case 'sub': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(outCotan, a.shape))\n      accumulate(cotangents, op.b, unbroadcast(mulScalar(outCotan, -1), b.shape))\n      return\n    }\n    case 'mul': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      // dC/dA = b ; dC/dB = a. Both are forward tensors still alive in the graph.\n      // We must NOT consume the forward tensors \u2014 they're referenced by id.\n      // The mul() helper allocates fresh tensors, so referencing a/b multiple\n      // times in different mul() calls is fine: we just emit fresh ops.\n      accumulate(cotangents, op.a, unbroadcast(mul(outCotan, b), a.shape))\n      accumulate(cotangents, op.b, unbroadcast(mul(outCotan, a), b.shape))\n      return\n    }\n    case 'div': {\n      // c = a/b. dc/da = 1/b. dc/db = -a/b^2.\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(div(outCotan, b), a.shape))\n      // -outCotan * a / (b*b)\n      const numer = mul(outCotan, a)\n      const bSq = mul(b, b)\n      accumulate(cotangents, op.b, unbroadcast(mulScalar(div(numer, bSq), -1), b.shape))\n      return\n    }\n\n    // ---- Element-wise scalar binops (scalar is a JS number, not a tensor) -\n    case 'mul_scalar': {\n      // c = a * s. dc/da = s.\n      accumulate(cotangents, op.a, mulScalar(outCotan, op.scalar))\n      return\n    }\n    case 'add_scalar': {\n      // c = a + s. dc/da = 1.\n      accumulate(cotangents, op.a, outCotan)\n      return\n    }\n\n    // ---- Unary -------------------------------------------------------------\n    case 'sqrt': {\n      // c = sqrt(a). dc/da = 1/(2*sqrt(a)) = 1/(2*c).\n      const c = tensorOf(op.out)\n      accumulate(cotangents, op.a, mulScalar(div(outCotan, c), 0.5))\n      return\n    }\n    case 'rsqrt': {\n      // c = a^(-0.5). dc/da = -0.5 * a^(-1.5) = -0.5 * c^3.\n      const c = tensorOf(op.out)\n      const c3 = mul(mul(c, c), c)\n      accumulate(cotangents, op.a, mulScalar(mul(outCotan, c3), -0.5))\n      return\n    }\n    case 'log': {\n      // c = log(a). dc/da = 1/a.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, div(outCotan, a))\n      return\n    }\n    case 'exp': {\n      // c = exp(a). dc/da = exp(a) = c.\n      const c = tensorOf(op.out)\n      accumulate(cotangents, op.a, mul(outCotan, c))\n      return\n    }\n    case 'relu': {\n      // c = relu(a). dc/da = (a > 0 ? 1 : 0). Use the fused relu_grad op.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, reluGrad(a, outCotan))\n      return\n    }\n\n    // ---- Reductions over last axis ---------------------------------------\n    case 'mean_last': {\n      // c[..., 1] = mean over last axis of a[..., D]. da[..., d] = dc[..., 0] / D.\n      // outCotan has shape [..., 1]; broadcast to a's shape and divide by D.\n      const a = tensorOf(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const expanded = broadcastTo(outCotan, a.shape)\n      accumulate(cotangents, op.a, mulScalar(expanded, 1 / D))\n      return\n    }\n    case 'sum_last': {\n      // c[...] = sum over last axis (keepdims=false). da[..., d] = dc[...].\n      // outCotan has rank one less than a; broadcast to a's shape (which inserts\n      // back the last axis with a's last-axis size).\n      const a = tensorOf(op.a)\n      // First reshape outCotan to add a trailing 1, then broadcast to a's shape.\n      const withKeep = reshape(outCotan, [...outCotan.shape, 1])\n      accumulate(cotangents, op.a, broadcastTo(withKeep, a.shape))\n      return\n    }\n\n    // ---- Shape ------------------------------------------------------------\n    case 'reshape': {\n      // c = reshape(a, ...). Backward: reshape outCotan back to a's shape.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, reshape(outCotan, a.shape))\n      return\n    }\n    case 'transpose': {\n      // c = transpose(a, perm). Backward: transpose outCotan with inverse perm.\n      const inv = invertPerm(op.perm)\n      accumulate(cotangents, op.a, transpose(outCotan, inv))\n      return\n    }\n\n    // ---- Linear algebra ---------------------------------------------------\n    case 'matmul': {\n      // c = a @ b, where a: [..., M, K], b: [K, N], c: [..., M, N].\n      // dA = dC @ B^T  (matmul, since b is unbatched)\n      // dB = sum_over_batch( A^T @ dC )\n      //\n      // Implementation note: dA uses the same `matmul` (a [...,M,N] \u00B7 b [N,K])\n      // because b is rank-2. dB needs A^T which has shape [..., K, M], then\n      // matmul with dC ([..., M, N]) gives [..., K, N], which we sum over\n      // leading batch dims to get [K, N].\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      // dA = dC @ B^T\n      accumulate(cotangents, op.a, matmul(outCotan, swapAxes(b, -1, -2)))\n      // dB: per-batch A^T @ dC, then sum over batch dims.\n      // A is [..., M, K]; transpose last two axes.\n      const aT = swapAxes(a, -1, -2)  // [..., K, M]\n      // matmul_batched needs same rank on both sides. dC has rank `a.rank`;\n      // aT has rank `a.rank`; use matmul_batched if rank > 2, else matmul.\n      let perBatchDb: Tensor\n      if (a.shape.length > 2) {\n        perBatchDb = matmulBatched(aT, outCotan)  // [..., K, N]\n      } else {\n        perBatchDb = matmul(aT, outCotan)  // [K, N]\n      }\n      // Sum over leading batch dims to collapse to b's shape [K, N].\n      accumulate(cotangents, op.b, sumToShape(perBatchDb, b.shape))\n      return\n    }\n    case 'matmul_batched': {\n      // c = a @ b, both [..., M, K] \u00B7 [..., K, N] -> [..., M, N].\n      // dA = dC @ B^T   (per-batch, all batch dims preserved)\n      // dB = A^T @ dC   (per-batch)\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, matmulBatched(outCotan, swapAxes(b, -1, -2)))\n      accumulate(cotangents, op.b, matmulBatched(swapAxes(a, -1, -2), outCotan))\n      return\n    }\n\n    // ---- Indexing / casting (no gradient through integer indices) --------\n    case 'one_hot':\n      // The output is float, but the input (indices) is integer-valued \u2014 no\n      // continuous gradient flows through it. Stop here.\n      return\n\n    // ---- Slicing ---------------------------------------------------------\n    case 'slice_last_range': {\n      // c = a[..., start:end]. Backward: pad outCotan with zeros to a's shape.\n      // We construct this as: zeros at left, outCotan in middle, zeros at right,\n      // concatenated along the last axis. We don't have concat or generic pad\n      // ops; the simplest expression here is a sparse expansion via broadcasting\n      // and addition of zero tensors. For Phase 2 we punt: slice's autograd is\n      // implemented by emitting a single fused op that scatters the cotangent.\n      // For now: signal that slice's backward needs a dedicated op kind.\n      const a = tensorOf(op.a)\n      // Build a zeros tensor of a's shape, then add via... no, we can't do\n      // additive scatter without an index_put. Easiest path: add a dedicated\n      // backward op kind. For this pass, throw until we extend the IR.\n      throw new Error(\n        `autograd: slice_last_range backward not implemented yet ` +\n        `(would need a scatter-style op or a Concat op). ` +\n        `Workaround for now: avoid taking gradients through slices by using ` +\n        `separate matmuls for Q/K/V instead of a fused W_qkv. ` +\n        `Tensor: ${a.shape} -> ${tensorOf(op.out).shape}`,\n      )\n    }\n\n    // ---- Broadcast / un-broadcast (autograd infrastructure) ---------------\n    case 'broadcast_to': {\n      // c = broadcast(a, target). da = sum_to_shape(dc, a.shape).\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, sumToShape(outCotan, a.shape))\n      return\n    }\n    case 'sum_to_shape': {\n      // c = sum_to_shape(a, target). da = broadcast_to(dc, a.shape).\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, broadcastTo(outCotan, a.shape))\n      return\n    }\n\n    // ---- ML primitives ---------------------------------------------------\n    case 'log_softmax_last': {\n      // c = log_softmax(a, axis=-1). softmax(a) = exp(c).\n      // dL/dA = dL/dC - softmax(a) * sum_last_keepdims(dL/dC)\n      const c = tensorOf(op.out)\n      const sm = exp(c)  // softmax(a)\n      // sum_last with keepdims via reshape: sum_last drops the dim, then\n      // reshape to add a trailing 1 back, then broadcast multiplies.\n      const sumDc = sumLast(outCotan)            // shape: [..., ] (rank-1 less)\n      const sumDcKeep = reshape(sumDc, [...sumDc.shape, 1])\n      const term = mul(sm, broadcastTo(sumDcKeep, c.shape))\n      accumulate(cotangents, op.a, sub(outCotan, term))\n      return\n    }\n    case 'softmax_causal_last': {\n      // c = softmax_causal(a, axis=-1). The causal mask zeros the upper triangle\n      // of c; for the backward, the same mask zeros out dx_upper because both\n      // paths through softmax depend on c-values that are 0 there.\n      // dL/dA = (dL/dC - sum_last_keep(dL/dC * c)) * c\n      const c = tensorOf(op.out)\n      const dcXc = mul(outCotan, c)\n      const s = sumLast(dcXc)\n      const sKeep = reshape(s, [...s.shape, 1])\n      const inner = sub(outCotan, broadcastTo(sKeep, c.shape))\n      accumulate(cotangents, op.a, mul(inner, c))\n      return\n    }\n    // ---- Comparisons + select ---------------------------------------------\n    case 'less':\n    case 'greater':\n      // No gradient flows through bool comparisons. Stop here.\n      return\n\n    case 'where': {\n      // c = where(cond, a, b).\n      // dC flows to a where cond is true, to b where cond is false.\n      // Need broadcast-aware unreduction back to a's and b's original shapes.\n      const cond = tensorOf(op.cond)\n      const a = tensorOf(op.a)\n      const b = tensorOf(op.b)\n      // Build zero tensors via broadcasting a 0-d const scalar.\n      const zeroA = broadcastTo(constScalar(0, a.dtype), outCotan.shape)\n      const zeroB = broadcastTo(constScalar(0, b.dtype), outCotan.shape)\n      accumulate(cotangents, op.a, unbroadcast(where(cond, outCotan, zeroA), a.shape))\n      accumulate(cotangents, op.b, unbroadcast(where(cond, zeroB, outCotan), b.shape))\n      return\n    }\n\n    case 'where_causal': {\n      // c = where(causal_mask, a, fillValue). Upper triangle becomes constant\n      // (no gradient); lower triangle passes a through. So da_lower = dc_lower,\n      // da_upper = 0. We can't easily express this with current ops; punt.\n      throw new Error(\n        `autograd: where_causal backward not yet implemented. ` +\n        `Use softmax_causal_last (which fuses the mask + softmax) instead.`,\n      )\n    }\n\n    // ---- Adam ops are post-autograd; no backward through them. ----------\n    case 'adam_update_m':\n    case 'adam_update_v':\n    case 'adam_update_p':\n      throw new Error(`autograd: cannot differentiate through ${op.kind}`)\n\n    // ---- relu_grad has no further backward (autograd-internal) ----------\n    case 'relu_grad': {\n      // We don't double-differentiate. If someone tries, this will blow up \u2014\n      // intentional. Phase 2 doesn't need 2nd-order gradients.\n      throw new Error(\n        `autograd: cannot take second-order gradient through relu_grad. ` +\n        `Phase 2 does not support higher-order autodiff.`,\n      )\n    }\n\n    default: {\n      // Exhaustiveness check at type level.\n      const _exhaustive: never = op\n      void _exhaustive\n      throw new Error(`autograd: unhandled op kind ${(op as OpNode).kind}`)\n    }\n  }\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\nfunction invertPerm(perm: readonly number[]): number[] {\n  const inv: number[] = new Array(perm.length)\n  for (let i = 0; i < perm.length; i++) inv[perm[i]!] = i\n  return inv\n}\n", "// Adam / AdamW optimizer, in-graph.\n//\n// `appendAdam` extends a graph that already has a forward pass + autograd-emitted\n// backward (i.e., has paramGrads from `appendGrad`) with the Adam update math.\n//\n// Per parameter P with gradient g:\n//   m_new = b1 * m + (1 - b1) * g\n//   v_new = b2 * v + (1 - b2) * g\u00B2\n//   p_new = decayShrink * p - lrt * m_new / (sqrt(v_new) + eps)\n//\n// `decayShrink = 1 - lr * weightDecay` when the param is being decayed\n// (Loshchilov & Hutter, \"AdamW\") and 1 otherwise \u2014 at which point the\n// multiply folds out and you're left with plain Adam. `lrt` is supplied\n// per-step from CPU and includes the bias-correction factor\n// `sqrt(1-b2^t)/(1-b1^t)`; that's why convergence isn't affected by the\n// first-step warmup that bias-correction-free Adam suffers.\n//\n// **Static vs scheduled lr.** When `config.lr` is a number, decayShrink is\n// baked into the kernel as a literal. When it's a function `(step) => lr`,\n// decayShrink for decayed params becomes a per-step scalar input that the\n// runtime updates each call (computed from the current step's lr). lrt is\n// always per-step; the bias-correction factor changes every step regardless.\n//\n// Returns writeback declarations the buffer planner uses to wire up the\n// \"after step, copy the new value into the persistent home\" path. m and v\n// are state_inputs (zero-initialized, persistent across steps); the param\n// updates are aliased back to the param buffers.\n\nimport type { Tensor } from './ir.js'\nimport type { Graph } from './ir.js'\nimport type { WritebackDecl } from './buffers.js'\nimport { traceInto, stateInput, tensorInput } from './trace.js'\nimport { adamUpdateM, adamUpdateV, adamUpdateP } from './ops.js'\n\n/** Per-step learning-rate schedule. Either a fixed number or one of the\n *  serializable shape forms below. Functions/closures are not supported \u2014\n *  the schedule needs to cross thread boundaries and survive serialization\n *  for the worker-internal runtime, and every realistic LR pattern (constant,\n *  linear decay, cosine, warmup-then-decay) maps to a finite set of shapes.\n *  Use the `lr` helper namespace to construct shapes ergonomically. */\nexport type LRSchedule =\n  | number\n  | { readonly kind: 'constant'; readonly value: number }\n  | { readonly kind: 'linearDecay'; readonly peak: number; readonly final: number; readonly steps: number }\n  | { readonly kind: 'cosineDecay'; readonly peak: number; readonly final: number; readonly steps: number }\n  | { readonly kind: 'warmup'; readonly peakLr: number; readonly warmupSteps: number; readonly after: LRSchedule }\n\n/** Ergonomic constructors for LRSchedule shapes. */\nexport const lr = {\n  constant: (value: number): LRSchedule => ({ kind: 'constant', value }),\n  /** Linearly interpolate from `peak` at step 1 to `final` at step `steps`,\n   *  then hold at `final`. Matches `peak + (final - peak) * min(step/steps, 1)`. */\n  linearDecay: (opts: { peak: number; final: number; steps: number }): LRSchedule =>\n    ({ kind: 'linearDecay', ...opts }),\n  /** Half-cosine from `peak` at step 1 down to `final` at step `steps`,\n   *  then hold at `final`. */\n  cosineDecay: (opts: { peak: number; final: number; steps: number }): LRSchedule =>\n    ({ kind: 'cosineDecay', ...opts }),\n  /** Linear ramp from 0 to `peakLr` over `warmupSteps` steps, then hand off\n   *  to `after` (offset so step 1 of `after` = first post-warmup step). */\n  warmup: (opts: { peakLr: number; warmupSteps: number; after: LRSchedule }): LRSchedule =>\n    ({ kind: 'warmup', ...opts }),\n}\n\n/** Resolve a schedule to its scalar value at a given 1-based step. */\nexport function resolveLR(schedule: LRSchedule, step: number): number {\n  if (typeof schedule === 'number') return schedule\n  switch (schedule.kind) {\n    case 'constant': return schedule.value\n    case 'linearDecay': {\n      const f = Math.min(step / schedule.steps, 1)\n      return schedule.peak + (schedule.final - schedule.peak) * f\n    }\n    case 'cosineDecay': {\n      const f = Math.min(step / schedule.steps, 1)\n      return schedule.final + 0.5 * (schedule.peak - schedule.final) * (1 + Math.cos(Math.PI * f))\n    }\n    case 'warmup': {\n      if (step <= schedule.warmupSteps) return schedule.peakLr * (step / schedule.warmupSteps)\n      return resolveLR(schedule.after, step - schedule.warmupSteps)\n    }\n  }\n}\n\n/** True for shapes that produce different values at different steps (so the\n *  AdamW decayShrink scalar must be a per-step input rather than baked).\n *  Numbers and `{kind:'constant'}` are static; everything else varies. */\nexport function isLRDynamic(schedule: LRSchedule): boolean {\n  if (typeof schedule === 'number') return false\n  return schedule.kind !== 'constant'\n}\n\nexport interface AdamConfig {\n  /** Learning rate schedule. Pass a number for fixed lr, or a shape from\n   *  the `lr` helpers (e.g., `lr.linearDecay({ peak: 0.005, final: 0.0005, steps: 1500 })`). */\n  lr: LRSchedule\n  b1?: number   // default 0.9\n  b2?: number   // default 0.999\n  eps?: number  // default 1e-8\n  /** AdamW: decoupled weight decay coefficient. Default 0 (plain Adam).\n   *  When non-zero, every step shrinks each decayed param by a factor of\n   *  `1 - lr * weightDecay` before the gradient update. */\n  weightDecay?: number\n  /** Filter deciding which params get weight decay. Only consulted when\n   *  weightDecay > 0. Default: decay every param. Override for the standard\n   *  transformer convention (decay weights/embeddings, skip biases + LN gains).\n   *  Example: `(name) => name.includes('.W') || name.endsWith('_emb')`. */\n  decayFilter?: (paramName: string) => boolean\n}\n\n/** Resolved hyperparameters with all fields populated. `lr` stays as the\n *  shape (not pre-resolved) so the runtime can compute per-step values. */\nexport interface AdamResolvedConfig {\n  lr: LRSchedule\n  b1: number\n  b2: number\n  eps: number\n  weightDecay: number\n  decayFilter: (name: string) => boolean\n  /** True iff the lr shape varies with step (linearDecay, cosineDecay,\n   *  warmup). When false, decayShrink is baked at compile time. */\n  lrIsScheduled: boolean\n}\n\nexport interface AdamResult {\n  /** Writebacks the buffer planner should wire into the runtime. */\n  writebacks: WritebackDecl[]\n  /** Name of the per-step scalar tensor_input. The runtime fills this each call\n   * with `lr * sqrt(1-b2^t)/(1-b1^t)` (Adam's bias-corrected effective LR). */\n  lrtInputName: string\n  /** Name of the per-step decayShrink scalar tensor_input, or null when lr is\n   *  static (decayShrink baked into the kernel) or no params are decayed. */\n  decayShrinkInputName: string | null\n  /** Hyperparameters as captured (so the runtime can compute lrt and decayShrink). */\n  config: AdamResolvedConfig\n}\n\n/**\n * Append Adam update ops to `graph`. Must be called inside an active trace\n * context (or after a trace, since traceInto re-enters the graph).\n *\n * @param graph the graph (already containing forward + backward)\n * @param paramGrads param name -> gradient tensor (output of `appendGrad`)\n * @param paramTensors param name -> the param's leaf Tensor (the param_input).\n *                     Needed because the param_input lives in the graph but we\n *                     don't have a direct map by name in `Graph` \u2014 caller passes it.\n * @param config Adam hyperparameters. Set `weightDecay > 0` for AdamW; an\n *               optional `decayFilter` selects which params receive decay.\n */\nexport function appendAdam(\n  graph: Graph,\n  paramGrads: Record<string, Tensor>,\n  paramTensors: Record<string, Tensor>,\n  config: AdamConfig,\n  /** Per-param decay flags from `materializeParams`. When supplied, overrides\n   *  `config.decayFilter` for any name in the map; falls back to `decayFilter`\n   *  for names not present (e.g., for low-level callers using `compile()`\n   *  directly without a Module). */\n  decayFlags?: Record<string, boolean>,\n): AdamResult {\n  const lrIsScheduled = isLRDynamic(config.lr)\n  const initialLr = resolveLR(config.lr, 1)\n  const fullConfig: AdamResolvedConfig = {\n    lr: config.lr,\n    b1: config.b1 ?? 0.9,\n    b2: config.b2 ?? 0.999,\n    eps: config.eps ?? 1e-8,\n    weightDecay: config.weightDecay ?? 0,\n    decayFilter: config.decayFilter ?? (() => true),\n    lrIsScheduled,\n  }\n  const writebacks: WritebackDecl[] = []\n  const lrtInputName = '_adam_lrt'\n  // Tensor input for runtime-updated decayShrink (only created when lr is a\n  // schedule fn AND at least one param will receive weight decay).\n  let decayShrinkInputName: string | null = null\n\n  return traceInto(graph, () => {\n    const lrt = tensorInput(lrtInputName, [], 'f32')\n\n    // Up-front: which params receive weight decay? Per-param decayFlags (set\n    // by Module.param's options) wins; falls back to decayFilter for names\n    // not in the map. Empty when weightDecay = 0 so the rest of the function\n    // can just ask \"is this name in the set?\".\n    const decayedNames = new Set<string>(\n      fullConfig.weightDecay > 0\n        ? Object.keys(paramGrads).filter(name =>\n            (decayFlags && name in decayFlags) ? decayFlags[name]! : fullConfig.decayFilter(name))\n        : [],\n    )\n\n    // We only need a runtime decayShrink scalar when lr varies per step AND\n    // at least one param is being decayed. Otherwise the value is constant\n    // and bakes into the kernel as a literal.\n    let decayShrinkScalar: Tensor | null = null\n    if (lrIsScheduled && decayedNames.size > 0) {\n      decayShrinkInputName = '_adam_decay_shrink'\n      decayShrinkScalar = tensorInput(decayShrinkInputName, [], 'f32')\n    }\n\n    for (const name of Object.keys(paramGrads)) {\n      const p = paramTensors[name]\n      const g = paramGrads[name]\n      if (!p) throw new Error(`appendAdam: missing param tensor for '${name}'`)\n      if (!g) throw new Error(`appendAdam: missing gradient for '${name}'`)\n\n      const mState = stateInput(`adam_m_${name}`, p.shape, 'f32', 0)\n      const vState = stateInput(`adam_v_${name}`, p.shape, 'f32', 0)\n\n      // Choose the decayShrink form per param:\n      //   - non-decayed params: literal 1 (kernel multiply folds out).\n      //   - decayed + scheduled lr: tensor input updated per step.\n      //   - decayed + static lr: literal `1 - lr * wd` baked at compile.\n      const decayShrink: number | Tensor =\n        !decayedNames.has(name) ? 1\n        : decayShrinkScalar !== null ? decayShrinkScalar\n        : 1 - initialLr * fullConfig.weightDecay\n\n      // Three fused kernels per parameter \u2014 one for each of m_new / v_new / p_new.\n      const newM = adamUpdateM(mState, g, fullConfig.b1)\n      const newV = adamUpdateV(vState, g, fullConfig.b2)\n      const newP = adamUpdateP(p, newM, newV, lrt, fullConfig.eps, decayShrink)\n\n      writebacks.push({ source: newM, destName: `adam_m_${name}`, destKind: 'state' })\n      writebacks.push({ source: newV, destName: `adam_v_${name}`, destKind: 'state' })\n      writebacks.push({ source: newP, destName: name,             destKind: 'param' })\n    }\n    return { writebacks, lrtInputName, decayShrinkInputName, config: fullConfig }\n  })\n}\n", "// Buffer planning: walk a Graph and decide which GPU buffer each Tensor maps to.\n//\n// v1 strategy: one GPU buffer per IR Tensor. Static shapes mean every buffer's\n// size is known at compile time and lifetimes don't overlap between steps \u2014\n// so no pooling needed. Total memory is the sum of every intermediate tensor.\n// For our transformer at B=256: ~30 MB of activations + grads. Easily fits.\n//\n// Categorization is what the runtime cares about:\n//   * param        \u2014 uploaded by user via uploadParams; persistent across steps\n//   * param_grad   \u2014 written each step by the backward pass; readable for inspection\n//   * tensor_input \u2014 uploaded each step (tokens, targets, masks)\n//   * intermediate \u2014 produced by an op; lifetime = within a single step\n//   * output       \u2014 special intermediate that should be made readable (loss)\n\nimport type { Graph, Tensor, Dtype, Shape, OpNode } from './ir.js'\nimport { shapeSize } from './shape.js'\n\nexport interface BufferSpec {\n  /** Matches tensor.id. */\n  id: number\n  byteSize: number\n  dtype: Dtype\n  shape: Shape\n  kind: 'param' | 'param_grad' | 'tensor_input' | 'state' | 'intermediate' | 'output'\n  /** External name for param/param_grad/tensor_input/state bindings. null otherwise. */\n  name: string | null\n  /** For state buffers: the value to fill on initial allocation. 0 by default. */\n  initValue?: number\n}\n\n/**\n * After step(), copy `source`'s buffer into `dest`'s buffer.\n * Used to write back updated optimizer state and updated parameters into\n * their persistent home buffers.\n */\nexport interface Writeback {\n  source: number  // buffer id of the tensor holding the new value\n  dest: number    // buffer id of the persistent state/param to overwrite\n  bytes: number\n}\n\nexport interface BufferPlan {\n  buffers: BufferSpec[]\n  /** Tensor id -> buffer id (currently 1:1 but kept opaque for future pooling). */\n  tensorToBuffer: Map<number, number>\n  /** Easy lookup tables for the runtime. */\n  paramsByName: Map<string, number>           // name -> buffer id\n  inputsByName: Map<string, number>           // name -> buffer id\n  paramGradsByName: Map<string, number>       // name -> buffer id\n  statesByName: Map<string, number>           // name -> buffer id (persistent state homes)\n  capturesByName: Map<string, number>         // name -> buffer id (activation captures)\n  outputBufferIds: number[]                   // graph.outputs mapped through\n  /** End-of-step writebacks (Adam updates for params, m, v, etc.) */\n  writebacks: Writeback[]\n}\n\nconst dtypeBytes: Record<Dtype, number> = { f32: 4, i32: 4, bool: 4 }\n\n/**\n * Caller-supplied writeback declarations: \"after each step, copy this Tensor's\n * buffer into the persistent home of this param/state.\"\n */\nexport interface WritebackDecl {\n  /** The Tensor (output of some op) holding the new value to write back. */\n  source: Tensor\n  /** Either a param name (writes to that param's home buffer) or a state name. */\n  destName: string\n  destKind: 'param' | 'state'\n}\n\n/**\n * Build a BufferPlan from a graph + the param-grad map produced by appendGrad.\n * @param graph the full graph (forward + backward + any optimizer ops)\n * @param paramGrads map from param name -> the Tensor that holds its gradient\n * @param writebackDecls list of end-of-step writebacks (e.g. from appendAdam).\n *                       Empty when there's no optimizer in the graph.\n */\nexport function planBuffers(\n  graph: Graph,\n  paramGrads: Record<string, Tensor>,\n  writebackDecls: WritebackDecl[] = [],\n): BufferPlan {\n  const buffers: BufferSpec[] = []\n  const tensorToBuffer = new Map<number, number>()\n  const paramsByName = new Map<string, number>()\n  const inputsByName = new Map<string, number>()\n  const paramGradsByName = new Map<string, number>()\n  const statesByName = new Map<string, number>()\n\n  // Build a quick reverse map: tensorId -> param name (for grads).\n  const gradTensorIdToName = new Map<number, string>()\n  for (const [name, tensor] of Object.entries(paramGrads)) {\n    gradTensorIdToName.set(tensor.id, name)\n  }\n  // ...and tensorId -> param/input op (so we can name the buffer correctly).\n  const opByOutId = new Map<number, OpNode>()\n  for (const op of graph.ops) opByOutId.set(op.out, op)\n\n  const outputSet = new Set(graph.outputs)\n\n  // Walk all tensors in id order. Categorize each.\n  for (const t of graph.tensors) {\n    const op = opByOutId.get(t.id)\n    let kind: BufferSpec['kind'] = 'intermediate'\n    let name: string | null = null\n    let initValue: number | undefined\n\n    if (op?.kind === 'param_input') {\n      kind = 'param'\n      name = op.name\n    } else if (op?.kind === 'tensor_input') {\n      kind = 'tensor_input'\n      name = op.name\n    } else if (op?.kind === 'state_input') {\n      kind = 'state'\n      name = op.name\n      initValue = op.initValue\n    } else if (gradTensorIdToName.has(t.id)) {\n      kind = 'param_grad'\n      name = gradTensorIdToName.get(t.id)!\n    } else if (outputSet.has(t.id)) {\n      kind = 'output'\n    }\n\n    const spec: BufferSpec = {\n      id: t.id,\n      byteSize: Math.max(4, shapeSize(t.shape) * dtypeBytes[t.dtype]),\n      dtype: t.dtype,\n      shape: t.shape,\n      kind,\n      name,\n      ...(initValue !== undefined ? { initValue } : {}),\n    }\n    buffers.push(spec)\n    tensorToBuffer.set(t.id, t.id)  // 1:1 for v1\n\n    if (kind === 'param') paramsByName.set(name!, t.id)\n    if (kind === 'tensor_input') inputsByName.set(name!, t.id)\n    if (kind === 'param_grad') paramGradsByName.set(name!, t.id)\n    if (kind === 'state') statesByName.set(name!, t.id)\n  }\n\n  const outputBufferIds = graph.outputs.map(id => tensorToBuffer.get(id)!)\n\n  // Resolve writeback declarations to (source, dest) buffer-id pairs.\n  const writebacks: Writeback[] = writebackDecls.map(decl => {\n    const sourceBufId = tensorToBuffer.get(decl.source.id)\n    if (sourceBufId === undefined) {\n      throw new Error(`planBuffers: writeback source tensor #${decl.source.id} not in graph`)\n    }\n    const destBufId = decl.destKind === 'param'\n      ? paramsByName.get(decl.destName)\n      : statesByName.get(decl.destName)\n    if (destBufId === undefined) {\n      throw new Error(`planBuffers: writeback dest ${decl.destKind}:'${decl.destName}' not found`)\n    }\n    const sourceSpec = buffers[sourceBufId]!\n    const destSpec = buffers[destBufId]!\n    if (sourceSpec.byteSize !== destSpec.byteSize) {\n      throw new Error(\n        `planBuffers: writeback size mismatch for ${decl.destKind}:'${decl.destName}' ` +\n        `(source ${sourceSpec.byteSize} bytes vs dest ${destSpec.byteSize})`,\n      )\n    }\n    return { source: sourceBufId, dest: destBufId, bytes: sourceSpec.byteSize }\n  })\n\n  // Resolve graph.captures (name -> tensor id) to (name -> buffer id).\n  // No pinning needed at the planner level: each tensor already has its own\n  // buffer (see \"v1 strategy\" comment at top \u2014 no pooling yet).\n  const capturesByName = new Map<string, number>()\n  for (const [name, tensorId] of graph.captures) {\n    const bufId = tensorToBuffer.get(tensorId)\n    if (bufId === undefined) {\n      throw new Error(`planBuffers: capture '${name}' references unknown tensor #${tensorId}`)\n    }\n    capturesByName.set(name, bufId)\n  }\n\n  return { buffers, tensorToBuffer, paramsByName, inputsByName, paramGradsByName, statesByName, capturesByName, outputBufferIds, writebacks }\n}\n", "// WGSL codegen: one kernel per IR op.\n//\n// All shapes are baked into the WGSL as compile-time constants \u2014 no shape\n// uniforms. This means each shape combination produces a distinct shader\n// (so `add([B, T, D], [D])` and `add([B, T, D], [B, T, D])` get different\n// kernels), which is fine for our static-shape model and gives the WGSL\n// compiler full freedom to specialize.\n//\n// Most kernels are direct ports of `transformer-gpu.bulb.md`'s WGSL \u2014 those\n// are already debugged and tuned. The autograd ops (broadcast_to, sum_to_shape,\n// relu_grad, etc.) are new.\n\nimport type { Graph, OpNode, Tensor, Shape } from './ir.js'\nimport type { BufferPlan } from './buffers.js'\nimport { shapeSize } from './shape.js'\n\n// Workgroup size of 256 means even our biggest kernel (~8M threads in\n// matmul_bwd_dW) needs only ~32K workgroups, well under WebGPU's 65535-per-dim\n// dispatch cap. Smaller WG_SIZE forced 2D dispatch with significant over-dispatch.\nconst WG_SIZE = 256\n\n// Global thread index, packed across the 2D dispatch grid that lets us route\n// past WebGPU's 65535-per-dim cap. Every kernel uses this exact line \u2014 keep\n// the formula consistent with the dispatch-stride math in runtime.ts (MAX_X\n// = 65535, so per-row stride = 65535 * WG_SIZE = 16776960). Inlined into\n// each WGSL string via interpolation rather than a function so the WGSL\n// compiler still sees a literal constant.\nconst GID_LINE = 'let i = gid.x + gid.y * 16776960u;'\n\nexport interface KernelSpec {\n  /** Index into graph.ops. */\n  opIndex: number\n  /** Op kind (for debugging / pipeline cache key). */\n  opKind: OpNode['kind']\n  /** Generated WGSL source. Empty string for \"logical\" ops with no kernel. */\n  wgsl: string\n  /**\n   * Buffer ids in binding-index order. The runtime creates a bind group with\n   * these in @binding(0..N) on @group(0). Inputs come first (read), output last\n   * (read_write).\n   */\n  bindings: number[]\n  /** Number of threads to dispatch (1-D). 0 means \"skip\" (e.g. reshape no-op). */\n  threads: number\n  /** Workgroup size; usually WG_SIZE. */\n  workgroupSize: number\n}\n\n// ============================================================================\n// Public entry point\n// ============================================================================\n\n/** Generate a KernelSpec per compute op in graph.ops (in dispatch order). */\nexport function emitKernels(graph: Graph, plan: BufferPlan): KernelSpec[] {\n  const out: KernelSpec[] = []\n  for (let i = 0; i < graph.ops.length; i++) {\n    const op = graph.ops[i]!\n    const spec = emitKernel(op, graph, plan, i)\n    out.push(spec)\n  }\n  return out\n}\n\nfunction emitKernel(op: OpNode, graph: Graph, plan: BufferPlan, opIndex: number): KernelSpec {\n  const tof = (id: number) => graph.tensors[id]!\n  const buf = (tensorId: number) => plan.tensorToBuffer.get(tensorId)!\n  const empty = (): KernelSpec => ({ opIndex, opKind: op.kind, wgsl: '', bindings: [], threads: 0, workgroupSize: WG_SIZE })\n\n  switch (op.kind) {\n    // ---- Leaves: data is supplied externally; no kernel ---------------------\n    case 'param_input':\n    case 'tensor_input':\n    case 'state_input':\n      return empty()\n\n    // ---- arange / const_scalar: kernel that fills the buffer once -----------\n    case 'arange': {\n      const out = tof(op.out)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read_write> buf : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${op.n}u) { return; }\n  buf[i] = ${castFromI32('i32(i)', out.dtype)};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.out)], threads: op.n, workgroupSize: WG_SIZE }\n    }\n    case 'const_scalar': {\n      const wgsl = `\n@group(0) @binding(0) var<storage, read_write> buf : array<${wgslDtype(op.dtype)}>;\n@compute @workgroup_size(1)\nfn main() {\n  buf[0] = ${wgslLiteral(op.value, op.dtype)};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.out)], threads: 1, workgroupSize: 1 }\n    }\n\n    // ---- Element-wise binops with broadcast --------------------------------\n    case 'add':\n    case 'sub':\n    case 'mul':\n    case 'div': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const opStr = { add: '+', sub: '-', mul: '*', div: '/' }[op.kind]\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(2) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = a[aIdx] ${opStr} b[bIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Element-wise scalar binops (scalar baked into WGSL) ---------------\n    case 'mul_scalar':\n    case 'add_scalar': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const opStr = op.kind === 'mul_scalar' ? '*' : '+'\n      const total = shapeSize(out.shape)\n      const lit = wgslLiteral(op.scalar, out.dtype)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = a[i] ${opStr} ${lit};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Unary -------------------------------------------------------------\n    case 'sqrt':\n    case 'rsqrt':\n    case 'log':\n    case 'exp':\n    case 'relu': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const expr =\n        op.kind === 'sqrt'  ? 'sqrt(x)' :\n        op.kind === 'rsqrt' ? '1.0 / sqrt(x)' :\n        op.kind === 'log'   ? 'log(x)' :\n        op.kind === 'exp'   ? 'exp(x)' :\n        /* relu */            'max(x, 0.0)'\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let x = a[i];\n  out[i] = ${expr};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Comparisons + select --------------------------------------------\n    case 'less':\n    case 'greater': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const opStr = op.kind === 'less' ? '<' : '>'\n      const total = shapeSize(out.shape)\n      // bool tensors lower to u32 in storage (1 if true, 0 if false).\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(2) var<storage, read_write> out : array<u32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = select(0u, 1u, a[aIdx] ${opStr} b[bIdx]);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'where': {\n      const out = tof(op.out)\n      const cond = tof(op.cond)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> cond : array<u32>;\n@group(0) @binding(1) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(2) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(3) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, cond.shape, 'cIdx')}\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = select(b[bIdx], a[aIdx], cond[cIdx] != 0u);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.cond), buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'relu_grad': {\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> x : array<f32>;\n@group(0) @binding(1) var<storage, read> dy : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = select(0.0, dy[i], x[i] > 0.0);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.x), buf(op.dy), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Reductions over last axis -----------------------------------------\n    case 'mean_last':\n    case 'sum_last': {\n      const a = tof(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const outerSize = shapeSize(a.shape) / D\n      const divisor = op.kind === 'mean_last' ? `f32(${D}u)` : '1.0'\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let base = i * ${D}u;\n  var s : f32 = 0.0;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    s = s + a[base + j];\n  }\n  out[i] = s / ${divisor};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Shape ---------------------------------------------------------------\n    // reshape: no kernel needed if buffers can alias (shape change only). For\n    // v1 simplicity we emit a memcpy-style kernel rather than aliasing buffers,\n    // because aliasing complicates the buffer plan and we have memory headroom.\n    case 'reshape': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = a[i];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'transpose': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      // Emit per-axis index computation. For each output flat index i, decompose\n      // into per-axis output indices, then use op.perm to find the source axis order.\n      // Source flat index = sum(outIdx[perm.invert()[k]] * a_stride[k] for k).\n      const aStrides = computeStrides(a.shape)\n      const outDimDecls = decomposeFlatIndexBlock('i', out.shape, 'oIdx')\n      const srcExpr: string[] = []\n      for (let k = 0; k < a.shape.length; k++) {\n        const srcAxis = op.perm.indexOf(k)  // which output axis came from input axis k\n        srcExpr.push(`oIdx_${srcAxis} * ${aStrides[k]}u`)\n      }\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${outDimDecls}\n  let srcIdx = ${srcExpr.join(' + ')};\n  out[i] = a[srcIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Linear algebra ----------------------------------------------------\n    // matmul: a [..., M, K] \u00B7 b [K, N] -> [..., M, N]. b is unbatched.\n    case 'matmul': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const M = a.shape[a.shape.length - 2]!\n      const K = a.shape[a.shape.length - 1]!\n      const N = b.shape[1]!\n      const batch = shapeSize(a.shape) / (M * K)\n      const total = batch * M * N\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read> b : array<f32>;\n@group(0) @binding(2) var<storage, read_write> c : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let bi = i / ${M * N}u;          // batch index\n  let mn = i % ${M * N}u;\n  let m = mn / ${N}u;\n  let n = mn % ${N}u;\n  let aBase = bi * ${M * K}u + m * ${K}u;\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k < ${K}u; k = k + 1u) {\n    s = s + a[aBase + k] * b[k * ${N}u + n];\n  }\n  c[i] = s;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'matmul_batched': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const M = a.shape[a.shape.length - 2]!\n      const K = a.shape[a.shape.length - 1]!\n      const N = b.shape[b.shape.length - 1]!\n      const batch = shapeSize(a.shape) / (M * K)\n      const total = batch * M * N\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read> b : array<f32>;\n@group(0) @binding(2) var<storage, read_write> c : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let bi = i / ${M * N}u;\n  let mn = i % ${M * N}u;\n  let m = mn / ${N}u;\n  let n = mn % ${N}u;\n  let aBase = bi * ${M * K}u + m * ${K}u;\n  let bBase = bi * ${K * N}u;\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k < ${K}u; k = k + 1u) {\n    s = s + a[aBase + k] * b[bBase + k * ${N}u + n];\n  }\n  c[i] = s;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- One-hot ------------------------------------------------------------\n    case 'one_hot': {\n      const out = tof(op.out)\n      const indices = tof(op.indices)\n      const total = shapeSize(out.shape)\n      const depth = op.depth\n      const zeroLit = wgslLiteral(0, out.dtype)\n      const oneLit = wgslLiteral(1, out.dtype)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> indices : array<i32>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let outerIdx = i / ${depth}u;\n  let depthIdx = i % ${depth}u;\n  let tgt = u32(indices[outerIdx]);\n  out[i] = select(${zeroLit}, ${oneLit}, tgt == depthIdx);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.indices), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- ML primitives -----------------------------------------------------\n    case 'log_softmax_last': {\n      const a = tof(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const outerSize = shapeSize(a.shape) / D\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let base = i * ${D}u;\n  var m : f32 = -1.0e30;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    let v = a[base + j];\n    if (v > m) { m = v; }\n  }\n  var s : f32 = 0.0;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    s = s + exp(a[base + j] - m);\n  }\n  let logZ = m + log(s);\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    out[base + j] = a[base + j] - logZ;\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    case 'softmax_causal_last': {\n      const a = tof(op.a)\n      const T = a.shape[a.shape.length - 1]!  // == second-to-last (square)\n      // Outer size = (everything except last 2 axes) * (second-to-last axis)\n      const outerSize = shapeSize(a.shape) / T\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  // Each thread handles one (..., qpos)-row, softmaxing over kpos\u2208[0..qpos].\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let qpos = i % ${T}u;\n  let base = i * ${T}u;\n  var m : f32 = -1.0e30;\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    let v = a[base + k];\n    if (v > m) { m = v; }\n  }\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    let e = exp(a[base + k] - m);\n    out[base + k] = e;\n    s = s + e;\n  }\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    out[base + k] = out[base + k] / s;\n  }\n  for (var k : u32 = qpos + 1u; k < ${T}u; k = k + 1u) {\n    out[base + k] = 0.0;\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    case 'where_causal': {\n      const a = tof(op.a)\n      const T = a.shape[a.shape.length - 1]!\n      const total = shapeSize(a.shape)\n      const fillLit = wgslLiteral(op.fillValue, 'f32')\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let kpos = i % ${T}u;\n  let qpos = (i / ${T}u) % ${T}u;\n  if (kpos > qpos) {\n    out[i] = ${fillLit};\n  } else {\n    out[i] = a[i];\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Slicing -----------------------------------------------------------\n    case 'slice_last_range': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const D_in = a.shape[a.shape.length - 1]!\n      const D_out = op.end - op.start\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let outer = i / ${D_out}u;\n  let inner = i % ${D_out}u;\n  out[i] = a[outer * ${D_in}u + ${op.start}u + inner];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Broadcast / un-broadcast (autograd infrastructure) ----------------\n    case 'broadcast_to': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'srcIdx')}\n  out[i] = a[srcIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Adam (fused per-element) -----------------------------------------\n    case 'adam_update_m': {\n      // m_new = b1 * m + (1 - b1) * g\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const b1 = op.b1\n      const oneMinusB1 = 1 - b1\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> m : array<f32>;\n@group(0) @binding(1) var<storage, read> g : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = ${wgslLiteral(b1, 'f32')} * m[i] + ${wgslLiteral(oneMinusB1, 'f32')} * g[i];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.m), buf(op.g), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'adam_update_v': {\n      // v_new = b2 * v + (1 - b2) * g\u00B2\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const b2 = op.b2\n      const oneMinusB2 = 1 - b2\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> v : array<f32>;\n@group(0) @binding(1) var<storage, read> g : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let gv = g[i];\n  out[i] = ${wgslLiteral(b2, 'f32')} * v[i] + ${wgslLiteral(oneMinusB2, 'f32')} * gv * gv;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.v), buf(op.g), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'adam_update_p': {\n      // p_new = decayShrink * p - lrt[0] * m_new / (sqrt(v_new) + eps).\n      // lrt is supplied per-step from CPU (already includes bias correction).\n      // decayShrink is either baked as a literal (no schedule, fixed lr) or\n      // bound as a per-step scalar input (when the user supplies an lr\n      // schedule via `adam: { lr: (step) => ... }`). When literal=1 the WGSL\n      // compiler folds the multiply away.\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const dynamicShrink = op.decayShrinkTensor !== null\n      const shrinkExpr = dynamicShrink ? 'decayShrink[0]' : wgslLiteral(op.decayShrink, 'f32')\n      const shrinkBinding = dynamicShrink\n        ? `@group(0) @binding(4) var<storage, read> decayShrink : array<f32>;\\n` +\n          `@group(0) @binding(5) var<storage, read_write> out : array<f32>;`\n        : `@group(0) @binding(4) var<storage, read_write> out : array<f32>;`\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> p : array<f32>;\n@group(0) @binding(1) var<storage, read> mNew : array<f32>;\n@group(0) @binding(2) var<storage, read> vNew : array<f32>;\n@group(0) @binding(3) var<storage, read> lrt : array<f32>;\n${shrinkBinding}\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = ${shrinkExpr} * p[i] - lrt[0] * mNew[i] / (sqrt(vNew[i]) + ${wgslLiteral(op.eps, 'f32')});\n}`.trim()\n      const bindings = dynamicShrink\n        ? [buf(op.p), buf(op.mNew), buf(op.vNew), buf(op.lrt), buf(op.decayShrinkTensor!), buf(op.out)]\n        : [buf(op.p), buf(op.mNew), buf(op.vNew), buf(op.lrt), buf(op.out)]\n      return { opIndex, opKind: op.kind, wgsl, bindings, threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'sum_to_shape': {\n      // Sum-reduce src down to target by summing over each axis where target=1\n      // or where target is missing (offset-prefix axes that get fully summed).\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const wgsl = emitSumToShape(a.shape, out.shape, a.dtype)\n      const total = shapeSize(out.shape)\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n  }\n}\n\n// ============================================================================\n// WGSL helpers\n// ============================================================================\n\nfunction wgslDtype(d: 'f32' | 'i32' | 'bool'): string {\n  // bool can't be in storage buffers in WGSL; we lower bool-typed tensors to\n  // u32 (0/1). For Phase 3a there are no bool-typed storage buffers in the\n  // forward+backward graph (causal mask is built inline in softmax kernels),\n  // so this only matters if the user explicitly creates a bool tensor.\n  if (d === 'bool') return 'u32'\n  return d\n}\n\nfunction wgslLiteral(value: number, dtype: 'f32' | 'i32' | 'bool'): string {\n  if (dtype === 'f32') {\n    if (Number.isFinite(value)) {\n      // WGSL requires `.` in float literals; force decimal form.\n      return value.toString().includes('.') || value.toString().includes('e')\n        ? `${value}f`\n        : `${value}.0f`\n    }\n    return value > 0 ? '1.0e30f' : '-1.0e30f'\n  }\n  if (dtype === 'i32') return `${Math.trunc(value)}i`\n  return value ? '1u' : '0u'\n}\n\nfunction castFromI32(expr: string, dtype: 'f32' | 'i32' | 'bool'): string {\n  if (dtype === 'f32') return `f32(${expr})`\n  if (dtype === 'i32') return `i32(${expr})`\n  return `u32(${expr})`\n}\n\nfunction computeStrides(shape: Shape): number[] {\n  const strides: number[] = new Array(shape.length).fill(1)\n  for (let i = shape.length - 2; i >= 0; i--) {\n    strides[i] = strides[i + 1]! * shape[i + 1]!\n  }\n  return strides\n}\n\n/**\n * Generate WGSL that decomposes a flat index `flatVar` into per-axis indices\n * `outVar_0, outVar_1, ...` according to `shape`.\n */\nfunction decomposeFlatIndexBlock(flatVar: string, shape: Shape, outVar: string): string {\n  if (shape.length === 0) return `  let ${outVar}_0 : u32 = 0u;`  // not used but parser-safe\n  const strides = computeStrides(shape)\n  const lines: string[] = []\n  let remaining = flatVar\n  for (let i = 0; i < shape.length; i++) {\n    if (i === shape.length - 1) {\n      lines.push(`  let ${outVar}_${i} = ${remaining};`)\n    } else {\n      lines.push(`  let ${outVar}_${i} = ${remaining} / ${strides[i]}u;`)\n      const newRem = `${outVar}_rem${i}`\n      lines.push(`  let ${newRem} = ${remaining} % ${strides[i]}u;`)\n      remaining = newRem\n    }\n  }\n  return lines.join('\\n')\n}\n\n/**\n * Generate WGSL that computes the source flat index in `srcVar` for an output\n * flat index `flatVar`, given output shape `outShape` and source shape `srcShape`\n * under right-aligned NumPy-style broadcasting (size-1 axes broadcast).\n *\n * Strategy:\n *   1. Decompose flat output index into per-axis output indices.\n *   2. For each output axis that maps onto a source axis (right-aligned), use\n *      the output index there if src.dim != 1, else 0 (broadcast).\n *   3. Drop output-only axes (those with no corresponding source axis).\n *   4. Combine source indices with source strides.\n */\nfunction broadcastIndexBlock(flatVar: string, outShape: Shape, srcShape: Shape, srcVar: string): string {\n  // Name the per-axis decomposition vars after `srcVar` so multiple\n  // broadcastIndexBlock calls in the same WGSL function don't collide.\n  const prefix = `${srcVar}_ax`\n  const decompose = decomposeFlatIndexBlock(flatVar, outShape, prefix)\n  const offset = outShape.length - srcShape.length\n  if (srcShape.length === 0) {\n    return `${decompose}\\n  let ${srcVar} : u32 = 0u;`\n  }\n  const srcStrides = computeStrides(srcShape)\n  const terms: string[] = []\n  for (let i = 0; i < srcShape.length; i++) {\n    const outAxis = i + offset\n    const srcDim = srcShape[i]!\n    const term = srcDim === 1 ? '0u' : `${prefix}_${outAxis} * ${srcStrides[i]}u`\n    terms.push(term)\n  }\n  return `${decompose}\\n  let ${srcVar} = ${terms.join(' + ')};`\n}\n\n/**\n * sum_to_shape: each output cell sums over the source axes that are reduced.\n * For source shape S and target shape T (right-aligned):\n *   - Axes in S not in T (leading prefix): fully reduced (sum over whole axis).\n *   - Axes where T=1 but S>1: reduced (sum over that axis).\n *   - Axes where T=S: passed through.\n *\n * Implementation: each thread = one output cell. It iterates over the reduced\n * axes via nested-loop unrolling (we generate explicit nested for-loops).\n */\nfunction emitSumToShape(srcShape: Shape, tgtShape: Shape, dtype: 'f32' | 'i32' | 'bool'): string {\n  const srcStrides = computeStrides(srcShape)\n  const tgtStrides = computeStrides(tgtShape)\n  const offset = srcShape.length - tgtShape.length\n\n  // Decompose flat output index into per-axis target indices.\n  const decompose = decomposeFlatIndexBlock('i', tgtShape, 'tgt')\n\n  // Identify reduced axes of the SOURCE: axis k in src is reduced if either\n  // it's in the leading prefix (k < offset) or its corresponding target axis\n  // has size 1. For non-reduced axes (k >= offset and tgt=src), the source\n  // index is the target index along that axis.\n  const reducedAxes: number[] = []\n  for (let k = 0; k < srcShape.length; k++) {\n    if (k < offset) { reducedAxes.push(k); continue }\n    const tDim = tgtShape[k - offset]!\n    const sDim = srcShape[k]!\n    if (tDim === 1 && sDim > 1) reducedAxes.push(k)\n  }\n\n  // Build the source flat index expression. Initialize from the non-reduced axes.\n  const baseTerms: string[] = []\n  for (let k = 0; k < srcShape.length; k++) {\n    if (reducedAxes.includes(k)) continue  // contributed by loop var instead\n    const tAxis = k - offset\n    baseTerms.push(`tgt_${tAxis} * ${srcStrides[k]}u`)\n  }\n  const baseExpr = baseTerms.length > 0 ? baseTerms.join(' + ') : '0u'\n\n  // Emit nested for loops over the reduced axes.\n  const indent = (depth: number) => '  '.repeat(depth + 1)\n  const loops: string[] = []\n  for (let depth = 0; depth < reducedAxes.length; depth++) {\n    const k = reducedAxes[depth]!\n    const dim = srcShape[k]!\n    loops.push(`${indent(depth)}for (var r${k} : u32 = 0u; r${k} < ${dim}u; r${k} = r${k} + 1u) {`)\n  }\n  // Inside innermost loop, compute source index.\n  const reducedTerms = reducedAxes.map(k => `r${k} * ${srcStrides[k]}u`)\n  const fullExpr = reducedTerms.length > 0\n    ? `${baseExpr} + ${reducedTerms.join(' + ')}`\n    : baseExpr\n  loops.push(`${indent(reducedAxes.length)}s = s + a[${fullExpr}];`)\n  for (let depth = reducedAxes.length - 1; depth >= 0; depth--) {\n    loops.push(`${indent(depth)}}`)\n  }\n\n  const total = tgtShape.length === 0 ? 1 : (tgtStrides[0]! * tgtShape[0]!)\n  const loopBody = reducedAxes.length === 0\n    ? `  s = s + a[${baseExpr}];`\n    : loops.join('\\n')\n\n  return `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${decompose}\n  var s : ${wgslDtype(dtype)} = ${dtype === 'f32' ? '0.0f' : (dtype === 'i32' ? '0i' : '0u')};\n${loopBody}\n  out[i] = s;\n}`.trim()\n}\n", "// WebGPU runtime. Reads a BufferPlan + KernelSpec[] (produced by codegen),\r\n// allocates real GPU buffers and pipelines, and provides a `step()` method\r\n// that uploads inputs, dispatches all kernels, and reads back outputs.\r\n//\r\n// Browser-only: this module needs `navigator.gpu` at runtime.\r\n\r\nimport type { BufferPlan } from './buffers.js'\r\nimport type { KernelSpec } from './codegen.js'\r\n\r\n// TS lib.dom defines WebGPU types but not the GPUMapMode runtime constant.\r\n// Provided by the browser per WebGPU spec; declare just what we use.\r\ndeclare const GPUMapMode: { readonly READ: number; readonly WRITE: number }\r\n\r\nexport interface UploadParamsOptions {\r\n  /** Skip the \"missing param\" check, allowing the caller to update only some\r\n   *  params and leave the rest at their current GPU values. Extra (unknown)\r\n   *  keys are still rejected \u2014 that's always a typo. Default: false. */\r\n  partial?: boolean\r\n}\r\n\r\n/**\r\n * Activation readbacks for one `step()`/`run()` call. Keyed by the names\r\n * passed to `capture(name, t)` during the trace. `get(name)` throws if the\r\n * name isn't registered or wasn't read back this call (i.e., the call was\r\n * made without `{ withCaptures: true }`); use `has(name)` if you need to\r\n * branch. `shapeOf(name)` returns the static-after-compile shape and works\r\n * regardless of whether captures were read back.\r\n */\r\nexport class Captures {\r\n  constructor(\r\n    private readonly shapes: Record<string, readonly number[]>,\r\n    private readonly data: Map<string, Float32Array>,\r\n  ) {}\r\n  get(name: string): Float32Array {\r\n    const d = this.data.get(name)\r\n    if (!d) {\r\n      const known = [...this.data.keys()].sort().join(', ')\r\n      const detail = known ? `Known this call: ${known}` : `(call run/step with { withCaptures: true } to populate)`\r\n      throw new Error(`Captures.get: '${name}' not present. ${detail}`)\r\n    }\r\n    return d\r\n  }\r\n  shapeOf(name: string): readonly number[] {\r\n    const s = this.shapes[name]\r\n    if (!s) {\r\n      const known = Object.keys(this.shapes).sort().join(', ') || '(none registered)'\r\n      throw new Error(`Captures.shapeOf: '${name}' not registered. Known: ${known}`)\r\n    }\r\n    return s\r\n  }\r\n  has(name: string): boolean { return this.data.has(name) }\r\n  names(): string[] { return [...this.data.keys()].sort() }\r\n}\r\n\r\nexport interface RunResult {\r\n  output: Float32Array\r\n  captures: Captures\r\n}\r\n\r\nexport interface StepResult {\r\n  loss: number\r\n  captures: Captures\r\n}\r\n\r\nexport interface RunOptions {\r\n  /** Read back tensors registered via `capture(name, t)` during the trace.\r\n   *  Default false. When false, the returned `captures` is empty (calling\r\n   *  `.get` throws); when true, captures are read back and accessible. */\r\n  withCaptures?: boolean\r\n}\r\n\r\nexport interface StepOptions extends RunOptions {\r\n  /** If false, the training submit is queued but the JS thread does not\r\n   *  await `mapAsync` of the loss buffer. Returns `void` immediately.\r\n   *  Use `runtime.readLoss()` to read the latest loss explicitly when\r\n   *  you want it (e.g., every Nth step for UI display).\r\n   *\r\n   *  Why: each `mapAsync` round-trip is ~1 ms on desktop but 10\u201330 ms on\r\n   *  Android Chrome. A training loop that awaits per step pays N \u00D7 that\r\n   *  on the main thread, which on mobile starves the OS compositor and\r\n   *  causes visible UI sluggishness. With `readLoss: false` plus a\r\n   *  `requestAnimationFrame` yield between steps, the main thread stays\r\n   *  responsive while training runs at GPU speed.\r\n   *\r\n   *  Implies `withCaptures: false`. Default: true. */\r\n  readLoss?: boolean\r\n}\r\n\r\n/** Common surface for both training and forward-only compiled runtimes. */\r\nexport interface CompiledBase {\r\n  /** The GPUDevice this runtime is bound to. Pass to sibling compiles to\r\n   *  share the device, or use directly for other GPU work. */\r\n  device: GPUDevice\r\n  /** Param name -> the underlying GPUBuffer. Pass to a sibling compile via\r\n   *  `sharedParams` to share without copies. */\r\n  params: Map<string, GPUBuffer>\r\n  /** Shape of the graph's output (loss scalar `[]` for training; the user's\r\n   *  returned tensor for forward-only compiles). */\r\n  outputShape: number[]\r\n  /** Upload parameter Float32Arrays to their GPU buffers. By default, requires\r\n   *  *all* params to be present; throws on any unknown or missing key. Pass\r\n   *  `{ partial: true }` to skip the missing-key check. */\r\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): void\r\n  /** Read all parameters back as Float32Arrays \u2014 used for UI panels. */\r\n  downloadParams(): Promise<Record<string, Float32Array>>\r\n  /** Free GPU resources. */\r\n  destroy(): void\r\n}\r\n\r\n/** Run a dispatch and read back the full output tensor. Default returns the\r\n *  output as a `Float32Array`; with `{ withCaptures: true }` returns\r\n *  `{ output, captures }`. Same shape as `step()`'s overloads. */\r\nexport interface RunFn {\r\n  (inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\r\n  (inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\r\n  (inputs: Record<string, Int32Array | Float32Array>, opts: RunOptions): Promise<Float32Array | RunResult>\r\n}\r\n\r\nexport interface CompiledRuntime extends CompiledBase {\r\n  /** Read all parameter gradients back. Mostly for verification / debugging. */\r\n  downloadParamGrads(): Promise<Record<string, Float32Array>>\r\n  /**\r\n   * One full forward+backward step.\r\n   *   1. Uploads `inputs` (tokens, targets, masks) to input buffers.\r\n   *   2. Dispatches every kernel in order.\r\n   *   3. Reads back the loss scalar (and any registered captures, if requested).\r\n   * Default returns the loss as a JS number; with `{ withCaptures: true }`\r\n   * returns `{ loss, captures }`.\r\n   */\r\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { readLoss: false }): Promise<void>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: StepOptions): Promise<number | StepResult | void>\r\n  /** Same dispatch as step() but returns the full output Float32Array \u2014 for\r\n   *  training graphs the output is a scalar loss, so step() is usually more\r\n   *  convenient. Provided for parity with `compileForward`. */\r\n  run: RunFn\r\n  /** Read the latest loss value from the GPU. Pair with `step({ readLoss: false })`\r\n   *  fire-and-forget training: every Nth iteration, call `readLoss()` for the\r\n   *  UI, but most iterations don't pay the `mapAsync` cost. */\r\n  readLoss(): Promise<number>\r\n  /** Re-zero all optimizer state buffers (Adam's m/v) in place. Pair with\r\n   *  `uploadInitialParams()` for a full training reset without recompile. */\r\n  resetOptimizerState(): void\r\n}\r\n\r\n/** Forward-only compiled runtime \u2014 produced by `compileForward`. No optimizer,\r\n *  no backward. Returns the output tensor (not just a scalar) per `run()` call. */\r\nexport interface CompiledForward extends CompiledBase {\r\n  run: RunFn\r\n}\r\n\r\nexport interface RuntimeOpts {\r\n  /** Pre-acquired GPUDevice. If omitted, runtime requests its own. */\r\n  device?: GPUDevice\r\n  /** External param buffers to bind in place of allocating fresh ones, keyed\r\n   *  by param name. Used to share params between a training compile and a\r\n   *  sibling forward-only compile (e.g., a B=1 inference graph). When a name\r\n   *  is in this map, the runtime reuses the provided GPUBuffer; otherwise it\r\n   *  allocates as usual. */\r\n  sharedParams?: Map<string, GPUBuffer>\r\n}\r\n\r\n// Inlined numeric values (per WebGPU spec) so this module is importable in Node\r\n// for codegen-only usage. The browser provides GPUBufferUsage as a global, but\r\n// referencing it at module scope would crash before any browser code runs.\r\nconst STORAGE_RW = 0x80 /*STORAGE*/ | 0x8 /*COPY_DST*/ | 0x4 /*COPY_SRC*/\r\nconst READBACK = 0x1 /*MAP_READ*/ | 0x8 /*COPY_DST*/\r\n\r\nexport async function createRuntime(\r\n  plan: BufferPlan,\r\n  kernels: KernelSpec[],\r\n  lossBufferId: number,\r\n  opts: RuntimeOpts = {},\r\n): Promise<CompiledRuntime> {\r\n  const device = opts.device ?? await acquireDevice()\r\n  const queue = device.queue\r\n\r\n  // ---- Allocate one GPUBuffer per BufferSpec --------------------------------\r\n  // State buffers also get filled with their initValue at allocation time.\r\n  // Param buffers may be supplied externally via opts.sharedParams; in that\r\n  // case we reuse the provided GPUBuffer instead of allocating, and the\r\n  // sibling compile that owns it is responsible for upload + lifetime.\r\n  // ownedBufferIds tracks which buffers we allocated ourselves (and so must\r\n  // destroy on .destroy()) vs which were handed in by a sibling compile.\r\n  const buffers = new Map<number, GPUBuffer>()\r\n  const ownedBufferIds = new Set<number>()\r\n  const sharedParams = opts.sharedParams\r\n  for (const spec of plan.buffers) {\r\n    const shared = spec.kind === 'param' ? sharedParams?.get(spec.name!) : undefined\r\n    if (shared) {\r\n      if (shared.size !== spec.byteSize) {\r\n        throw new Error(\r\n          `sharedParams: size mismatch for '${spec.name}' \u2014 supplied ${shared.size} bytes, ` +\r\n          `compiled graph expects ${spec.byteSize}.`,\r\n        )\r\n      }\r\n      buffers.set(spec.id, shared)\r\n      continue\r\n    }\r\n    const buf = device.createBuffer({\r\n      size: spec.byteSize,\r\n      usage: STORAGE_RW,\r\n      label: spec.name ?? `t${spec.id}-${spec.kind}`,\r\n    })\r\n    buffers.set(spec.id, buf)\r\n    ownedBufferIds.add(spec.id)\r\n    if (spec.kind === 'state') fillStateBuffer(spec, buf)\r\n  }\r\n\r\n  // ---- Compile pipelines per kernel; cache by WGSL source -------------------\r\n  // Push an error scope around each shader+pipeline creation so we can surface\r\n  // the actual compile error rather than the cryptic \"previous error\" that\r\n  // comes from using an invalid pipeline at dispatch time.\r\n  const moduleCache = new Map<string, GPUShaderModule>()\r\n  const pipelines: (GPUComputePipeline | null)[] = []\r\n  type ErrorProbe = Promise<{ k: KernelSpec; module: GPUShaderModule; err: GPUError } | null>\r\n  const probes: ErrorProbe[] = []\r\n  for (const k of kernels) {\r\n    if (!k.wgsl) { pipelines.push(null); continue }\r\n    let module = moduleCache.get(k.wgsl)\r\n    if (!module) {\r\n      module = device.createShaderModule({ code: k.wgsl, label: k.opKind })\r\n      moduleCache.set(k.wgsl, module)\r\n    }\r\n    device.pushErrorScope('validation')\r\n    const pipeline = device.createComputePipeline({\r\n      layout: 'auto',\r\n      compute: { module, entryPoint: 'main' },\r\n      label: k.opKind,\r\n    })\r\n    pipelines.push(pipeline)\r\n    probes.push(device.popErrorScope().then(err => err ? { k, module: module!, err } : null))\r\n  }\r\n  const probeResults = await Promise.all(probes)\r\n  const failures = probeResults.filter((p): p is { k: KernelSpec; module: GPUShaderModule; err: GPUError } => p != null)\r\n  if (failures.length > 0) {\r\n    const reports: string[] = []\r\n    for (const { k, module, err } of failures) {\r\n      const info = await module.getCompilationInfo()\r\n      const messages = info.messages\r\n        .map(m => `  L${m.lineNum}:${m.linePos} [${m.type}] ${m.message}`)\r\n        .join('\\n')\r\n      reports.push(\r\n        `[shader compile error] ${k.opKind} (op #${k.opIndex}): ${err.message}\\n` +\r\n        (messages || '  (no compilation messages)') +\r\n        `\\n--- WGSL ---\\n${k.wgsl}\\n-----------`,\r\n      )\r\n    }\r\n    // eslint-disable-next-line no-console\r\n    console.error(reports.join('\\n\\n'))\r\n    throw new Error(`tensorgrad: ${failures.length} shader(s) failed to compile (see console).`)\r\n  }\r\n\r\n  // ---- Pre-build bind groups (static \u2014 buffer ids don't change per step) ---\r\n  const bindGroups: (GPUBindGroup | null)[] = kernels.map((k, i) => {\r\n    const pipeline = pipelines[i]\r\n    if (!pipeline) return null\r\n    return device.createBindGroup({\r\n      layout: pipeline.getBindGroupLayout(0),\r\n      entries: k.bindings.map((bufId, idx) => ({\r\n        binding: idx,\r\n        resource: { buffer: buffers.get(bufId)! },\r\n      })),\r\n    })\r\n  })\r\n\r\n  // ---- Output readback staging buffer ---------------------------------------\r\n  // `outputBufferId` is the graph's main output (loss for training, the user's\r\n  // returned tensor for forward-only). step() reads back its first element;\r\n  // run() reads back the full Float32Array.\r\n  const outputSpec = plan.buffers[lossBufferId]!\r\n  const outputReadback = device.createBuffer({ size: outputSpec.byteSize, usage: READBACK })\r\n\r\n  // ---- Capture readback staging buffer (lazy, single concatenated) ---------\r\n  // One buffer for ALL captures, with each capture occupying a slice. Matters\r\n  // on mobile: each `mapAsync` round-trip on Android Chrome adds significant\r\n  // GPU-fence latency (~10\u201330 ms vs ~1 ms on desktop). With N captures, the\r\n  // per-call mobile cost is N \u00D7 that latency on the main thread. Concatenating\r\n  // and reading back via one `mapAsync` collapses N stalls into one. Allocated\r\n  // on first `step({ withCaptures: true })` call.\r\n  type CaptureLayout = {\r\n    buffer: GPUBuffer\r\n    slices: { name: string; bufId: number; offset: number; byteSize: number }[]\r\n  }\r\n  let captureStaging: CaptureLayout | null = null\r\n  function ensureCaptureStaging(): CaptureLayout {\r\n    if (captureStaging) return captureStaging\r\n    let totalBytes = 0\r\n    const slices: CaptureLayout['slices'] = []\r\n    for (const [name, bufId] of plan.capturesByName) {\r\n      const spec = plan.buffers[bufId]!\r\n      // copyBufferToBuffer offsets must be 4-aligned. Capture byteSizes are\r\n      // always shape-product \u00D7 4 (f32/i32/bool all 4 bytes), so cumulative\r\n      // offsets stay aligned.\r\n      slices.push({ name, bufId, offset: totalBytes, byteSize: spec.byteSize })\r\n      totalBytes += spec.byteSize\r\n    }\r\n    const buffer = device.createBuffer({ size: totalBytes, usage: READBACK, label: 'captures-staging' })\r\n    captureStaging = { buffer, slices }\r\n    return captureStaging\r\n  }\r\n\r\n  // ---- dispatch() \u2014 shared core for step() and run() -----------------------\r\n  // Uploads inputs, dispatches all kernels (in order), queues writebacks, copies\r\n  // the output buffer into its staging, optionally copies captures into theirs,\r\n  // submits, and reads back. Returns the full output Float32Array; step() takes\r\n  // [0] for scalar loss, run() returns it whole.\r\n  //\r\n  // **Concurrent calls auto-serialize.** Two `step()`/`run()` calls on the same\r\n  // runtime would otherwise both try to `mapAsync` the shared output staging\r\n  // buffer at the same time and trip \"Buffer already has an outstanding map\r\n  // pending.\" We chain each new dispatch onto the prior one's promise so they\r\n  // run sequentially even when fired from independent async paths (e.g., a\r\n  // training loop's auxiliary `refreshPrediction()` + `writeDiagnostic()`).\r\n  let pending: Promise<unknown> = Promise.resolve()\r\n  type DispatchOpts = { wantCaptures: boolean; readback: boolean }\r\n  type DispatchResult = { output: Float32Array; captures: Map<string, Float32Array> } | null\r\n  async function dispatch(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts: DispatchOpts,\r\n  ): Promise<DispatchResult> {\r\n    const turn = pending.catch(() => {}).then(() => dispatchUnsynchronized(inputs, opts))\r\n    pending = turn\r\n    return turn\r\n  }\r\n  async function dispatchUnsynchronized(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts: DispatchOpts,\r\n  ): Promise<DispatchResult> {\r\n    const wantCaptures = opts.wantCaptures\r\n    if (wantCaptures && plan.capturesByName.size === 0) {\r\n      throw new Error(\r\n        `withCaptures=true but no capture(...) calls were registered during ` +\r\n        `the trace. Add capture('name', tensor) inside your forward pass for ` +\r\n        `the intermediates you want read back.`,\r\n      )\r\n    }\r\n    for (const [name, bufId] of plan.inputsByName) {\r\n      const data = inputs[name]\r\n      if (!data) throw new Error(`tensorgrad: missing input '${name}'`)\r\n      const expectedBytes = plan.buffers[bufId]!.byteSize\r\n      if (data.byteLength !== expectedBytes) {\r\n        throw new Error(`tensorgrad: input '${name}' has ${data.byteLength} bytes, expected ${expectedBytes}`)\r\n      }\r\n      // Cast to BufferSource: typed arrays are accepted by writeBuffer at runtime\r\n      // but TS may infer ArrayBufferLike (vs ArrayBuffer) under strict configs.\r\n      queue.writeBuffer(buffers.get(bufId)!, 0, data as unknown as BufferSource)\r\n    }\r\n\r\n    // Chunked submit. One queue.submit() of all 240 kernels monopolizes the\r\n    // GPU for the full step duration, blocking compositor frames the entire\r\n    // time. Splitting into chunks with an explicit GPU-drain await between\r\n    // them gives the compositor a slot at each chunk boundary. On graphs\r\n    // smaller than CHUNK_SIZE this collapses to a single submit (no\r\n    // overhead). See specs/WorkerArchitecture.md / mobile-jank investigation.\r\n    const CHUNK_SIZE = 32\r\n    let layout: CaptureLayout | null = null\r\n    if (wantCaptures) {\r\n      // Compute layout up front so the last chunk can append capture copies.\r\n      layout = ensureCaptureStaging()\r\n    }\r\n\r\n    let kernelIdx = 0\r\n    while (kernelIdx < kernels.length) {\r\n      const chunkEnd = Math.min(kernelIdx + CHUNK_SIZE, kernels.length)\r\n      const isLast = chunkEnd === kernels.length\r\n      const encoder = device.createCommandEncoder({\r\n        label: kernels.length > CHUNK_SIZE ? `tensorgrad-chunk-${kernelIdx}` : 'tensorgrad-step',\r\n      })\r\n      for (let i = kernelIdx; i < chunkEnd; i++) {\r\n        const k = kernels[i]!\r\n        if (!k.wgsl || k.threads === 0) continue\r\n        const pipeline = pipelines[i]!\r\n        const bindGroup = bindGroups[i]!\r\n        const pass = encoder.beginComputePass({ label: k.opKind })\r\n        pass.setPipeline(pipeline)\r\n        pass.setBindGroup(0, bindGroup)\r\n        // WebGPU caps each dispatch dimension at 65535 workgroups. Split into 2D\r\n        // when a kernel needs more than that on the X axis. Kernels compute their\r\n        // global index as `gid.x + gid.y * (65535 * workgroup_size)`, matching the\r\n        // stride we set here. For dispatches that fit in one row, gid.y is 0.\r\n        const wgCount = Math.max(1, Math.ceil(k.threads / k.workgroupSize))\r\n        const MAX_X = 65535\r\n        const wgX = Math.min(wgCount, MAX_X)\r\n        const wgY = Math.ceil(wgCount / MAX_X)\r\n        pass.dispatchWorkgroups(wgX, wgY, 1)\r\n        pass.end()\r\n      }\r\n      if (isLast) {\r\n        // Writebacks (Adam state, updated params; empty for forward-only) +\r\n        // output readback copy + capture readback copies all go into the\r\n        // final chunk so a single mapAsync below sees everything.\r\n        for (const wb of plan.writebacks) {\r\n          encoder.copyBufferToBuffer(buffers.get(wb.source)!, 0, buffers.get(wb.dest)!, 0, wb.bytes)\r\n        }\r\n        encoder.copyBufferToBuffer(buffers.get(lossBufferId)!, 0, outputReadback, 0, outputSpec.byteSize)\r\n        if (layout) {\r\n          for (const s of layout.slices) {\r\n            encoder.copyBufferToBuffer(buffers.get(s.bufId)!, 0, layout.buffer, s.offset, s.byteSize)\r\n          }\r\n        }\r\n      }\r\n      queue.submit([encoder.finish()])\r\n      if (!isLast) {\r\n        // Drain the chunk before queuing the next one. This is the moment\r\n        // the compositor can interleave its own frame work onto the GPU.\r\n        await queue.onSubmittedWorkDone()\r\n      }\r\n      kernelIdx = chunkEnd\r\n    }\r\n\r\n    // readback=false: training fire-and-forget. The encoder still copied\r\n    // loss \u2192 outputReadback (and captures \u2192 staging), but we don't await\r\n    // mapAsync. The caller can read the latest loss later via readLoss()\r\n    // when it actually wants to display it.\r\n    if (!opts.readback) return null\r\n\r\n    await outputReadback.mapAsync(GPUMapMode.READ)\r\n    const output = new Float32Array(outputReadback.getMappedRange().slice(0))\r\n    outputReadback.unmap()\r\n\r\n    const captures = new Map<string, Float32Array>()\r\n    if (layout) {\r\n      await layout.buffer.mapAsync(GPUMapMode.READ)\r\n      const range = layout.buffer.getMappedRange()\r\n      for (const s of layout.slices) {\r\n        // Copy out (slice) before unmap \u2014 the underlying ArrayBuffer is\r\n        // detached when the buffer unmaps.\r\n        captures.set(s.name, new Float32Array(range, s.offset, s.byteSize / 4).slice())\r\n      }\r\n      layout.buffer.unmap()\r\n    }\r\n    return { output, captures }\r\n  }\r\n\r\n  // ---- step() \u2014 training-mode wrapper, returns scalar [0] of output ---------\r\n  function step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: { readLoss: false }): Promise<void>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: StepOptions): Promise<number | StepResult | void>\r\n  async function step(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts?: StepOptions,\r\n  ): Promise<number | StepResult | void> {\r\n    if (opts?.readLoss === false) {\r\n      await dispatch(inputs, { wantCaptures: false, readback: false })\r\n      return\r\n    }\r\n    const r = (await dispatch(inputs, { wantCaptures: opts?.withCaptures === true, readback: true }))!\r\n    if (opts?.withCaptures) return { loss: r.output[0]!, captures: new Captures(captureShapes, r.captures) }\r\n    return r.output[0]!\r\n  }\r\n\r\n  // ---- readLoss() \u2014 explicit late readback for fire-and-forget training -----\r\n  // Maps the output buffer (which step() always copies the latest loss into,\r\n  // even when readLoss:false) and returns the value. Goes through the same\r\n  // serialization chain as step()/run() so two readLoss() calls don't both\r\n  // try to mapAsync the same buffer.\r\n  async function readLoss(): Promise<number> {\r\n    const turn = pending.catch(() => {}).then(async () => {\r\n      await outputReadback.mapAsync(GPUMapMode.READ)\r\n      const v = new Float32Array(outputReadback.getMappedRange())[0]!\r\n      outputReadback.unmap()\r\n      return v\r\n    })\r\n    pending = turn\r\n    return turn\r\n  }\r\n\r\n  // ---- run() \u2014 forward-mode wrapper, returns Float32Array by default -------\r\n  // Same overloaded shape as step(): scalar-shaped result (here Float32Array,\r\n  // there a JS number) is the default; { ..., captures } is the opt-in form.\r\n  function run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\r\n  function run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\r\n  function run(inputs: Record<string, Int32Array | Float32Array>, opts: RunOptions): Promise<Float32Array | RunResult>\r\n  async function run(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts?: RunOptions,\r\n  ): Promise<Float32Array | RunResult> {\r\n    const r = (await dispatch(inputs, { wantCaptures: opts?.withCaptures === true, readback: true }))!\r\n    if (opts?.withCaptures) return { output: r.output, captures: new Captures(captureShapes, r.captures) }\r\n    return r.output\r\n  }\r\n\r\n  // ---- uploadParams ---------------------------------------------------------\r\n  function uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions) {\r\n    const partial = opts?.partial ?? false\r\n    for (const name of Object.keys(params)) {\r\n      if (!plan.paramsByName.has(name)) {\r\n        throw new Error(\r\n          `uploadParams: unknown param '${name}'. ` +\r\n          `Known: ${[...plan.paramsByName.keys()].sort().join(', ')}`,\r\n        )\r\n      }\r\n    }\r\n    if (!partial) {\r\n      for (const name of plan.paramsByName.keys()) {\r\n        if (!(name in params)) {\r\n          throw new Error(\r\n            `uploadParams: missing param '${name}'. ` +\r\n            `Pass { partial: true } if you mean to update only some params.`,\r\n          )\r\n        }\r\n      }\r\n    }\r\n    for (const [name, bufId] of plan.paramsByName) {\r\n      const data = params[name]\r\n      if (!data) continue\r\n      const expected = plan.buffers[bufId]!.byteSize / 4\r\n      if (data.length !== expected) {\r\n        throw new Error(`uploadParams: '${name}' has ${data.length} elements, expected ${expected}`)\r\n      }\r\n      queue.writeBuffer(buffers.get(bufId)!, 0, data as unknown as BufferSource)\r\n    }\r\n  }\r\n\r\n  // ---- download helpers -----------------------------------------------------\r\n  async function downloadFromMap(map: Map<string, number>): Promise<Record<string, Float32Array>> {\r\n    const stagings: { name: string; buf: GPUBuffer; bytes: number }[] = []\r\n    const encoder = device.createCommandEncoder({ label: 'tensorgrad-download' })\r\n    for (const [name, bufId] of map) {\r\n      const spec = plan.buffers[bufId]!\r\n      const staging = device.createBuffer({ size: spec.byteSize, usage: READBACK })\r\n      encoder.copyBufferToBuffer(buffers.get(bufId)!, 0, staging, 0, spec.byteSize)\r\n      stagings.push({ name, buf: staging, bytes: spec.byteSize })\r\n    }\r\n    queue.submit([encoder.finish()])\r\n    const out: Record<string, Float32Array> = {}\r\n    for (const s of stagings) {\r\n      await s.buf.mapAsync(GPUMapMode.READ)\r\n      out[s.name] = new Float32Array(s.buf.getMappedRange().slice(0))\r\n      s.buf.unmap()\r\n      s.buf.destroy()\r\n    }\r\n    return out\r\n  }\r\n\r\n  // Fill a state buffer with its declared initValue (typically 0). Float and\r\n  // int both serialize to 4 bytes per element. Used at allocation time and on\r\n  // resetOptimizerState() \u2014 same logic, two callers.\r\n  function fillStateBuffer(spec: { byteSize: number; dtype: 'f32' | 'i32' | 'bool'; initValue?: number }, target: GPUBuffer): void {\r\n    const elements = spec.byteSize / 4\r\n    const init = spec.dtype === 'f32'\r\n      ? new Float32Array(elements).fill(spec.initValue ?? 0)\r\n      : new Int32Array(elements).fill(Math.trunc(spec.initValue ?? 0))\r\n    queue.writeBuffer(target, 0, init as unknown as BufferSource)\r\n  }\r\n\r\n  function resetOptimizerState() {\r\n    for (const spec of plan.buffers) {\r\n      if (spec.kind === 'state') fillStateBuffer(spec, buffers.get(spec.id)!)\r\n    }\r\n  }\r\n\r\n  // Build the params map AFTER buffer allocation so it points at the actual\r\n  // GPUBuffers (shared or freshly allocated).\r\n  const params = new Map<string, GPUBuffer>()\r\n  for (const [name, bufId] of plan.paramsByName) {\r\n    params.set(name, buffers.get(bufId)!)\r\n  }\r\n  // Static-after-compile shape metadata so users don't have to recompute\r\n  // strides to interpret a flat capture readback.\r\n  const captureShapes: Record<string, number[]> = {}\r\n  for (const [name, bufId] of plan.capturesByName) {\r\n    captureShapes[name] = [...plan.buffers[bufId]!.shape]\r\n  }\r\n  const outputShape = [...plan.buffers[lossBufferId]!.shape]\r\n\r\n  const destroy = () => {\r\n    for (const [id, b] of buffers) {\r\n      if (ownedBufferIds.has(id)) b.destroy()\r\n    }\r\n    outputReadback.destroy()\r\n    if (captureStaging) captureStaging.buffer.destroy()\r\n  }\r\n\r\n  return {\r\n    device,\r\n    params,\r\n    outputShape,\r\n    uploadParams,\r\n    downloadParams: () => downloadFromMap(plan.paramsByName),\r\n    downloadParamGrads: () => downloadFromMap(plan.paramGradsByName),\r\n    step,\r\n    run,\r\n    readLoss,\r\n    resetOptimizerState,\r\n    destroy,\r\n  }\r\n}\r\n\r\n/** Same machinery as `createRuntime`, narrower public type: a forward-only\r\n *  graph exposes `run()` instead of `step()` (no optimizer state, no scalar-\r\n *  loss readback). The full runtime object is built once and projected by\r\n *  `compileForward` to the public shape. */\r\nexport async function createForwardRuntime(\r\n  plan: BufferPlan,\r\n  kernels: KernelSpec[],\r\n  outputBufferId: number,\r\n  opts: RuntimeOpts = {},\r\n): Promise<CompiledForward> {\r\n  return await createRuntime(plan, kernels, outputBufferId, opts)\r\n}\r\n\r\nasync function acquireDevice(): Promise<GPUDevice> {\r\n  if (typeof navigator === 'undefined' || !navigator.gpu) {\r\n    throw new Error('tensorgrad: WebGPU not available in this environment')\r\n  }\r\n  const adapter = await navigator.gpu.requestAdapter()\r\n  if (!adapter) throw new Error('tensorgrad: no WebGPU adapter')\r\n  return await adapter.requestDevice()\r\n}\r\n", "// Module abstraction \u2014 a Domeleon-style component layer for parameter trees.\n//\n// User code defines a model as nested classes:\n//\n//   class Linear extends Module {\n//     W: Tensor; b: Tensor\n//     constructor(inDim: number, outDim: number) {\n//       super()\n//       this.W = this.param([inDim, outDim])               // randn, scale 0.02\n//       this.b = this.param([outDim], { init: 'zeros' })\n//     }\n//   }\n//   class Block extends Module {\n//     attn = new Attention(D)\n//     mlp  = new MLP(D, 4 * D)\n//   }\n//   class Model extends Module {\n//     embed = new Linear(VOCAB, D)\n//     layers = range(N).map(() => new Block())\n//   }\n//\n// The param tree is discovered automatically at compile time by walking\n// enumerable instance properties. Each parameter gets a name auto-derived\n// from its path (`layers.0.attn.W_q`); names are used for upload/download\n// and writeback wiring. Forward functions are pure and stateless \u2014 they\n// take the materialized model and inputs, return a Tensor.\n\nimport type { Tensor, Shape, Dtype } from './ir.js'\nimport { paramInput } from './trace.js'\n\n// ============================================================================\n// Init metadata\n// ============================================================================\n\n/** How a parameter's initial values are produced. Serializable shape \u2014 no\n *  closures, since the initial values cross the worker boundary at compile\n *  time. Use the `init` helpers for ergonomic construction.\n *\n *  String shorthands:\n *  - `'randn'` \u2014 Gaussian with std 0.02 (the common weight-matrix init).\n *  - `'zeros'` \u2014 fill with 0 (biases, LayerNorm beta).\n *  - `'ones'`  \u2014 fill with 1 (LayerNorm gain).\n *\n *  Object shapes:\n *  - `{ kind: 'randn', scale }` \u2014 randn with explicit std.\n *  - `{ kind: 'kaiming', gain? }` \u2014 `std = gain / sqrt(fan_in)`. Default\n *    gain `sqrt(2)` (good for ReLU). `fan_in = shape[0]`.\n *  - `{ kind: 'literal', data }` \u2014 explicit Float32Array; length must\n *    match the parameter's element count.\n */\nexport type InitSpec =\n  | 'randn'\n  | 'zeros'\n  | 'ones'\n  | { readonly kind: 'randn'; readonly scale: number }\n  | { readonly kind: 'kaiming'; readonly gain?: number }\n  | { readonly kind: 'literal'; readonly data: Float32Array }\n\n/** Ergonomic constructors for InitSpec object shapes. */\nexport const init = {\n  randn: (opts: { scale?: number } = {}): InitSpec => ({ kind: 'randn', scale: opts.scale ?? 0.02 }),\n  kaiming: (opts: { gain?: number } = {}): InitSpec =>\n    opts.gain !== undefined ? { kind: 'kaiming', gain: opts.gain } : { kind: 'kaiming' },\n  literal: (data: Float32Array): InitSpec => ({ kind: 'literal', data }),\n}\n\nexport interface ParamOptions {\n  dtype?: Dtype\n  /** Init shape. Default: `'randn'` (std 0.02). */\n  init?: InitSpec\n  /** Whether AdamW (when `weightDecay > 0`) should apply decoupled weight\n   *  decay to this param. Default: `true` for randn/kaiming/literal init\n   *  (weight matrices, embeddings); `false` for zeros/ones (biases, LN\n   *  gains). Override to force or skip. Replaces `adam.decayFilter` for\n   *  the common case. */\n  decay?: boolean\n}\n\ntype InitFn = (size: number, shape: readonly number[]) => Float32Array\n\nfunction boxMuller(): number {\n  return Math.sqrt(-2 * Math.log(Math.max(1e-10, Math.random()))) * Math.cos(2 * Math.PI * Math.random())\n}\n\nfunction randnFn(scale: number): InitFn {\n  return (size) => {\n    const arr = new Float32Array(size)\n    for (let i = 0; i < size; i++) arr[i] = boxMuller() * scale\n    return arr\n  }\n}\n\n/** Compile-time-only: resolve an InitSpec shape into the closure that\n *  generates the initial Float32Array for a given parameter shape. Runs\n *  on the main thread before initial values are transferred to the worker. */\nfunction resolveInit(spec: InitSpec | undefined): InitFn {\n  if (!spec || spec === 'randn') return randnFn(0.02)\n  if (spec === 'zeros') return (size) => new Float32Array(size)\n  if (spec === 'ones') return (size) => { const a = new Float32Array(size); a.fill(1); return a }\n  switch (spec.kind) {\n    case 'randn': return randnFn(spec.scale)\n    case 'kaiming': {\n      const gain = spec.gain ?? Math.sqrt(2)\n      return (size, shape) => {\n        const fanIn = shape[0] ?? size\n        const std = gain / Math.sqrt(fanIn)\n        const arr = new Float32Array(size)\n        for (let i = 0; i < size; i++) arr[i] = boxMuller() * std\n        return arr\n      }\n    }\n    case 'literal': {\n      const data = spec.data\n      return (size) => {\n        if (data.length !== size) {\n          throw new Error(`init.literal: data length ${data.length} doesn't match param size ${size}`)\n        }\n        return new Float32Array(data)\n      }\n    }\n  }\n}\n\n/** Resolve the decay default for a param. Weight-shaped inits (randn,\n *  kaiming, literal) default to decay=true; ones/zeros default to false\n *  (biases, LN gains). Explicit `decay` opt overrides. */\nfunction resolveDecay(opts: ParamOptions | undefined): boolean {\n  if (opts?.decay !== undefined) return opts.decay\n  const spec = opts?.init ?? 'randn'\n  return spec !== 'zeros' && spec !== 'ones'\n}\n\n// ============================================================================\n// Internals: param sentinel\n// ============================================================================\n//\n// `this.param(shape)` returns a placeholder that's replaced by a real Tensor\n// during `materializeParams`. We type-cheat by declaring the return type as\n// `Tensor` so user code can write `this.W` and have TS happy; the cheat is\n// only valid post-materialization (which is always before forward runs).\n\nclass ParamSentinel {\n  constructor(\n    public readonly shape: Shape,\n    public readonly dtype: Dtype,\n    public readonly initFn: InitFn,\n    public readonly decay: boolean,\n  ) {}\n}\n\n// ============================================================================\n// Module base class\n// ============================================================================\n\nexport abstract class Module {\n  /**\n   * Declare a learnable parameter at this module. Must be called from inside\n   * the constructor (typically as a field assignment). Returns a placeholder\n   * that gets replaced with a real Tensor at compile time.\n   *\n   * The parameter's name is auto-derived from its property path in the model\n   * tree (e.g. `layers.0.attn.W_q`). Init metadata travels with the param;\n   * call `compiled.uploadInitialParams()` to apply it after compile.\n   */\n  protected param(shape: Shape, opts?: ParamOptions): Tensor {\n    const dtype = opts?.dtype ?? 'f32'\n    // Lie to TypeScript: the sentinel becomes a Tensor at materialize time.\n    return new ParamSentinel(shape, dtype, resolveInit(opts?.init), resolveDecay(opts)) as unknown as Tensor\n  }\n}\n\n// ============================================================================\n// Tree walking\n// ============================================================================\n\nexport interface MaterializedParams {\n  /** Map from auto-derived path (e.g. `layers.0.attn.W_q`) to its Tensor. */\n  tensors: Record<string, Tensor>\n  /** Init function per param path. Used by `uploadInitialParams`. */\n  initFns: Record<string, InitFn>\n  /** Whether this param should receive AdamW weight decay. Resolved at\n   *  `param()` time from `ParamOptions.decay` (with init-based default). */\n  decayFlags: Record<string, boolean>\n}\n\n/**\n * Walk the module tree and replace every ParamSentinel with a real Tensor\n * created via `paramInput(autoName, ...)`. Must be called inside an active\n * trace context (paramInput appends to the current graph).\n *\n * Returns the param tensors keyed by path, plus init functions for use by\n * `uploadInitialParams`.\n */\nexport function materializeParams(root: Module): MaterializedParams {\n  const tensors: Record<string, Tensor> = {}\n  const initFns: Record<string, InitFn> = {}\n  const decayFlags: Record<string, boolean> = {}\n  visit(root, '', (path, val, owner, key) => {\n    if (val instanceof ParamSentinel) {\n      const t = paramInput(path, val.shape, val.dtype)\n      ;(owner as any)[key] = t\n      tensors[path] = t\n      initFns[path] = val.initFn\n      decayFlags[path] = val.decay\n    }\n  })\n  return { tensors, initFns, decayFlags }\n}\n\n// ----------------------------------------------------------------------------\n// Visitor\n// ----------------------------------------------------------------------------\n//\n// Walks enumerable own properties recursively, building a path string. Recurses\n// into nested Modules and arrays of Modules (or arrays of arrays, etc.).\n// Calls `visitor` on every leaf \u2014 including ParamSentinels (pre-materialize)\n// and real Tensor leaves (post-materialize).\n\ntype Visitor = (path: string, val: unknown, owner: object, key: string | number) => void\n\nfunction visit(node: unknown, path: string, visitor: Visitor): void {\n  if (node === null || node === undefined) return\n  if (typeof node !== 'object') return\n\n  if (node instanceof Module) {\n    for (const key of Object.keys(node as object)) {\n      const child = (node as any)[key]\n      const childPath = path ? `${path}.${key}` : key\n      visitChild(child, childPath, node, key, visitor)\n    }\n    return\n  }\n  if (Array.isArray(node)) {\n    node.forEach((item, i) => {\n      const childPath = path ? `${path}.${i}` : String(i)\n      visitChild(item, childPath, node as unknown as object, i, visitor)\n    })\n    return\n  }\n  // Plain leaf object (sentinel / tensor / something else): visitor decides.\n  // No deeper recursion.\n}\n\nfunction visitChild(child: unknown, path: string, owner: object, key: string | number, visitor: Visitor): void {\n  if (child instanceof Module || Array.isArray(child)) {\n    visit(child, path, visitor)\n  } else {\n    visitor(path, child, owner, key)\n  }\n}\n", "// Wire format for the main-thread \u2194 worker postMessage channel.\n//\n// All requests carry a numeric `id` assigned by the main thread; responses\n// echo it back so the proxy can match concurrent in-flight calls. Every\n// response is either `{ ok: true, result }` or `{ ok: false, error }`.\n// Errors carry serialized name/message/stack so the proxy can reconstitute\n// an Error with a working `instanceof` check on the receiving side.\n//\n// Inputs (typed arrays) and outputs (typed arrays, captures) are transferred\n// rather than copied \u2014 see the per-request notes for which fields go on the\n// transfer list. A single worker may host multiple compiled graphs (a train\n// graph plus sibling forward graphs); each has a `graphId` issued by the\n// main thread at compile time.\n\nimport type { Graph } from './ir.js'\nimport type { BufferPlan } from './buffers.js'\nimport type { KernelSpec } from './codegen.js'\nimport type { LRSchedule } from './adam.js'\n\n// ============================================================================\n// Serializable config (subset of AdamResolvedConfig that crosses the wire).\n// `decayFilter` (a function, used only at compile time) is NOT part of this \u2014\n// the per-param decay decision is already baked into the IR by appendAdam\n// before the IR ships to the worker.\n// ============================================================================\n\nexport interface WireAdamConfig {\n  lr: LRSchedule\n  b1: number\n  b2: number\n  eps: number\n  weightDecay: number\n  lrIsScheduled: boolean\n  /** Names of the per-step scalar inputs the worker must populate before\n   *  every step (`_adam_lrt`, optionally `_adam_decay_shrink`). Mirrors\n   *  AdamResult so the worker can update them without re-deriving. */\n  lrtInputName: string\n  decayShrinkInputName: string | null\n}\n\n/** Compile output that crosses to the worker. Same fields as CompiledIR\n *  minus the `loss` tensor (carried by graph.outputs[0]). */\nexport interface WireIR {\n  graph: Graph\n  plan: BufferPlan\n  kernels: KernelSpec[]\n}\n\n// ============================================================================\n// Requests (main \u2192 worker)\n// ============================================================================\n\nexport type Req =\n  | { id: number; kind: 'createRuntime'; payload: CreateRuntimePayload }\n  | { id: number; kind: 'compileForward'; payload: CompileForwardPayload }\n  | { id: number; kind: 'step'; payload: StepPayload }\n  | { id: number; kind: 'run'; payload: RunPayload }\n  | { id: number; kind: 'uploadParams'; payload: UploadParamsPayload }\n  | { id: number; kind: 'downloadParams'; payload: { graphId: number } }\n  | { id: number; kind: 'downloadParamGrads'; payload: { graphId: number } }\n  | { id: number; kind: 'resetOptimizer'; payload: { graphId: number } }\n  | { id: number; kind: 'destroy'; payload: { graphId: number } }\n\n/** Build the training runtime. Always graphId=0 for a fresh worker. */\nexport interface CreateRuntimePayload {\n  graphId: number\n  ir: WireIR\n  /** Initial param values per name. Transferred (zero-copy) \u2014 the main\n   *  thread loses access after postMessage. */\n  initialParams: Record<string, Float32Array>\n  /** Adam config when training; absent for forward-only compiles. */\n  adam: WireAdamConfig | null\n}\n\n/** Build a sibling forward-only graph that shares param buffers with an\n *  existing graph (typically the training graph at graphId=0). */\nexport interface CompileForwardPayload {\n  graphId: number\n  parentGraphId: number\n  ir: WireIR\n}\n\n/** One training step. Inputs are transferred; the caller's typed arrays\n *  become detached after postMessage. */\nexport interface StepPayload {\n  graphId: number\n  inputs: Record<string, Int32Array | Float32Array>\n  withCaptures: boolean\n}\n\n/** Forward-only run. Same transfer semantics as `step`. */\nexport interface RunPayload {\n  graphId: number\n  inputs: Record<string, Int32Array | Float32Array>\n  withCaptures: boolean\n}\n\nexport interface UploadParamsPayload {\n  graphId: number\n  params: Record<string, Float32Array>  // transferred\n  partial: boolean\n}\n\n// ============================================================================\n// Responses (worker \u2192 main)\n// ============================================================================\n\nexport type Res<R = unknown> =\n  | { id: number; ok: true; result: R }\n  | { id: number; ok: false; error: WireError }\n\nexport interface WireError {\n  name: string\n  message: string\n  stack: string\n}\n\n// Per-request result shapes:\n\nexport interface CreateRuntimeResult {\n  paramNames: string[]\n  outputShape: number[]\n  kernelCount: number\n  captureShapes: Record<string, number[]>\n}\n\nexport interface CompileForwardResult {\n  paramNames: string[]\n  outputShape: number[]\n  kernelCount: number\n  captureShapes: Record<string, number[]>\n}\n\n/** Step without `withCaptures` returns just `loss`. With captures, also\n *  populates `captures` (per-name Float32Array, all transferred back). */\nexport interface StepResultWire {\n  loss: number\n  captures: Record<string, Float32Array> | null\n}\n\n/** Run without `withCaptures` returns `{ output, captures: null }`.\n *  With captures, also populates `captures`. */\nexport interface RunResultWire {\n  output: Float32Array\n  captures: Record<string, Float32Array> | null\n}\n\nexport interface DownloadParamsResult {\n  params: Record<string, Float32Array>  // transferred\n}\n\n// ============================================================================\n// Transfer-list helpers\n// ============================================================================\n\n/** Collect the underlying ArrayBuffers from a Record of typed arrays so we\n *  can pass them on `postMessage`'s transfer list. The values themselves\n *  stay in the Record; only their backing buffers move. */\nexport function transferablesOfRecord(\n  rec: Record<string, Int32Array | Float32Array>,\n): ArrayBuffer[] {\n  const out: ArrayBuffer[] = []\n  for (const v of Object.values(rec)) out.push(v.buffer as ArrayBuffer)\n  return out\n}\n\n/** Serialize an Error to a wire-friendly shape, preserving stack + name so\n *  the receiving side can reconstitute an Error that an `instanceof`-aware\n *  caller (e.g., for `ShapeError`) can still pattern-match by name. */\nexport function wireError(e: unknown): WireError {\n  if (e instanceof Error) {\n    return { name: e.name, message: e.message, stack: e.stack ?? '' }\n  }\n  return { name: 'Error', message: String(e), stack: '' }\n}\n\n/** Reconstitute an Error from the wire shape on the receiving (main) side. */\nexport function reconstituteError(w: WireError): Error {\n  const err = new Error(w.message)\n  err.name = w.name\n  err.stack = w.stack\n  return err\n}\n", "// Main-thread half of the worker channel: request/response correlation,\n// promise wiring, error reconstitution. Knows nothing about Adam, captures,\n// IR, etc. \u2014 just shuttles typed messages.\n\nimport type { Req, Res, WireError } from './worker-protocol.js'\nimport { reconstituteError } from './worker-protocol.js'\n\ninterface PendingHandlers {\n  resolve: (v: unknown) => void\n  reject: (e: Error) => void\n}\n\n/** Spawn a worker from an inlined source string and provide a typed\n *  request/response channel. One WorkerProxy = one Worker = one GPUDevice\n *  on the worker side. Sibling graphs share the same WorkerProxy. */\nexport class WorkerProxy {\n  private worker: Worker\n  private nextId = 1\n  private pending = new Map<number, PendingHandlers>()\n  private terminated = false\n\n  constructor(workerSource: string) {\n    const blob = new Blob([workerSource], { type: 'application/javascript' })\n    const url = URL.createObjectURL(blob)\n    this.worker = new Worker(url, { type: 'module' })\n    // The Blob URL keeps memory alive as long as it's referenced; revoke\n    // once the worker has loaded its source. Browsers tolerate revoke\n    // immediately after construction in practice.\n    URL.revokeObjectURL(url)\n\n    this.worker.onmessage = (ev: MessageEvent<Res>) => {\n      const reply = ev.data\n      const handlers = this.pending.get(reply.id)\n      if (!handlers) return  // stale reply; ignore\n      this.pending.delete(reply.id)\n      if (reply.ok) handlers.resolve(reply.result)\n      else handlers.reject(reconstituteError(reply.error))\n    }\n\n    this.worker.onerror = (ev: ErrorEvent) => {\n      const err = new Error(`tensorgrad worker error: ${ev.message || 'unknown'}`)\n      const wire: WireError = { name: 'WorkerError', message: err.message, stack: err.stack ?? '' }\n      // Reject everything in flight; subsequent calls will fail too.\n      for (const handlers of this.pending.values()) handlers.reject(reconstituteError(wire))\n      this.pending.clear()\n    }\n  }\n\n  /** Send a request and await its matching response. `transfer` lists the\n   *  ArrayBuffers to move (zero-copy) into the worker. */\n  request<R>(req: Omit<Req, 'id'>, transfer: ArrayBuffer[] = []): Promise<R> {\n    if (this.terminated) return Promise.reject(new Error('tensorgrad: worker has been terminated'))\n    const id = this.nextId++\n    return new Promise<R>((resolve, reject) => {\n      this.pending.set(id, { resolve: resolve as (v: unknown) => void, reject })\n      this.worker.postMessage({ ...req, id } as Req, transfer)\n    })\n  }\n\n  /** Fire-and-forget variant for cases where the caller doesn't need a reply\n   *  (currently unused; keep for symmetry / future use). */\n  send(req: Omit<Req, 'id'>, transfer: ArrayBuffer[] = []): void {\n    if (this.terminated) return\n    const id = this.nextId++\n    this.worker.postMessage({ ...req, id } as Req, transfer)\n  }\n\n  terminate(): void {\n    if (this.terminated) return\n    this.terminated = true\n    this.worker.terminate()\n    const err = new Error('tensorgrad: worker terminated')\n    for (const handlers of this.pending.values()) handlers.reject(err)\n    this.pending.clear()\n  }\n}\n", "// Top-level compile(): trace \u2192 autograd \u2192 buffer plan \u2192 codegen \u2192 runtime.\n//\n// Two entry points:\n//   * `compile(traceFn)`        \u2014 low-level. User declares params via\n//                                 paramInput() inside the trace.\n//   * `compileModule(model, \u2026)` \u2014 high-level. User defines the model as a\n//                                 Module tree; the library auto-discovers\n//                                 params, traces the forward, appends grad\n//                                 and Adam, and returns a runtime.\n//\n// As of the worker-architecture refactor: compile-time work (trace, autograd,\n// buffer planning, codegen) runs on the main thread. createRuntime and all\n// dispatch/mapAsync work runs in a Web Worker spawned per top-level compile;\n// the returned `CompiledModule` is a thin proxy over the worker channel.\n// See specs/WorkerArchitecture.md.\n\nimport type { Tensor, Shape, Dtype } from './ir.js'\nimport { trace, tensorInput } from './trace.js'\nimport { appendGrad, type GradResult } from './grad.js'\nimport {\n  appendAdam, resolveLR,\n  type AdamConfig, type AdamResult, type AdamResolvedConfig,\n} from './adam.js'\nimport { planBuffers, type BufferPlan } from './buffers.js'\nimport { emitKernels, type KernelSpec } from './codegen.js'\nimport {\n  Captures, type RunResult, type StepResult, type RunOptions, type UploadParamsOptions,\n} from './runtime.js'\nimport { Module, materializeParams, type MaterializedParams } from './module.js'\nimport { WorkerProxy } from './worker-proxy.js'\nimport {\n  transferablesOfRecord,\n  type Req, type WireIR, type WireAdamConfig,\n  type CreateRuntimeResult, type CompileForwardResult,\n  type StepResultWire, type RunResultWire, type DownloadParamsResult,\n} from './worker-protocol.js'\n\n// `__WORKER_SOURCE__` is replaced at build time by scripts/build.mjs with the\n// stringified contents of the bundled src/worker.ts. Declared here so TS is\n// happy; substituted as a string literal by esbuild's `define` during\n// `npm run build:js`. See scripts/build.mjs.\ndeclare const __WORKER_SOURCE__: string\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** Declares one input tensor of the model's forward function. The name is the\n *  key in the `inputs:` Record at compile time and the key on the `step()`/\n *  `run()` data object at runtime. */\nexport interface InputDecl {\n  shape: Shape\n  dtype?: Dtype\n}\n\n/** Inputs declaration: a Record from input name to its shape/dtype. */\nexport type InputDecls = Record<string, InputDecl>\n\n/** Maps an `InputDecls` Record to its forward-time tensor counterpart \u2014\n *  same keys, each value is a Tensor. */\nexport type InputsTensors<I extends InputDecls> = { [K in keyof I]: Tensor }\n\n/** Forward function shape. */\nexport type ForwardFn<M extends Module, I extends InputDecls = InputDecls> =\n  (m: M, inputs: InputsTensors<I>) => Tensor\n\nexport interface CompiledIR {\n  graph: GradResult['graph']\n  paramGrads: GradResult['paramGrads']\n  loss: Tensor\n  plan: BufferPlan\n  kernels: KernelSpec[]\n}\n\n/** Trace + autograd + buffer-plan + codegen, without touching WebGPU. */\nexport function compileToIR(traceFn: () => Tensor): CompiledIR {\n  const graph = trace(traceFn)\n  const { paramGrads, loss } = appendGrad(graph)\n  const plan = planBuffers(graph, paramGrads)\n  const kernels = emitKernels(graph, plan)\n  return { graph, paramGrads, loss, plan, kernels }\n}\n\n// ============================================================================\n// CompiledModule / CompiledForwardModule \u2014 main-thread proxy surface\n// ============================================================================\n\nexport interface CompileModuleOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n  adam?: AdamConfig\n}\n\nexport interface CompileForwardOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n}\n\nexport interface CompileForwardMethodOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n}\n\n/** Returned by `compileModule`. Proxies all GPU work to a worker held\n *  internally; user code awaits Promises and never sees the worker. */\nexport interface CompiledModule<M extends Module> {\n  readonly ir: CompiledIR\n  readonly kernelCount: number\n  readonly outputShape: readonly number[]\n  /** Names of the model's parameters, in materialization order. The actual\n   *  GPUBuffers live in the worker; use `downloadParams()` for values. */\n  readonly paramNames: readonly string[]\n\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void>\n  downloadParams(): Promise<Record<string, Float32Array>>\n  downloadParamGrads(): Promise<Record<string, Float32Array>>\n\n  /** Re-initialize all params + zero optimizer state. */\n  reset(): Promise<void>\n  resetOptimizerState(): Promise<void>\n\n  /** Compile a sibling forward-only graph that shares this runtime's worker\n   *  (and therefore its param GPUBuffers). */\n  compileForward<I extends InputDecls>(\n    forward: ForwardFn<M, I>,\n    opts?: CompileForwardMethodOptions<I>,\n  ): Promise<CompiledForwardModule>\n\n  /** Free the runtime's GPU resources and terminate the worker. */\n  destroy(): void\n}\n\n/** Returned by `compileForward` (and by the `compileForward` method). */\nexport interface CompiledForwardModule {\n  readonly ir: CompiledIR\n  readonly kernelCount: number\n  readonly outputShape: readonly number[]\n  readonly paramNames: readonly string[]\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void>\n  downloadParams(): Promise<Record<string, Float32Array>>\n\n  destroy(): void\n}\n\n// ============================================================================\n// compileModule / compileForward\n// ============================================================================\n\n/**\n * Compile a Module-based model. Pass a *factory* `() => new Model()`, not the\n * model instance itself: compilation mutates the tree (every `ParamSentinel`\n * field becomes a real `Tensor`), so the instance is consumed and shouldn't be\n * referenced afterwards.\n *\n * The forward function takes the materialized model and a Record of named\n * input tensors, returns the loss tensor:\n *\n *   inputs: {\n *     tokens:  { shape: [B, T], dtype: 'i32' },\n *     targets: { shape: [B, T], dtype: 'i32' },\n *   }\n *   forward: (m, { tokens, targets }) => \u2026\n *\n * Returns a `CompiledModule` proxy. All GPU work (createRuntime, step, run,\n * mapAsync) happens in an internal worker; calls return Promises that resolve\n * when the worker replies.\n */\nexport async function compileModule<M extends Module, I extends InputDecls = InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  opts: CompileModuleOptions<I> = {},\n): Promise<CompiledModule<M>> {\n  // ---- Compile-time work (main thread) ------------------------------------\n  const { graph, materialized } = traceModule(modelFactory, forward, opts.inputs ?? {})\n  const { paramGrads, loss } = appendGrad(graph)\n  const adamResult = opts.adam\n    ? appendAdam(graph, paramGrads, materialized.tensors, opts.adam, materialized.decayFlags)\n    : undefined\n\n  const plan = planBuffers(graph, paramGrads, adamResult?.writebacks ?? [])\n  const kernels = emitKernels(graph, plan)\n  const ir: CompiledIR = { graph, paramGrads, loss, plan, kernels }\n\n  // Initial params: resolve init shapes to Float32Arrays now (main thread).\n  // These transfer (zero-copy) to the worker as part of createRuntime.\n  const initialParams = buildInitialParams(plan, materialized.initFns)\n\n  // ---- Spawn worker, send IR + initial params -----------------------------\n  const proxy = new WorkerProxy(__WORKER_SOURCE__)\n  const wireIR: WireIR = { graph, plan, kernels }\n  const wireAdam = adamResult ? wireAdamConfig(adamResult) : null\n  const transfers = transferablesOfRecord(initialParams)\n\n  let meta: CreateRuntimeResult\n  try {\n    meta = await proxy.request<CreateRuntimeResult>(\n      { kind: 'createRuntime', payload: { graphId: 0, ir: wireIR, initialParams, adam: wireAdam } },\n      transfers,\n    )\n  } catch (e) {\n    proxy.terminate()\n    throw e\n  }\n\n  return new CompiledModuleProxy<M>(\n    proxy, /* graphId */ 0, ir, meta, modelFactory,\n    /* initFns */ materialized.initFns,\n    /* nextGraphId */ { v: 1 },\n  )\n}\n\n/**\n * Forward-only compile. Spawns its own worker. For sibling graphs that share\n * params with a training graph, prefer the `compileForward` method on the\n * CompiledModule returned by `compileModule()`.\n */\nexport async function compileForward<M extends Module, I extends InputDecls = InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  opts: CompileForwardOptions<I> = {},\n): Promise<CompiledForwardModule> {\n  const { graph, materialized } = traceModule(modelFactory, forward, opts.inputs ?? {})\n  const outputTensor = graph.tensors[graph.outputs[0]!]!\n  const plan = planBuffers(graph, /* paramGrads */ {})\n  const kernels = emitKernels(graph, plan)\n  const ir: CompiledIR = { graph, paramGrads: {}, loss: outputTensor, plan, kernels }\n\n  const initialParams = buildInitialParams(plan, materialized.initFns)\n  const proxy = new WorkerProxy(__WORKER_SOURCE__)\n  const wireIR: WireIR = { graph, plan, kernels }\n  const transfers = transferablesOfRecord(initialParams)\n\n  let meta: CreateRuntimeResult\n  try {\n    meta = await proxy.request<CreateRuntimeResult>(\n      { kind: 'createRuntime', payload: { graphId: 0, ir: wireIR, initialParams, adam: null } },\n      transfers,\n    )\n  } catch (e) {\n    proxy.terminate()\n    throw e\n  }\n\n  return new CompiledForwardModuleProxy(proxy, /* graphId */ 0, ir, meta, /* ownsWorker */ true)\n}\n\n// ============================================================================\n// Proxy implementations\n// ============================================================================\n\nclass CompiledModuleProxy<M extends Module> implements CompiledModule<M> {\n  constructor(\n    private readonly proxy: WorkerProxy,\n    private readonly graphId: number,\n    public readonly ir: CompiledIR,\n    private readonly meta: CreateRuntimeResult,\n    private readonly modelFactory: () => M,\n    /** Init closures captured from materializeParams at compile time. Used\n     *  by reset() to regenerate initial param values. */\n    private readonly initFns: Record<string, InitFn>,\n    private readonly nextGraphId: { v: number },\n  ) {}\n\n  get kernelCount(): number { return this.meta.kernelCount }\n  get outputShape(): readonly number[] { return this.meta.outputShape }\n  get paramNames(): readonly string[] { return this.meta.paramNames }\n\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\n  async step(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<number | StepResult> {\n    // Note: inputs are copied (not transferred) into the worker. Callers\n    // commonly reuse the same TypedArray as a scratch buffer across step()\n    // calls; transferring would detach it. The copy cost is small relative\n    // to a training step's GPU work.\n    const r = await this.proxy.request<StepResultWire>(\n      { kind: 'step', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { loss: r.loss, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.loss\n  }\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n  async run(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<Float32Array | RunResult> {\n    // Inputs copied (see note in step()).\n    const r = await this.proxy.request<RunResultWire>(\n      { kind: 'run', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { output: r.output, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.output\n  }\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void> {\n    // Params copied (see note in step()) \u2014 caller's Float32Arrays stay valid.\n    return this.proxy.request<null>(\n      { kind: 'uploadParams', payload: { graphId: this.graphId, params, partial: !!opts?.partial } },\n    ).then(() => undefined)\n  }\n\n  async downloadParams(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParams', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  async downloadParamGrads(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParamGrads', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  async reset(): Promise<void> {\n    // Re-init main-thread, upload, then reset Adam state on worker. Two\n    // round-trips but reset() is rare. The init closures were captured at\n    // compile time and stashed on the proxy.\n    const initialParams = buildInitialParams(this.ir.plan, this.initFns)\n    await this.uploadParams(initialParams)\n    await this.resetOptimizerState()\n  }\n\n  resetOptimizerState(): Promise<void> {\n    return this.proxy.request<null>(\n      { kind: 'resetOptimizer', payload: { graphId: this.graphId } },\n    ).then(() => undefined)\n  }\n\n  async compileForward<I extends InputDecls>(\n    forward: ForwardFn<M, I>,\n    opts: CompileForwardMethodOptions<I> = {},\n  ): Promise<CompiledForwardModule> {\n    const { graph, materialized: _materialized } = traceModule(this.modelFactory, forward, opts.inputs ?? {})\n    const outputTensor = graph.tensors[graph.outputs[0]!]!\n    const plan = planBuffers(graph, /* paramGrads */ {})\n    const kernels = emitKernels(graph, plan)\n    const ir: CompiledIR = { graph, paramGrads: {}, loss: outputTensor, plan, kernels }\n\n    const childGraphId = this.nextGraphId.v++\n    const wireIR: WireIR = { graph, plan, kernels }\n\n    const meta = await this.proxy.request<CompileForwardResult>(\n      { kind: 'compileForward', payload: { graphId: childGraphId, parentGraphId: this.graphId, ir: wireIR } },\n    )\n\n    return new CompiledForwardModuleProxy(this.proxy, childGraphId, ir, meta, /* ownsWorker */ false)\n  }\n\n  destroy(): void {\n    // Fire-and-forget destroy; postMessage ordering ensures the worker\n    // processes any in-flight requests before we terminate it.\n    this.proxy.send({ kind: 'destroy', payload: { graphId: this.graphId } })\n    this.proxy.terminate()\n  }\n}\n\nclass CompiledForwardModuleProxy implements CompiledForwardModule {\n  constructor(\n    private readonly proxy: WorkerProxy,\n    private readonly graphId: number,\n    public readonly ir: CompiledIR,\n    private readonly meta: CompileForwardResult | CreateRuntimeResult,\n    private readonly ownsWorker: boolean,\n  ) {}\n\n  get kernelCount(): number { return this.meta.kernelCount }\n  get outputShape(): readonly number[] { return this.meta.outputShape }\n  get paramNames(): readonly string[] { return this.meta.paramNames }\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n  async run(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<Float32Array | RunResult> {\n    // Inputs copied; caller's TypedArrays stay valid.\n    const r = await this.proxy.request<RunResultWire>(\n      { kind: 'run', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { output: r.output, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.output\n  }\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void> {\n    return this.proxy.request<null>(\n      { kind: 'uploadParams', payload: { graphId: this.graphId, params, partial: !!opts?.partial } },\n    ).then(() => undefined)\n  }\n\n  async downloadParams(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParams', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  destroy(): void {\n    this.proxy.send({ kind: 'destroy', payload: { graphId: this.graphId } })\n    if (this.ownsWorker) this.proxy.terminate()\n  }\n}\n\n// ============================================================================\n// Internals\n// ============================================================================\n\ntype Graph = ReturnType<typeof trace>\ntype InitFn = (size: number, shape: readonly number[]) => Float32Array\n\n/** Trace the forward function with a fresh model + tensor inputs and capture\n *  the materialized params. Shared by both compile entry points; everything\n *  past this point (grad/adam/buffer plan/runtime) diverges. */\nfunction traceModule<M extends Module, I extends InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  inputDecls: InputDecls,\n): { graph: Graph; materialized: MaterializedParams } {\n  const model = modelFactory()\n  let materialized: MaterializedParams = { tensors: {}, initFns: {}, decayFlags: {} }\n  const graph = trace(() => {\n    materialized = materializeParams(model)\n    const inputTensors: Record<string, Tensor> = {}\n    for (const [name, decl] of Object.entries(inputDecls)) {\n      inputTensors[name] = tensorInput(name, decl.shape, decl.dtype ?? 'f32')\n    }\n    return forward(model, inputTensors as InputsTensors<I>)\n  })\n  return { graph, materialized }\n}\n\n/** Run each param's init function against its declared shape to produce the\n *  initial Float32Arrays. Runs main-thread before transfer to the worker. */\nfunction buildInitialParams(plan: BufferPlan, initFns: Record<string, InitFn>): Record<string, Float32Array> {\n  const out: Record<string, Float32Array> = {}\n  for (const [name, bufId] of plan.paramsByName) {\n    const shape = plan.buffers[bufId]!.shape\n    const size = shape.reduce((a, b) => a * b, 1)\n    const initFn = initFns[name]\n    if (!initFn) throw new Error(`compile: no init for param '${name}'`)\n    out[name] = initFn(size, shape)\n  }\n  return out\n}\n\n/** Subset of AdamResolvedConfig that crosses the wire (drops decayFilter,\n *  which is only used at compile time). */\nfunction wireAdamConfig(r: AdamResult): WireAdamConfig {\n  const c: AdamResolvedConfig = r.config\n  return {\n    lr: c.lr,\n    b1: c.b1,\n    b2: c.b2,\n    eps: c.eps,\n    weightDecay: c.weightDecay,\n    lrIsScheduled: c.lrIsScheduled,\n    lrtInputName: r.lrtInputName,\n    decayShrinkInputName: r.decayShrinkInputName,\n  }\n}\n\n/** Wrap a worker-returned `Record<name, Float32Array>` in a Captures instance\n *  using the static capture shapes captured at compile time. */\nfunction makeCaptures(\n  captures: Record<string, Float32Array> | null,\n  captureShapes: Record<string, number[]>,\n): Captures {\n  const data = new Map<string, Float32Array>()\n  if (captures) {\n    for (const [name, arr] of Object.entries(captures)) data.set(name, arr)\n  }\n  return new Captures(captureShapes, data)\n}\n\n", "// Standard \"batteries-included\" Module subclasses for the most common layers.\n//\n// Each class declares its params and a `.fwd(x)` method that runs the forward\n// computation. Forward methods are pure tensorgrad ops \u2014 autograd traces\n// through them just like any other call.\n//\n//   import { nn } from 'tensorgrad'\n//   class Block extends Module {\n//     ln  = new nn.LayerNorm(D)\n//     ffn = new nn.Linear(D, 4 * D)\n//   }\n//   const y = p.ffn.fwd(p.ln.fwd(x))\n\nimport { Module } from './module.js'\nimport type { Tensor } from './ir.js'\nimport { add, matmul, sub, mul, div, sqrt, meanLast, sumLast, reshape, swapAxes, oneHot, logSoftmaxLast } from './ops.js'\nimport { ShapeError } from './shape.js'\nimport { captureSite } from './ir.js'\nimport type { Captures } from './runtime.js'\n\n// ----------------------------------------------------------------------------\n// Linear: y = x @ W (+ b)\n// ----------------------------------------------------------------------------\n\nexport interface LinearOptions {\n  /** Include a bias term (default true). */\n  bias?: boolean\n}\n\nexport class Linear extends Module {\n  W: Tensor\n  b: Tensor | null\n  constructor(public readonly inDim: number, public readonly outDim: number, opts: LinearOptions = {}) {\n    super()\n    this.W = this.param([inDim, outDim])                      // randn, scale 0.02\n    this.b = opts.bias === false ? null : this.param([outDim], { init: 'zeros' })\n  }\n  fwd(x: Tensor): Tensor {\n    const out = matmul(x, this.W)\n    return this.b ? add(out, this.b) : out\n  }\n}\n\n// ----------------------------------------------------------------------------\n// LayerNorm \u2014 normalizes over the last axis. eps defaults to 1e-5.\n// ----------------------------------------------------------------------------\n\nexport class LayerNorm extends Module {\n  g: Tensor\n  b: Tensor\n  constructor(public readonly d: number, public readonly eps: number = 1e-5) {\n    super()\n    this.g = this.param([d], { init: 'ones' })\n    this.b = this.param([d], { init: 'zeros' })\n  }\n  fwd(x: Tensor): Tensor {\n    const m = meanLast(x)\n    const c = sub(x, m)\n    const v = meanLast(mul(c, c))\n    const stdev = sqrt(add(v, this.eps))\n    return add(mul(div(c, stdev), this.g), this.b)\n  }\n}\n\n// ----------------------------------------------------------------------------\n// Multi-head attention shape helpers \u2014 split the last (model) axis into\n// [nHeads, headDim] and bring heads ahead of the sequence axis.\n// ----------------------------------------------------------------------------\n\n/** [..., T, D] \u2192 [..., H, T, D/H]. Folds the standard\n *  `transpose(reshape(x, [..., T, H, d]), [..., H, T, d])` pattern into one\n *  call. Last dim of `x` must divide evenly by `nHeads`. */\nexport function splitHeads(x: Tensor, nHeads: number): Tensor {\n  const site = captureSite('splitHeads')\n  const r = x.shape.length\n  if (r < 2) throw new ShapeError(`splitHeads: requires rank >= 2, got ${r}`, site)\n  const T = x.shape[r - 2]!\n  const D = x.shape[r - 1]!\n  if (D % nHeads !== 0) {\n    throw new ShapeError(`splitHeads: last dim ${D} not divisible by nHeads ${nHeads}`, site)\n  }\n  const lead = x.shape.slice(0, r - 2)\n  const reshaped = reshape(x, [...lead, T, nHeads, D / nHeads])\n  // Swap T (axis lead.length) with H (axis lead.length + 1).\n  return swapAxes(reshaped, lead.length, lead.length + 1)\n}\n\n/** Inverse of `splitHeads`: [..., H, T, d] \u2192 [..., T, H*d]. */\nexport function mergeHeads(x: Tensor): Tensor {\n  const site = captureSite('mergeHeads')\n  const r = x.shape.length\n  if (r < 3) throw new ShapeError(`mergeHeads: requires rank >= 3, got ${r}`, site)\n  const H = x.shape[r - 3]!\n  const T = x.shape[r - 2]!\n  const d = x.shape[r - 1]!\n  const lead = x.shape.slice(0, r - 3)\n  // Swap H (axis r-3) and T (axis r-2): [..., H, T, d] \u2192 [..., T, H, d]\n  const swapped = swapAxes(x, r - 3, r - 2)\n  return reshape(swapped, [...lead, T, H * d])\n}\n\n/** Slice a captured tensor named `name` into one Float32Array per head, using\n *  the static shape registered at compile time. The leading axis is treated as\n *  heads (matching `splitHeads` layout at B=1); a leading singleton batch is\n *  stripped if present so callers can pass capture names directly. Throws if\n *  the capture isn't registered or wasn't read back this call. */\nexport function unsplitHeads(captures: Captures, name: string): Float32Array[] {\n  const flat = captures.get(name)\n  const shape = captures.shapeOf(name)\n  if (shape.length < 2) {\n    throw new Error(`unsplitHeads: '${name}' shape needs >= 2 dims, got [${shape.join(', ')}]`)\n  }\n  // For inference graphs at B=1, captures have shape [1, H, ..., ...]. Strip\n  // the leading 1 if present so the next axis is heads.\n  const s = shape[0] === 1 ? shape.slice(1) : shape\n  const H = s[0]!\n  let stride = 1\n  for (let i = 1; i < s.length; i++) stride *= s[i]!\n  const expected = H * stride\n  if (flat.length !== expected) {\n    throw new Error(`unsplitHeads: '${name}' length ${flat.length} doesn't match shape product ${expected}`)\n  }\n  return Array.from({ length: H }, (_, h) => flat.slice(h * stride, (h + 1) * stride))\n}\n\n// ----------------------------------------------------------------------------\n// Loss helpers\n// ----------------------------------------------------------------------------\n\n/** Per-position cross-entropy along the last (vocab) axis: returns\n *  `-log p(target)` at each position. `logits` is `[..., V]`; `targets` is\n *  `[...]` of i32; result is `[...]` (one rank less than logits). The user\n *  applies their own masking + reduction downstream \u2014 useful when only some\n *  positions contribute (e.g. result-digit masking) or for label smoothing. */\nexport function crossEntropyLast(logits: Tensor, targets: Tensor): Tensor {\n  const site = captureSite('crossEntropyLast')\n  if (targets.dtype !== 'i32') {\n    throw new ShapeError(`crossEntropyLast: targets must be i32, got ${targets.dtype}`, site)\n  }\n  const vocab = logits.shape[logits.shape.length - 1]!\n  const lp = logSoftmaxLast(logits)                                   // [..., V]\n  const targetLp = sumLast(mul(lp, oneHot(targets, vocab, 'f32')))    // [...]\n  return mul(targetLp, -1)\n}\n"],
-  "mappings": ";;;;;;;AAiKO,SAAS,YAAmB;AACjC,SAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC,GAAG,UAAU,oBAAI,IAAI,EAAE;AAClE;AAGO,SAAS,UAAU,GAAU,OAAc,OAAc,QAAuB,MAA+B;AACpH,QAAM,KAAK,EAAE,QAAQ;AACrB,QAAM,IAAY,EAAE,IAAI,OAAO,OAAO,QAAQ,KAAK;AACnD,IAAE,QAAQ,KAAK,CAAC;AAChB,SAAO;AACT;AAMO,SAAS,MACd,GACA,MACA,OACA,OACA,MACA,QACQ;AACR,QAAM,UAAU,EAAE,IAAI;AACtB,QAAM,MAAM,UAAU,GAAG,OAAO,OAAO,SAAS,IAAI;AACpD,QAAM,OAAO,EAAE,MAAM,KAAK,IAAI,IAAI,GAAG,OAAO;AAC5C,IAAE,IAAI,KAAK,IAAI;AACf,SAAO;AACT;AAIO,SAAS,YAAY,QAA0B;AAEpD,QAAM,QAAS,IAAI,MAAM,EAAG,SAAS;AACrC,SAAO,EAAE,QAAQ,MAAM;AACzB;AAIO,SAAS,WAAW,MAAwB;AACjD,QAAM,QAAQ,KAAK,MAAM,MAAM,IAAI;AAGnC,QAAM,aAAuB,CAAC;AAC9B,aAAW,QAAQ,MAAM,MAAM,CAAC,GAAG;AACjC,QAAI,KAAK,SAAS,kBAAkB,KAAK,KAAK,SAAS,qBAAqB,EAAG;AAC/E,eAAW,KAAK,KAAK,KAAK,CAAC;AAC3B,QAAI,WAAW,UAAU,EAAG;AAAA,EAC9B;AACA,MAAI,WAAW,WAAW,EAAG,QAAO,IAAI,KAAK,MAAM;AACnD,SAAO,IAAI,KAAK,MAAM;AAAA,IAAQ,WAAW,KAAK,MAAM,CAAC;AACvD;;;AC7LO,IAAM,aAAN,cAAyB,MAAM;AAAA,EACpC,YAAY,SAAiB,MAAuB;AAClD,UAAM,YAAY,OAAO,GAAG,OAAO;AAAA,OAAU,WAAW,IAAI,CAAC,KAAK;AAClE,UAAM,SAAS;AACf,SAAK,OAAO;AAAA,EACd;AACF;AAEA,SAAS,KAAK,SAAiB,MAA8B;AAC3D,QAAM,IAAI,WAAW,SAAS,IAAI;AACpC;AAMO,SAAS,YAAY,GAAU,GAAmB;AACvD,MAAI,EAAE,WAAW,EAAE,OAAQ,QAAO;AAClC,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,KAAI,EAAE,CAAC,MAAM,EAAE,CAAC,EAAG,QAAO;AAC7D,SAAO;AACT;AAEO,SAAS,UAAU,OAAsB;AAC9C,MAAI,IAAI;AACR,aAAW,KAAK,MAAO,MAAK;AAC5B,SAAO;AACT;AAEO,SAAS,UAAU,OAAsB;AAC9C,SAAO,IAAI,MAAM,KAAK,IAAI,CAAC;AAC7B;AAKO,SAAS,kBAAkB,GAAU,GAAwB;AAClE,QAAM,OAAO,KAAK,IAAI,EAAE,QAAQ,EAAE,MAAM;AACxC,QAAM,MAAgB,IAAI,MAAM,IAAI;AACpC,WAAS,IAAI,GAAG,IAAI,MAAM,KAAK;AAC7B,UAAM,KAAK,KAAK,OAAO,EAAE;AACzB,UAAM,KAAK,KAAK,OAAO,EAAE;AACzB,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE,EAAE;AAC5B,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE,EAAE;AAC5B,QAAI,OAAO,GAAI,KAAI,CAAC,IAAI;AAAA,aACf,OAAO,EAAG,KAAI,CAAC,IAAI;AAAA,aACnB,OAAO,EAAG,KAAI,CAAC,IAAI;AAAA,QACvB,QAAO;AAAA,EACd;AACA,SAAO;AACT;AASO,SAAS,sBACd,QAAgB,QAAe,QAAe,MACvC;AACP,QAAM,SAAS,kBAAkB,QAAQ,MAAM;AAC/C,MAAI,CAAC,QAAQ;AACX;AAAA,MACE,GAAG,MAAM,yBAAyB,UAAU,MAAM,CAAC,QAAQ,UAAU,MAAM,CAAC;AAAA,MAG5E;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,WAAW,SAAiB,QAAe,OAA+B;AACxF,SAAO;AACT;AAEO,SAAS,cAAc,QAAgB,QAAe,MAA8B;AACzF,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,gCAAgC,IAAI;AAE3E,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,CAAC;AACnC;AAEO,SAAS,aAAa,QAAgB,QAAe,MAA8B;AACxF,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,gCAAgC,IAAI;AAE3E,SAAO,OAAO,MAAM,GAAG,EAAE;AAC3B;AAEO,SAAS,aAAa,QAAgB,QAAe,UAAiB,MAA8B;AAEzG,MAAI,WAAW;AACf,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,IAAI,SAAS,CAAC;AACpB,QAAI,MAAM,IAAI;AACZ,UAAI,aAAa,GAAI,MAAK,GAAG,MAAM,4CAA4C,UAAU,QAAQ,CAAC,IAAI,IAAI;AAC1G,iBAAW;AAAA,IACb,WAAW,KAAK,GAAG;AACjB,WAAK,GAAG,MAAM,iBAAiB,CAAC,gBAAgB,UAAU,QAAQ,CAAC,IAAI,IAAI;AAAA,IAC7E,OAAO;AACL,mBAAa;AAAA,IACf;AAAA,EACF;AACA,QAAM,UAAU,UAAU,MAAM;AAChC,QAAM,MAAM,CAAC,GAAG,QAAQ;AACxB,MAAI,aAAa,IAAI;AACnB,QAAI,UAAU,cAAc,GAAG;AAC7B,WAAK,GAAG,MAAM,oBAAoB,UAAU,MAAM,CAAC,UAAU,OAAO,QAAQ,UAAU,QAAQ,CAAC,kCAA6B,SAAS,IAAI,IAAI;AAAA,IAC/I;AACA,QAAI,QAAQ,IAAI,UAAU;AAAA,EAC5B,WAAW,cAAc,SAAS;AAChC,SAAK,GAAG,MAAM,gCAA2B,UAAU,MAAM,CAAC,QAAQ,OAAO,0BAA0B,UAAU,QAAQ,CAAC,QAAQ,SAAS,IAAI,IAAI;AAAA,EACjJ;AACA,SAAO;AACT;AAEO,SAAS,eAAe,QAAgB,QAAe,MAAyB,MAA8B;AACnH,MAAI,KAAK,WAAW,OAAO,QAAQ;AACjC,SAAK,GAAG,MAAM,iBAAiB,KAAK,MAAM,0BAA0B,OAAO,MAAM,IAAI,IAAI;AAAA,EAC3F;AACA,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,KAAK,MAAM;AACpB,QAAI,IAAI,KAAK,KAAK,OAAO,OAAQ,MAAK,GAAG,MAAM,gBAAgB,CAAC,0BAA0B,OAAO,MAAM,IAAI,IAAI;AAC/G,QAAI,KAAK,IAAI,CAAC,EAAG,MAAK,GAAG,MAAM,8BAA8B,CAAC,IAAI,IAAI;AACtE,SAAK,IAAI,CAAC;AAAA,EACZ;AACA,SAAO,KAAK,IAAI,OAAK,OAAO,CAAC,CAAE;AACjC;AAGO,SAAS,YAAY,QAAgB,QAAe,QAAe,MAA8B;AACtG,MAAI,OAAO,SAAS,EAAG,MAAK,GAAG,MAAM,kCAAkC,UAAU,MAAM,CAAC,IAAI,IAAI;AAChG,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,+BAA+B,UAAU,MAAM,CAAC,6CAAwC,IAAI;AACnI,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,KAAK,OAAO,CAAC;AACnB,QAAM,IAAI,OAAO,CAAC;AAClB,MAAI,OAAO,GAAI,MAAK,GAAG,MAAM,mCAA8B,UAAU,MAAM,CAAC,SAAM,UAAU,MAAM,CAAC,wBAAwB,EAAE,yBAAyB,EAAE,KAAK,IAAI;AACjK,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,GAAG,CAAC;AACtC;AAGO,SAAS,mBAAmB,QAAgB,QAAe,QAAe,MAA8B;AAC7G,MAAI,OAAO,SAAS,KAAK,OAAO,SAAS,GAAG;AAC1C,SAAK,GAAG,MAAM,0CAA0C,UAAU,MAAM,CAAC,QAAQ,UAAU,MAAM,CAAC,IAAI,IAAI;AAAA,EAC5G;AACA,MAAI,OAAO,WAAW,OAAO,QAAQ;AACnC,SAAK,GAAG,MAAM,2BAA2B,OAAO,MAAM,OAAO,OAAO,MAAM,gDAAgD,IAAI;AAAA,EAChI;AACA,QAAM,SAAS,OAAO,MAAM,GAAG,EAAE;AACjC,QAAM,SAAS,OAAO,MAAM,GAAG,EAAE;AACjC,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG;AAC3B,WAAK,GAAG,MAAM,kCAA6B,UAAU,MAAM,CAAC,OAAO,UAAU,MAAM,CAAC,IAAI,IAAI;AAAA,IAC9F;AAAA,EACF;AACA,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,MAAI,OAAO,GAAI,MAAK,GAAG,MAAM,sDAAiD,EAAE,6BAA6B,EAAE,IAAI,IAAI;AACvH,SAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;AACzB;AAEO,SAAS,YAAY,QAAgB,cAAqB,OAAe,MAA8B;AAC5G,MAAI,SAAS,EAAG,MAAK,GAAG,MAAM,iCAAiC,KAAK,IAAI,IAAI;AAC5E,SAAO,CAAC,GAAG,cAAc,KAAK;AAChC;AAGO,SAAS,iBAAiB,QAAgB,QAAe,MAA8B;AAC5F,MAAI,OAAO,SAAS,EAAG,MAAK,GAAG,MAAM,6BAA6B,UAAU,MAAM,CAAC,IAAI,IAAI;AAC3F,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,MAAI,MAAM,EAAG,MAAK,GAAG,MAAM,oDAAoD,UAAU,MAAM,CAAC,IAAI,IAAI;AACxG,SAAO;AACT;AAEO,SAAS,oBAAoB,QAAgB,QAAe,OAAe,KAAa,MAA8B;AAC3H,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,6BAA6B,IAAI;AACxE,QAAM,OAAO,OAAO,OAAO,SAAS,CAAC;AACrC,MAAI,QAAQ,KAAK,MAAM,QAAQ,SAAS,KAAK;AAC3C,SAAK,GAAG,MAAM,oBAAoB,KAAK,KAAK,GAAG,2BAA2B,IAAI,IAAI,IAAI;AAAA,EACxF;AACA,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,MAAM,KAAK;AAC7C;AAIO,SAAS,iBAAiB,QAAgB,QAAe,aAAoB,MAA8B;AAChH,MAAI,OAAO,SAAS,YAAY,QAAQ;AACtC,SAAK,GAAG,MAAM,iBAAiB,OAAO,MAAM,kBAAkB,YAAY,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,SAAS,YAAY,SAAS,OAAO;AAC3C,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,UAAM,KAAK,OAAO,CAAC;AACnB,UAAM,KAAK,YAAY,SAAS,CAAC;AACjC,QAAI,OAAO,MAAM,OAAO,GAAG;AACzB,WAAK,GAAG,MAAM,sBAAsB,UAAU,MAAM,CAAC,OAAO,UAAU,WAAW,CAAC,gBAAW,CAAC,UAAU,EAAE,+BAA+B,SAAS,CAAC,UAAU,EAAE,iBAAiB,IAAI;AAAA,IACtL;AAAA,EACF;AACA,SAAO;AACT;AAIO,SAAS,gBAAgB,QAAgB,QAAe,aAAoB,MAA8B;AAC/G,MAAI,YAAY,SAAS,OAAO,QAAQ;AACtC,SAAK,GAAG,MAAM,iBAAiB,YAAY,MAAM,kBAAkB,OAAO,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,SAAS,OAAO,SAAS,YAAY;AAC3C,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,KAAK,OAAO,SAAS,CAAC;AAC5B,UAAM,KAAK,YAAY,CAAC;AACxB,QAAI,OAAO,MAAM,OAAO,GAAG;AACzB,WAAK,GAAG,MAAM,uBAAuB,UAAU,MAAM,CAAC,OAAO,UAAU,WAAW,CAAC,uBAAkB,CAAC,UAAU,EAAE,+BAA+B,IAAI;AAAA,IACvJ;AAAA,EACF;AACA,SAAO;AACT;AAIO,SAAS,WAAW,QAAgB,WAAkB,QAAe,QAAe,MAA8B;AACvH,QAAM,KAAK,kBAAkB,QAAQ,MAAM;AAC3C,MAAI,CAAC,GAAI,MAAK,GAAG,MAAM,uBAAuB,UAAU,MAAM,CAAC,OAAO,UAAU,MAAM,CAAC,IAAI,IAAI;AAC/F,QAAM,SAAS,kBAAkB,WAAW,EAAE;AAC9C,MAAI,CAAC,OAAQ,MAAK,GAAG,MAAM,UAAU,UAAU,SAAS,CAAC,sCAAsC,UAAU,EAAE,CAAC,IAAI,IAAI;AACpH,SAAO;AACT;AAEO,SAAS,cAAc,QAAgB,QAAe,SAAgB,MAA8B;AACzG,MAAI,CAAC,YAAY,QAAQ,OAAO,GAAG;AACjC,SAAK,GAAG,MAAM,6CAA6C,UAAU,MAAM,CAAC,QAAQ,UAAU,OAAO,CAAC,IAAI,IAAI;AAAA,EAChH;AACA,SAAO;AACT;;;AClPA,IAAI,WAAyB;AAI7B,IAAI,kBAAkB;AAEf,SAAS,eAAsB;AACpC,MAAI,CAAC,UAAU;AACb,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,mBAA4B;AAC1C,SAAO;AACT;AAIO,SAAS,MAAM,IAAoC;AACxD,MAAI,UAAU;AACZ,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,QAAM,IAAI,UAAU;AACpB,aAAW;AACX,oBAAkB;AAClB,MAAI;AACF,UAAM,SAAS,GAAG;AAClB,UAAM,UAAU,MAAM,QAAQ,MAAM,IAAI,SAAS,CAAC,MAAM;AACxD,eAAW,KAAK,SAAS;AACvB;AAAC,MAAC,EAAE,QAAqB,KAAK,EAAE,EAAE;AAAA,IACpC;AAAA,EACF,UAAE;AACA,eAAW;AACX,sBAAkB;AAAA,EACpB;AACA,SAAO;AACT;AAQO,SAAS,UAAa,GAAU,IAAgB;AACrD,MAAI,UAAU;AACZ,UAAM,IAAI,MAAM,8DAA8D;AAAA,EAChF;AACA,aAAW;AAEX,MAAI;AACF,WAAO,GAAG;AAAA,EACZ,UAAE;AACA,eAAW;AAAA,EACb;AACF;AAUA,SAAS,iBAAiB,GAAU,MAAc,OAAyB,OAAqB;AAC9F,MAAI,EAAE,IAAI,KAAK,QAAM,MAAM,SAAS,GAAG,IAAsB,KAAM,GAAyB,SAAS,IAAI,GAAG;AAC1G,UAAM,IAAI,MAAM,eAAe,KAAK,UAAU,IAAI,8BAA8B;AAAA,EAClF;AACF;AAEO,SAAS,WAAW,MAAc,OAAc,QAAe,OAAe;AACnF,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,eAAe,cAAc,GAAG,OAAO;AAClE,QAAM,OAAO,YAAY,YAAY;AACrC,SAAO,MAAM,GAAG,eAAe,OAAO,OAAO,MAAM,EAAE,KAAK,CAAQ;AACpE;AAEO,SAAS,YAAY,MAAc,OAAc,QAAe,OAAe;AACpF,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,eAAe,cAAc,GAAG,OAAO;AAClE,QAAM,OAAO,YAAY,aAAa;AACtC,SAAO,MAAM,GAAG,gBAAgB,OAAO,OAAO,MAAM,EAAE,KAAK,CAAQ;AACrE;AAIO,SAAS,WAAW,MAAc,OAAc,QAAe,OAAO,YAAY,GAAW;AAClG,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,aAAa,GAAG,OAAO;AAClD,QAAM,OAAO,YAAY,YAAY;AACrC,SAAO,MAAM,GAAG,eAAe,OAAO,OAAO,MAAM,EAAE,MAAM,UAAU,CAAQ;AAC/E;;;AC5FO,SAAS,QAA0B,MAAc,GAAS;AAC/D,MAAI,CAAC,iBAAiB,EAAG,QAAO;AAChC,QAAM,IAAI,aAAa;AACvB,MAAI,EAAE,SAAS,IAAI,IAAI,GAAG;AACxB,UAAM,IAAI;AAAA,MACR,kBAAkB,IAAI;AAAA,IAExB;AAAA,EACF;AACA,IAAE,SAAS,IAAI,MAAM,EAAE,EAAE;AACzB,SAAO;AACT;;;ACJA,SAAS,QACP,MACA,MACA,GAAW,GACX,WAAkB,EAAE,OACZ;AACR,QAAM,OAAO,YAAY,IAAI;AAC7B,MAAI,EAAE,UAAU,EAAE,MAAO,OAAM,IAAI,WAAW,GAAG,IAAI,qBAAqB,EAAE,KAAK,OAAO,EAAE,KAAK,KAAK,IAAI;AACxG,QAAM,WAAW,sBAAsB,MAAM,EAAE,OAAO,EAAE,OAAO,IAAI;AACnE,SAAO,MAAM,aAAa,GAAG,MAAM,UAAU,UAAU,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACnF;AAKO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC7E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC9E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC7E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,MAAI,OAAO,MAAM,UAAU;AACzB,QAAI,MAAM,EAAG,OAAM,IAAI,WAAW,sCAAsC,YAAY,KAAK,CAAC;AAC1F,WAAO,UAAU,GAAG,IAAI,CAAC;AAAA,EAC3B;AACA,SAAO,QAAQ,OAAO,OAAO,GAAG,CAAC;AACnC;AAQO,SAAS,UAAU,GAAW,QAAwB;AAC3D,QAAM,OAAO,YAAY,WAAW;AACpC,SAAO,MAAM,aAAa,GAAG,cAAc,EAAE,OAAO,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,CAAC;AACxF;AAEO,SAAS,UAAU,GAAW,QAAwB;AAC3D,QAAM,OAAO,YAAY,WAAW;AACpC,SAAO,MAAM,aAAa,GAAG,cAAc,EAAE,OAAO,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,CAAC;AACxF;AAMA,SAAS,MAAM,MAAiD,GAAmB;AACjF,QAAM,OAAO,YAAY,IAAI;AAC7B,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,GAAG,IAAI,uBAAuB,EAAE,KAAK,IAAI,IAAI;AACzF,SAAO,MAAM,aAAa,GAAG,MAAM,WAAW,MAAM,EAAE,OAAO,IAAI,GAAG,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAC9F;AAEO,IAAM,OAAQ,CAAC,MAAsB,MAAM,QAAS,CAAC;AACrD,IAAM,QAAQ,CAAC,MAAsB,MAAM,SAAS,CAAC;AACrD,IAAM,MAAQ,CAAC,MAAsB,MAAM,OAAS,CAAC;AACrD,IAAM,MAAQ,CAAC,MAAsB,MAAM,OAAS,CAAC;AACrD,IAAM,OAAQ,CAAC,MAAsB,MAAM,QAAS,CAAC;AAOrD,SAAS,SAAS,GAAmB;AAC1C,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,+BAA+B,EAAE,KAAK,IAAI,IAAI;AAC1F,QAAM,WAAW,cAAc,YAAY,EAAE,OAAO,IAAI;AACxD,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAChF;AAEO,SAAS,QAAQ,GAAmB;AACzC,QAAM,OAAO,YAAY,SAAS;AAClC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,8BAA8B,EAAE,KAAK,IAAI,IAAI;AACzF,QAAM,WAAW,aAAa,WAAW,EAAE,OAAO,IAAI;AACtD,SAAO,MAAM,aAAa,GAAG,YAAY,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAC/E;AAGO,SAAS,OAAO,GAAmB;AACxC,SAAO,QAAQ,QAAQ,GAAG,CAAC,EAAE,CAAC,CAAC;AACjC;AAMO,SAAS,QAAQ,GAAW,UAAyB;AAC1D,QAAM,OAAO,YAAY,SAAS;AAClC,QAAM,WAAW,aAAa,WAAW,EAAE,OAAO,UAAU,IAAI;AAChE,SAAO,MAAM,aAAa,GAAG,WAAW,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,UAAU,SAAS,CAAC;AAClG;AAEO,SAAS,UAAU,GAAW,MAAiC;AACpE,QAAM,OAAO,YAAY,WAAW;AACpC,QAAM,WAAW,eAAe,aAAa,EAAE,OAAO,MAAM,IAAI;AAChE,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,KAAK,CAAC;AACtF;AAMO,SAAS,SAAS,GAAW,OAAe,OAAuB;AACxE,QAAM,IAAI,EAAE,MAAM;AAClB,QAAM,OAAO,CAAC,SAAyB,OAAO,IAAI,IAAI,OAAO;AAC7D,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,KAAK,KAAK,MAAM,KAAK,KAAK,KAAK,MAAM,GAAG;AAC1C,UAAM,IAAI,WAAW,2CAAsC,KAAK,KAAK,KAAK,cAAc,CAAC,WAAW,IAAI;AAAA,EAC1G;AACA,MAAI,OAAO,GAAI,QAAO;AACtB,QAAM,OAAO,MAAM,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC;AAClD,OAAK,EAAE,IAAI;AACX,OAAK,EAAE,IAAI;AACX,SAAO,UAAU,GAAG,IAAI;AAC1B;AAMO,SAAS,OAAO,GAAW,GAAmB;AACnD,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,OAAO;AAC1C,UAAM,IAAI,WAAW,6BAA6B,EAAE,KAAK,QAAQ,EAAE,KAAK,IAAI,IAAI;AAAA,EAClF;AACA,QAAM,WAAW,YAAY,UAAU,EAAE,OAAO,EAAE,OAAO,IAAI;AAC7D,SAAO,MAAM,aAAa,GAAG,UAAU,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACpF;AAEO,SAAS,cAAc,GAAW,GAAmB;AAC1D,QAAM,OAAO,YAAY,eAAe;AACxC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,OAAO;AAC1C,UAAM,IAAI,WAAW,oCAAoC,EAAE,KAAK,QAAQ,EAAE,KAAK,IAAI,IAAI;AAAA,EACzF;AACA,QAAM,WAAW,mBAAmB,iBAAiB,EAAE,OAAO,EAAE,OAAO,IAAI;AAC3E,SAAO,MAAM,aAAa,GAAG,kBAAkB,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AAC5F;AAMO,SAAS,OAAO,SAAiB,OAAe,QAAe,OAAe;AACnF,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,oCAAoC,QAAQ,KAAK,IAAI,IAAI;AAAA,EAChF;AACA,QAAM,WAAW,YAAY,UAAU,QAAQ,OAAO,OAAO,IAAI;AACjE,SAAO,MAAM,aAAa,GAAG,WAAW,UAAU,OAAO,MAAM,EAAE,SAAS,QAAQ,IAAI,OAAO,MAAM,CAAC;AACtG;AAOO,SAAS,UAAU,OAAe,SAAyB;AAChE,QAAM,OAAO,YAAY,WAAW;AACpC,MAAI,MAAM,MAAM,WAAW,GAAG;AAC5B,UAAM,IAAI,WAAW,kDAAkD,UAAU,MAAM,KAAK,CAAC,IAAI,IAAI;AAAA,EACvG;AACA,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,uCAAuC,QAAQ,KAAK,IAAI,IAAI;AAAA,EACnF;AACA,SAAO,OAAO,OAAO,SAAS,MAAM,MAAM,CAAC,GAAI,KAAK,GAAG,KAAK;AAC9D;AAGO,SAAS,OAAO,GAAW,QAAe,OAAe;AAC9D,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,KAAK,KAAK,CAAC,OAAO,UAAU,CAAC,GAAG;AAClC,UAAM,IAAI,WAAW,6CAA6C,CAAC,IAAI,IAAI;AAAA,EAC7E;AACA,SAAO,MAAM,aAAa,GAAG,UAAU,CAAC,CAAC,GAAG,OAAO,MAAM,EAAE,GAAG,MAAM,CAAC;AACvE;AASO,SAAS,kBAAkB,GAAmB;AACnD,QAAM,OAAO,YAAY,mBAAmB;AAC5C,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,wCAAwC,EAAE,KAAK,IAAI,IAAI;AACnG,mBAAiB,qBAAqB,EAAE,OAAO,IAAI;AACnD,SAAO,MAAM,aAAa,GAAG,uBAAuB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AACvF;AAGO,SAAS,eAAe,GAAmB;AAChD,QAAM,OAAO,YAAY,gBAAgB;AACzC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,qCAAqC,EAAE,KAAK,IAAI,IAAI;AAChG,SAAO,MAAM,aAAa,GAAG,oBAAoB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AACpF;AAMO,SAAS,YAAY,GAAW,WAA2B;AAChE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,kCAAkC,EAAE,KAAK,IAAI,IAAI;AAC7F,mBAAiB,eAAe,EAAE,OAAO,IAAI;AAC7C,SAAO,MAAM,aAAa,GAAG,gBAAgB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,UAAU,CAAC;AAC3F;AAQO,SAAS,eAAe,GAAW,OAAe,KAAqB;AAC5E,QAAM,OAAO,YAAY,gBAAgB;AACzC,QAAM,WAAW,oBAAoB,kBAAkB,EAAE,OAAO,OAAO,KAAK,IAAI;AAChF,SAAO,MAAM,aAAa,GAAG,oBAAoB,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,IAAI,CAAC;AACnG;AAOO,SAAS,YAAY,GAAW,aAA4B;AACjE,QAAM,OAAO,YAAY,aAAa;AACtC,mBAAiB,eAAe,EAAE,OAAO,aAAa,IAAI;AAC1D,SAAO,MAAM,aAAa,GAAG,gBAAgB,aAAa,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,YAAY,CAAC;AACnG;AAEO,SAAS,WAAW,GAAW,aAA4B;AAChE,QAAM,OAAO,YAAY,YAAY;AACrC,kBAAgB,cAAc,EAAE,OAAO,aAAa,IAAI;AACxD,SAAO,MAAM,aAAa,GAAG,gBAAgB,aAAa,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,YAAY,CAAC;AACnG;AAOO,SAAS,YAAY,OAAe,QAAe,OAAe;AACvE,QAAM,OAAO,YAAY,aAAa;AACtC,SAAO,MAAM,aAAa,GAAG,gBAAgB,CAAC,GAAG,OAAO,MAAM,EAAE,OAAO,MAAM,CAAC;AAChF;AAWO,IAAM,OAAU,CAAC,GAAW,MAAsB,QAAQ,QAAW,QAAW,GAAG,GAAG,MAAM;AAC5F,IAAM,UAAU,CAAC,GAAW,MAAsB,QAAQ,WAAW,WAAW,GAAG,GAAG,MAAM;AAG5F,SAAS,MAAM,MAAc,GAAW,GAAmB;AAChE,QAAM,OAAO,YAAY,OAAO;AAChC,MAAI,KAAK,UAAU,OAAQ,OAAM,IAAI,WAAW,iCAAiC,KAAK,KAAK,IAAI,IAAI;AACnG,MAAI,EAAE,UAAU,EAAE,MAAO,OAAM,IAAI,WAAW,8BAA8B,EAAE,KAAK,OAAO,EAAE,KAAK,KAAK,IAAI;AAC1G,QAAM,WAAW,WAAW,SAAS,KAAK,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI;AACvE,SAAO,MAAM,aAAa,GAAG,SAAS,UAAU,EAAE,OAAO,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACpG;AAIO,SAAS,SAAS,GAAW,IAAoB;AACtD,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,EAAE,UAAU,SAAS,GAAG,UAAU,OAAO;AAC3C,UAAM,IAAI,WAAW,+BAA+B,EAAE,KAAK,QAAQ,GAAG,KAAK,IAAI,IAAI;AAAA,EACrF;AACA,QAAM,WAAW,cAAc,YAAY,EAAE,OAAO,GAAG,OAAO,IAAI;AAClE,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,IAAI,GAAG,GAAG,CAAC;AACzF;AAMO,SAAS,YAAY,GAAW,GAAW,IAAoB;AACpE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAClG,MAAI,EAAE,MAAM,WAAW,EAAE,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,EAAE,MAAM,CAAC,CAAC,GAAG;AACjF,UAAM,IAAI,WAAW,+BAA+B,IAAI;AAAA,EAC1D;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,IAAI,GAAG,CAAC;AAC9F;AAEO,SAAS,YAAY,GAAW,GAAW,IAAoB;AACpE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAClG,MAAI,EAAE,MAAM,WAAW,EAAE,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,EAAE,MAAM,CAAC,CAAC,GAAG;AACjF,UAAM,IAAI,WAAW,+BAA+B,IAAI;AAAA,EAC1D;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,IAAI,GAAG,CAAC;AAC9F;AAEO,SAAS,YACd,GACA,MACA,MACA,KACA,KACA,cAA+B,GACvB;AACR,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAC7E,MAAI,IAAI,UAAU,SAAS,IAAI,MAAM,WAAW,GAAG;AACjD,UAAM,IAAI,WAAW,6CAA6C,IAAI;AAAA,EACxE;AACA,MAAI,EAAE,MAAM,WAAW,KAAK,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,KAAK,MAAM,CAAC,CAAC,GAAG;AACvF,UAAM,IAAI,WAAW,sCAAsC,IAAI;AAAA,EACjE;AAIA,QAAM,WAAW,OAAO,gBAAgB;AACxC,MAAI,UAAU;AACZ,QAAI,YAAY,UAAU,SAAS,YAAY,MAAM,WAAW,GAAG;AACjE,YAAM,IAAI,WAAW,4DAA4D,IAAI;AAAA,IACvF;AAAA,EACF;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM;AAAA,IAClE,GAAG,EAAE;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,KAAK,IAAI;AAAA,IACT;AAAA,IACA,aAAa,WAAW,IAAI;AAAA,IAC5B,mBAAmB,WAAW,YAAY,KAAK;AAAA,EACjD,CAAC;AACH;;;ACrUO,SAAS,WAAW,OAA0B;AACnD,MAAI,MAAM,QAAQ,WAAW,GAAG;AAC9B,UAAM,IAAI,MAAM,kEAAkE,MAAM,QAAQ,MAAM,EAAE;AAAA,EAC1G;AACA,QAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,QAAM,aAAa,MAAM,QAAQ,MAAM;AACvC,MAAI,WAAW,MAAM,WAAW,GAAG;AACjC,UAAM,IAAI;AAAA,MACR,sDAAsD,WAAW,MAAM,KAAK,IAAI,CAAC;AAAA,IAEnF;AAAA,EACF;AAIA,QAAM,iBAAiB,MAAM,IAAI;AACjC,QAAM,aAAa,MAAM,IAAI,MAAM,GAAG,cAAc;AAGpD,QAAM,aAAa,oBAAI,IAAoB;AAE3C,SAAO,UAAU,OAAO,MAAM;AAE5B,eAAW,IAAI,QAAQ,YAAY,GAAK,KAAK,CAAC;AAG9C,aAAS,IAAI,iBAAiB,GAAG,KAAK,GAAG,KAAK;AAC5C,YAAM,KAAK,WAAW,CAAC;AACvB,YAAM,WAAW,WAAW,IAAI,GAAG,GAAG;AACtC,UAAI,CAAC,SAAU;AACf,uBAAiB,IAAI,UAAU,OAAO,UAAU;AAAA,IAClD;AAGA,UAAM,aAAqC,CAAC;AAC5C,eAAW,MAAM,YAAY;AAC3B,UAAI,GAAG,SAAS,cAAe;AAE/B,YAAM,QAAQ,WAAW,IAAI,GAAG,GAAG;AACnC,UAAI,CAAC,OAAO;AAGV,cAAM,IAAI,MAAM,QAAQ,GAAG,GAAG;AAC9B,mBAAW,GAAG,IAAI,IAAI,YAAY,YAAY,GAAK,EAAE,KAAK,GAAG,EAAE,KAAK;AAAA,MACtE,OAAO;AACL,mBAAW,GAAG,IAAI,IAAI;AAAA,MACxB;AAAA,IACF;AAEA,WAAO,EAAE,OAAO,YAAY,MAAM,WAAW;AAAA,EAC/C,CAAC;AACH;AAQA,SAAS,WAAW,YAAiC,SAAiB,cAA4B;AAChG,QAAM,WAAW,WAAW,IAAI,OAAO;AACvC,MAAI,UAAU;AACZ,eAAW,IAAI,SAAS,IAAI,UAAU,YAAY,CAAC;AAAA,EACrD,OAAO;AACL,eAAW,IAAI,SAAS,YAAY;AAAA,EACtC;AACF;AAIA,SAAS,YAAY,OAAe,SAAwB;AAC1D,MAAI,YAAY,MAAM,OAAO,OAAO,EAAG,QAAO;AAC9C,SAAO,WAAW,OAAO,OAAO;AAClC;AAYA,SAAS,iBACP,IACA,UACA,OACA,YACM;AACN,QAAM,WAAW,CAAC,OAAe,MAAM,QAAQ,EAAE;AAEjD,UAAQ,GAAG,MAAM;AAAA;AAAA,IAEf,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH;AAAA;AAAA;AAAA,IAIF,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC;AAC1E;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAK3C,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AACnE,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AACnE;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AAEnE,YAAM,QAAQ,IAAI,UAAU,CAAC;AAC7B,YAAM,MAAM,IAAI,GAAG,CAAC;AACpB,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,IAAI,OAAO,GAAG,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC;AACjF;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,cAAc;AAEjB,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,GAAG,MAAM,CAAC;AAC3D;AAAA,IACF;AAAA,IACA,KAAK,cAAc;AAEjB,iBAAW,YAAY,GAAG,GAAG,QAAQ;AACrC;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,QAAQ;AAEX,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,iBAAW,YAAY,GAAG,GAAG,UAAU,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC;AAC7D;AAAA,IACF;AAAA,IACA,KAAK,SAAS;AAEZ,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,KAAK,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC;AAC3B,iBAAW,YAAY,GAAG,GAAG,UAAU,IAAI,UAAU,EAAE,GAAG,IAAI,CAAC;AAC/D;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC;AAC7C;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC;AAC7C;AAAA,IACF;AAAA,IACA,KAAK,QAAQ;AAEX,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,SAAS,GAAG,QAAQ,CAAC;AAClD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,aAAa;AAGhB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,WAAW,YAAY,UAAU,EAAE,KAAK;AAC9C,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,IAAI,CAAC,CAAC;AACvD;AAAA,IACF;AAAA,IACA,KAAK,YAAY;AAIf,YAAM,IAAI,SAAS,GAAG,CAAC;AAEvB,YAAM,WAAW,QAAQ,UAAU,CAAC,GAAG,SAAS,OAAO,CAAC,CAAC;AACzD,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,WAAW;AAEd,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,QAAQ,UAAU,EAAE,KAAK,CAAC;AACvD;AAAA,IACF;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,MAAM,WAAW,GAAG,IAAI;AAC9B,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,GAAG,CAAC;AACrD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,UAAU;AASb,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAE3C,iBAAW,YAAY,GAAG,GAAG,OAAO,UAAU,SAAS,GAAG,IAAI,EAAE,CAAC,CAAC;AAGlE,YAAM,KAAK,SAAS,GAAG,IAAI,EAAE;AAG7B,UAAI;AACJ,UAAI,EAAE,MAAM,SAAS,GAAG;AACtB,qBAAa,cAAc,IAAI,QAAQ;AAAA,MACzC,OAAO;AACL,qBAAa,OAAO,IAAI,QAAQ;AAAA,MAClC;AAEA,iBAAW,YAAY,GAAG,GAAG,WAAW,YAAY,EAAE,KAAK,CAAC;AAC5D;AAAA,IACF;AAAA,IACA,KAAK,kBAAkB;AAIrB,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,cAAc,UAAU,SAAS,GAAG,IAAI,EAAE,CAAC,CAAC;AACzE,iBAAW,YAAY,GAAG,GAAG,cAAc,SAAS,GAAG,IAAI,EAAE,GAAG,QAAQ,CAAC;AACzE;AAAA,IACF;AAAA;AAAA,IAGA,KAAK;AAGH;AAAA;AAAA,IAGF,KAAK,oBAAoB;AAQvB,YAAM,IAAI,SAAS,GAAG,CAAC;AAIvB,YAAM,IAAI;AAAA,QACR,2OAIW,EAAE,KAAK,OAAO,SAAS,GAAG,GAAG,EAAE,KAAK;AAAA,MACjD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,gBAAgB;AAEnB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,WAAW,UAAU,EAAE,KAAK,CAAC;AAC1D;AAAA,IACF;AAAA,IACA,KAAK,gBAAgB;AAEnB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,oBAAoB;AAGvB,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,KAAK,IAAI,CAAC;AAGhB,YAAM,QAAQ,QAAQ,QAAQ;AAC9B,YAAM,YAAY,QAAQ,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,CAAC;AACpD,YAAM,OAAO,IAAI,IAAI,YAAY,WAAW,EAAE,KAAK,CAAC;AACpD,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,IAAI,CAAC;AAChD;AAAA,IACF;AAAA,IACA,KAAK,uBAAuB;AAK1B,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,OAAO,IAAI,UAAU,CAAC;AAC5B,YAAM,IAAI,QAAQ,IAAI;AACtB,YAAM,QAAQ,QAAQ,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;AACxC,YAAM,QAAQ,IAAI,UAAU,YAAY,OAAO,EAAE,KAAK,CAAC;AACvD,iBAAW,YAAY,GAAG,GAAG,IAAI,OAAO,CAAC,CAAC;AAC1C;AAAA,IACF;AAAA;AAAA,IAEA,KAAK;AAAA,IACL,KAAK;AAEH;AAAA,IAEF,KAAK,SAAS;AAIZ,YAAM,OAAO,SAAS,GAAG,IAAI;AAC7B,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,YAAM,IAAI,SAAS,GAAG,CAAC;AAEvB,YAAM,QAAQ,YAAY,YAAY,GAAG,EAAE,KAAK,GAAG,SAAS,KAAK;AACjE,YAAM,QAAQ,YAAY,YAAY,GAAG,EAAE,KAAK,GAAG,SAAS,KAAK;AACjE,iBAAW,YAAY,GAAG,GAAG,YAAY,MAAM,MAAM,UAAU,KAAK,GAAG,EAAE,KAAK,CAAC;AAC/E,iBAAW,YAAY,GAAG,GAAG,YAAY,MAAM,MAAM,OAAO,QAAQ,GAAG,EAAE,KAAK,CAAC;AAC/E;AAAA,IACF;AAAA,IAEA,KAAK,gBAAgB;AAInB,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH,YAAM,IAAI,MAAM,0CAA0C,GAAG,IAAI,EAAE;AAAA;AAAA,IAGrE,KAAK,aAAa;AAGhB,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA,IAEA,SAAS;AAEP,YAAM,cAAqB;AAC3B,WAAK;AACL,YAAM,IAAI,MAAM,+BAAgC,GAAc,IAAI,EAAE;AAAA,IACtE;AAAA,EACF;AACF;AAMA,SAAS,WAAW,MAAmC;AACrD,QAAM,MAAgB,IAAI,MAAM,KAAK,MAAM;AAC3C,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,IAAK,KAAI,KAAK,CAAC,CAAE,IAAI;AACtD,SAAO;AACT;;;ACrYO,IAAM,KAAK;AAAA,EAChB,UAAU,CAAC,WAA+B,EAAE,MAAM,YAAY,MAAM;AAAA;AAAA;AAAA,EAGpE,aAAa,CAAC,UACX,EAAE,MAAM,eAAe,GAAG,KAAK;AAAA;AAAA;AAAA,EAGlC,aAAa,CAAC,UACX,EAAE,MAAM,eAAe,GAAG,KAAK;AAAA;AAAA;AAAA,EAGlC,QAAQ,CAAC,UACN,EAAE,MAAM,UAAU,GAAG,KAAK;AAC/B;AAGO,SAAS,UAAU,UAAsB,MAAsB;AACpE,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,UAAQ,SAAS,MAAM;AAAA,IACrB,KAAK;AAAY,aAAO,SAAS;AAAA,IACjC,KAAK,eAAe;AAClB,YAAM,IAAI,KAAK,IAAI,OAAO,SAAS,OAAO,CAAC;AAC3C,aAAO,SAAS,QAAQ,SAAS,QAAQ,SAAS,QAAQ;AAAA,IAC5D;AAAA,IACA,KAAK,eAAe;AAClB,YAAM,IAAI,KAAK,IAAI,OAAO,SAAS,OAAO,CAAC;AAC3C,aAAO,SAAS,QAAQ,OAAO,SAAS,OAAO,SAAS,UAAU,IAAI,KAAK,IAAI,KAAK,KAAK,CAAC;AAAA,IAC5F;AAAA,IACA,KAAK,UAAU;AACb,UAAI,QAAQ,SAAS,YAAa,QAAO,SAAS,UAAU,OAAO,SAAS;AAC5E,aAAO,UAAU,SAAS,OAAO,OAAO,SAAS,WAAW;AAAA,IAC9D;AAAA,EACF;AACF;AAKO,SAAS,YAAY,UAA+B;AACzD,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,SAAO,SAAS,SAAS;AAC3B;AA2DO,SAAS,WACd,OACA,YACA,cACA,QAKA,YACY;AACZ,QAAM,gBAAgB,YAAY,OAAO,EAAE;AAC3C,QAAM,YAAY,UAAU,OAAO,IAAI,CAAC;AACxC,QAAM,aAAiC;AAAA,IACrC,IAAI,OAAO;AAAA,IACX,IAAI,OAAO,MAAM;AAAA,IACjB,IAAI,OAAO,MAAM;AAAA,IACjB,KAAK,OAAO,OAAO;AAAA,IACnB,aAAa,OAAO,eAAe;AAAA,IACnC,aAAa,OAAO,gBAAgB,MAAM;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAA8B,CAAC;AACrC,QAAM,eAAe;AAGrB,MAAI,uBAAsC;AAE1C,SAAO,UAAU,OAAO,MAAM;AAC5B,UAAM,MAAM,YAAY,cAAc,CAAC,GAAG,KAAK;AAM/C,UAAM,eAAe,IAAI;AAAA,MACvB,WAAW,cAAc,IACrB,OAAO,KAAK,UAAU,EAAE,OAAO,UAC5B,cAAc,QAAQ,aAAc,WAAW,IAAI,IAAK,WAAW,YAAY,IAAI,CAAC,IACvF,CAAC;AAAA,IACP;AAKA,QAAI,oBAAmC;AACvC,QAAI,iBAAiB,aAAa,OAAO,GAAG;AAC1C,6BAAuB;AACvB,0BAAoB,YAAY,sBAAsB,CAAC,GAAG,KAAK;AAAA,IACjE;AAEA,eAAW,QAAQ,OAAO,KAAK,UAAU,GAAG;AAC1C,YAAM,IAAI,aAAa,IAAI;AAC3B,YAAM,IAAI,WAAW,IAAI;AACzB,UAAI,CAAC,EAAG,OAAM,IAAI,MAAM,yCAAyC,IAAI,GAAG;AACxE,UAAI,CAAC,EAAG,OAAM,IAAI,MAAM,qCAAqC,IAAI,GAAG;AAEpE,YAAM,SAAS,WAAW,UAAU,IAAI,IAAI,EAAE,OAAO,OAAO,CAAC;AAC7D,YAAM,SAAS,WAAW,UAAU,IAAI,IAAI,EAAE,OAAO,OAAO,CAAC;AAM7D,YAAM,cACJ,CAAC,aAAa,IAAI,IAAI,IAAI,IACxB,sBAAsB,OAAO,oBAC7B,IAAI,YAAY,WAAW;AAG/B,YAAM,OAAO,YAAY,QAAQ,GAAG,WAAW,EAAE;AACjD,YAAM,OAAO,YAAY,QAAQ,GAAG,WAAW,EAAE;AACjD,YAAM,OAAO,YAAY,GAAG,MAAM,MAAM,KAAK,WAAW,KAAK,WAAW;AAExE,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,UAAU,IAAI,IAAI,UAAU,QAAQ,CAAC;AAC/E,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,UAAU,IAAI,IAAI,UAAU,QAAQ,CAAC;AAC/E,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,MAAkB,UAAU,QAAQ,CAAC;AAAA,IACjF;AACA,WAAO,EAAE,YAAY,cAAc,sBAAsB,QAAQ,WAAW;AAAA,EAC9E,CAAC;AACH;;;AC7KA,IAAM,aAAoC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,EAAE;AAqB7D,SAAS,YACd,OACA,YACA,iBAAkC,CAAC,GACvB;AACZ,QAAM,UAAwB,CAAC;AAC/B,QAAM,iBAAiB,oBAAI,IAAoB;AAC/C,QAAM,eAAe,oBAAI,IAAoB;AAC7C,QAAM,eAAe,oBAAI,IAAoB;AAC7C,QAAM,mBAAmB,oBAAI,IAAoB;AACjD,QAAM,eAAe,oBAAI,IAAoB;AAG7C,QAAM,qBAAqB,oBAAI,IAAoB;AACnD,aAAW,CAAC,MAAM,MAAM,KAAK,OAAO,QAAQ,UAAU,GAAG;AACvD,uBAAmB,IAAI,OAAO,IAAI,IAAI;AAAA,EACxC;AAEA,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,MAAM,MAAM,IAAK,WAAU,IAAI,GAAG,KAAK,EAAE;AAEpD,QAAM,YAAY,IAAI,IAAI,MAAM,OAAO;AAGvC,aAAW,KAAK,MAAM,SAAS;AAC7B,UAAM,KAAK,UAAU,IAAI,EAAE,EAAE;AAC7B,QAAI,OAA2B;AAC/B,QAAI,OAAsB;AAC1B,QAAI;AAEJ,QAAI,IAAI,SAAS,eAAe;AAC9B,aAAO;AACP,aAAO,GAAG;AAAA,IACZ,WAAW,IAAI,SAAS,gBAAgB;AACtC,aAAO;AACP,aAAO,GAAG;AAAA,IACZ,WAAW,IAAI,SAAS,eAAe;AACrC,aAAO;AACP,aAAO,GAAG;AACV,kBAAY,GAAG;AAAA,IACjB,WAAW,mBAAmB,IAAI,EAAE,EAAE,GAAG;AACvC,aAAO;AACP,aAAO,mBAAmB,IAAI,EAAE,EAAE;AAAA,IACpC,WAAW,UAAU,IAAI,EAAE,EAAE,GAAG;AAC9B,aAAO;AAAA,IACT;AAEA,UAAM,OAAmB;AAAA,MACvB,IAAI,EAAE;AAAA,MACN,UAAU,KAAK,IAAI,GAAG,UAAU,EAAE,KAAK,IAAI,WAAW,EAAE,KAAK,CAAC;AAAA,MAC9D,OAAO,EAAE;AAAA,MACT,OAAO,EAAE;AAAA,MACT;AAAA,MACA;AAAA,MACA,GAAI,cAAc,SAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjD;AACA,YAAQ,KAAK,IAAI;AACjB,mBAAe,IAAI,EAAE,IAAI,EAAE,EAAE;AAE7B,QAAI,SAAS,QAAS,cAAa,IAAI,MAAO,EAAE,EAAE;AAClD,QAAI,SAAS,eAAgB,cAAa,IAAI,MAAO,EAAE,EAAE;AACzD,QAAI,SAAS,aAAc,kBAAiB,IAAI,MAAO,EAAE,EAAE;AAC3D,QAAI,SAAS,QAAS,cAAa,IAAI,MAAO,EAAE,EAAE;AAAA,EACpD;AAEA,QAAM,kBAAkB,MAAM,QAAQ,IAAI,QAAM,eAAe,IAAI,EAAE,CAAE;AAGvE,QAAM,aAA0B,eAAe,IAAI,UAAQ;AACzD,UAAM,cAAc,eAAe,IAAI,KAAK,OAAO,EAAE;AACrD,QAAI,gBAAgB,QAAW;AAC7B,YAAM,IAAI,MAAM,yCAAyC,KAAK,OAAO,EAAE,eAAe;AAAA,IACxF;AACA,UAAM,YAAY,KAAK,aAAa,UAChC,aAAa,IAAI,KAAK,QAAQ,IAC9B,aAAa,IAAI,KAAK,QAAQ;AAClC,QAAI,cAAc,QAAW;AAC3B,YAAM,IAAI,MAAM,+BAA+B,KAAK,QAAQ,KAAK,KAAK,QAAQ,aAAa;AAAA,IAC7F;AACA,UAAM,aAAa,QAAQ,WAAW;AACtC,UAAM,WAAW,QAAQ,SAAS;AAClC,QAAI,WAAW,aAAa,SAAS,UAAU;AAC7C,YAAM,IAAI;AAAA,QACR,4CAA4C,KAAK,QAAQ,KAAK,KAAK,QAAQ,aAChE,WAAW,QAAQ,kBAAkB,SAAS,QAAQ;AAAA,MACnE;AAAA,IACF;AACA,WAAO,EAAE,QAAQ,aAAa,MAAM,WAAW,OAAO,WAAW,SAAS;AAAA,EAC5E,CAAC;AAKD,QAAM,iBAAiB,oBAAI,IAAoB;AAC/C,aAAW,CAAC,MAAM,QAAQ,KAAK,MAAM,UAAU;AAC7C,UAAM,QAAQ,eAAe,IAAI,QAAQ;AACzC,QAAI,UAAU,QAAW;AACvB,YAAM,IAAI,MAAM,yBAAyB,IAAI,gCAAgC,QAAQ,EAAE;AAAA,IACzF;AACA,mBAAe,IAAI,MAAM,KAAK;AAAA,EAChC;AAEA,SAAO,EAAE,SAAS,gBAAgB,cAAc,cAAc,kBAAkB,cAAc,gBAAgB,iBAAiB,WAAW;AAC5I;;;ACjKA,IAAM,UAAU;AAQhB,IAAM,WAAW;AA0BV,SAAS,YAAY,OAAc,MAAgC;AACxE,QAAM,MAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,MAAM,IAAI,QAAQ,KAAK;AACzC,UAAM,KAAK,MAAM,IAAI,CAAC;AACtB,UAAM,OAAO,WAAW,IAAI,OAAO,MAAM,CAAC;AAC1C,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,WAAW,IAAY,OAAc,MAAkB,SAA6B;AAC3F,QAAM,MAAM,CAAC,OAAe,MAAM,QAAQ,EAAE;AAC5C,QAAM,MAAM,CAAC,aAAqB,KAAK,eAAe,IAAI,QAAQ;AAClE,QAAM,QAAQ,OAAmB,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,IAAI,UAAU,CAAC,GAAG,SAAS,GAAG,eAAe,QAAQ;AAExH,UAAQ,GAAG,MAAM;AAAA;AAAA,IAEf,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH,aAAO,MAAM;AAAA;AAAA,IAGf,KAAK,UAAU;AACb,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,OAAO;AAAA,6DAC0C,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,GAAG,CAAC;AAAA,aACJ,YAAY,UAAU,IAAI,KAAK,CAAC;AAAA,GAC1C,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,GAAG,eAAe,QAAQ;AAAA,IAC1G;AAAA,IACA,KAAK,gBAAgB;AACnB,YAAM,OAAO;AAAA,6DAC0C,UAAU,GAAG,KAAK,CAAC;AAAA;AAAA;AAAA,aAGnE,YAAY,GAAG,OAAO,GAAG,KAAK,CAAC;AAAA,GACzC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,eAAe,EAAE;AAAA,IACjG;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK,OAAO;AACV,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,EAAE,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,IAAI,EAAE,GAAG,IAAI;AAChE,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,qBACjC,KAAK;AAAA,GACvB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,cAAc;AACjB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,GAAG,SAAS,eAAe,MAAM;AAC/C,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,MAAM,YAAY,GAAG,QAAQ,IAAI,KAAK;AAC5C,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,kBACA,KAAK,IAAI,GAAG;AAAA,GAC3B,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK,QAAQ;AACX,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OACJ,GAAG,SAAS,SAAU,YACtB,GAAG,SAAS,UAAU,kBACtB,GAAG,SAAS,QAAU,WACtB,GAAG,SAAS,QAAU;AAAA;AAAA,QACA;AAAA;AACxB,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,aAEL,IAAI;AAAA,GACd,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,GAAG,SAAS,SAAS,MAAM;AACzC,YAAM,QAAQ,UAAU,IAAI,KAAK;AAEjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA;AAAA,2BAE5C,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,oCAClB,KAAK;AAAA,GACtC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,SAAS;AACZ,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,OAAO,IAAI,GAAG,IAAI;AACxB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA;AAAA,qDAEkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,KAAK,OAAO,MAAM,CAAC;AAAA,EACvD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA;AAAA,GAEnD,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC/I;AAAA,IAEA,KAAK,aAAa;AAChB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,GAEf,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAClI;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,YAAY;AACf,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,UAAU,GAAG,SAAS,cAAc,OAAO,CAAC,OAAO;AACzD,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA;AAAA,+BAEW,CAAC;AAAA;AAAA;AAAA,iBAGf,OAAO;AAAA,GACrB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA;AAAA;AAAA;AAAA;AAAA,IAMA,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,GAEf,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA,IAEA,KAAK,aAAa;AAChB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AAIjC,YAAM,WAAW,eAAe,EAAE,KAAK;AACvC,YAAM,cAAc,wBAAwB,KAAK,IAAI,OAAO,MAAM;AAClE,YAAM,UAAoB,CAAC;AAC3B,eAAS,IAAI,GAAG,IAAI,EAAE,MAAM,QAAQ,KAAK;AACvC,cAAM,UAAU,GAAG,KAAK,QAAQ,CAAC;AACjC,gBAAQ,KAAK,QAAQ,OAAO,MAAM,SAAS,CAAC,CAAC,GAAG;AAAA,MAClD;AACA,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,WAAW;AAAA,iBACI,QAAQ,KAAK,KAAK,CAAC;AAAA;AAAA,GAEjC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA;AAAA,IAIA,KAAK,UAAU;AACb,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,CAAC;AACnB,YAAM,QAAQ,UAAU,EAAE,KAAK,KAAK,IAAI;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,iBACD,IAAI,CAAC;AAAA,iBACL,IAAI,CAAC;AAAA,iBACL,CAAC;AAAA,iBACD,CAAC;AAAA,qBACG,IAAI,CAAC,WAAW,CAAC;AAAA;AAAA,+BAEP,CAAC;AAAA,mCACG,CAAC;AAAA;AAAA;AAAA,GAGjC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IAEA,KAAK,kBAAkB;AACrB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,QAAQ,UAAU,EAAE,KAAK,KAAK,IAAI;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,iBACD,IAAI,CAAC;AAAA,iBACL,IAAI,CAAC;AAAA,iBACL,CAAC;AAAA,iBACD,CAAC;AAAA,qBACG,IAAI,CAAC,WAAW,CAAC;AAAA,qBACjB,IAAI,CAAC;AAAA;AAAA,+BAEK,CAAC;AAAA,2CACW,CAAC;AAAA;AAAA;AAAA,GAGzC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA;AAAA,IAGA,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,UAAU,IAAI,GAAG,OAAO;AAC9B,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,QAAQ,GAAG;AACjB,YAAM,UAAU,YAAY,GAAG,IAAI,KAAK;AACxC,YAAM,SAAS,YAAY,GAAG,IAAI,KAAK;AACvC,YAAM,OAAO;AAAA;AAAA,6DAE0C,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,uBACK,KAAK;AAAA,uBACL,KAAK;AAAA;AAAA,oBAER,OAAO,KAAK,MAAM;AAAA,GACnC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,OAAO,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC5H;AAAA;AAAA,IAGA,KAAK,oBAAoB;AACvB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA;AAAA,+BAEW,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA,+BAKD,CAAC;AAAA;AAAA;AAAA;AAAA,+BAID,CAAC;AAAA;AAAA;AAAA,GAG7B,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA,IAEA,KAAK,uBAAuB;AAC1B,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA;AAAA,IAG9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA,mBACD,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sCAekB,CAAC;AAAA;AAAA;AAAA,GAGpC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA,IAEA,KAAK,gBAAgB;AACnB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,QAAQ,UAAU,EAAE,KAAK;AAC/B,YAAM,UAAU,YAAY,GAAG,WAAW,KAAK;AAC/C,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,mBACC,CAAC;AAAA,oBACA,CAAC,QAAQ,CAAC;AAAA;AAAA,eAEf,OAAO;AAAA;AAAA;AAAA;AAAA,GAInB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,oBAAoB;AACvB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACvC,YAAM,QAAQ,GAAG,MAAM,GAAG;AAC1B,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,oBACE,KAAK;AAAA,oBACL,KAAK;AAAA,uBACF,IAAI,OAAO,GAAG,KAAK;AAAA,GACvC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,gBAAgB;AACnB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,QAAQ,CAAC;AAAA;AAAA,GAErD,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,iBAAiB;AAEpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,KAAK,GAAG;AACd,YAAM,aAAa,IAAI;AACvB,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,aACL,YAAY,IAAI,KAAK,CAAC,aAAa,YAAY,YAAY,KAAK,CAAC;AAAA,GAC3E,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,iBAAiB;AAEpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,KAAK,GAAG;AACd,YAAM,aAAa,IAAI;AACvB,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,aAEL,YAAY,IAAI,KAAK,CAAC,aAAa,YAAY,YAAY,KAAK,CAAC;AAAA,GAC3E,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,iBAAiB;AAOpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,gBAAgB,GAAG,sBAAsB;AAC/C,YAAM,aAAa,gBAAgB,mBAAmB,YAAY,GAAG,aAAa,KAAK;AACvF,YAAM,gBAAgB,gBAClB;AAAA,oEAEA;AACJ,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAKjB,aAAa;AAAA,2BACY,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,aACL,UAAU,iDAAiD,YAAY,GAAG,KAAK,KAAK,CAAC;AAAA,GAC/F,KAAK;AACF,YAAM,WAAW,gBACb,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,IAAI,GAAG,iBAAkB,GAAG,IAAI,GAAG,GAAG,CAAC,IAC5F,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;AACpE,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC5F;AAAA,IAEA,KAAK,gBAAgB;AAGnB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,OAAO,eAAe,EAAE,OAAO,IAAI,OAAO,EAAE,KAAK;AACvD,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA,EACF;AACF;AAMA,SAAS,UAAU,GAAmC;AAKpD,MAAI,MAAM,OAAQ,QAAO;AACzB,SAAO;AACT;AAEA,SAAS,YAAY,OAAe,OAAuC;AACzE,MAAI,UAAU,OAAO;AACnB,QAAI,OAAO,SAAS,KAAK,GAAG;AAE1B,aAAO,MAAM,SAAS,EAAE,SAAS,GAAG,KAAK,MAAM,SAAS,EAAE,SAAS,GAAG,IAClE,GAAG,KAAK,MACR,GAAG,KAAK;AAAA,IACd;AACA,WAAO,QAAQ,IAAI,YAAY;AAAA,EACjC;AACA,MAAI,UAAU,MAAO,QAAO,GAAG,KAAK,MAAM,KAAK,CAAC;AAChD,SAAO,QAAQ,OAAO;AACxB;AAEA,SAAS,YAAY,MAAc,OAAuC;AACxE,MAAI,UAAU,MAAO,QAAO,OAAO,IAAI;AACvC,MAAI,UAAU,MAAO,QAAO,OAAO,IAAI;AACvC,SAAO,OAAO,IAAI;AACpB;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,UAAoB,IAAI,MAAM,MAAM,MAAM,EAAE,KAAK,CAAC;AACxD,WAAS,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,KAAK;AAC1C,YAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC,IAAK,MAAM,IAAI,CAAC;AAAA,EAC5C;AACA,SAAO;AACT;AAMA,SAAS,wBAAwB,SAAiB,OAAc,QAAwB;AACtF,MAAI,MAAM,WAAW,EAAG,QAAO,SAAS,MAAM;AAC9C,QAAM,UAAU,eAAe,KAAK;AACpC,QAAM,QAAkB,CAAC;AACzB,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,QAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,YAAM,KAAK,SAAS,MAAM,IAAI,CAAC,MAAM,SAAS,GAAG;AAAA,IACnD,OAAO;AACL,YAAM,KAAK,SAAS,MAAM,IAAI,CAAC,MAAM,SAAS,MAAM,QAAQ,CAAC,CAAC,IAAI;AAClE,YAAM,SAAS,GAAG,MAAM,OAAO,CAAC;AAChC,YAAM,KAAK,SAAS,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC,CAAC,IAAI;AAC7D,kBAAY;AAAA,IACd;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcA,SAAS,oBAAoB,SAAiB,UAAiB,UAAiB,QAAwB;AAGtG,QAAM,SAAS,GAAG,MAAM;AACxB,QAAM,YAAY,wBAAwB,SAAS,UAAU,MAAM;AACnE,QAAM,SAAS,SAAS,SAAS,SAAS;AAC1C,MAAI,SAAS,WAAW,GAAG;AACzB,WAAO,GAAG,SAAS;AAAA,QAAW,MAAM;AAAA,EACtC;AACA,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,QAAkB,CAAC;AACzB,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,UAAU,IAAI;AACpB,UAAM,SAAS,SAAS,CAAC;AACzB,UAAM,OAAO,WAAW,IAAI,OAAO,GAAG,MAAM,IAAI,OAAO,MAAM,WAAW,CAAC,CAAC;AAC1E,UAAM,KAAK,IAAI;AAAA,EACjB;AACA,SAAO,GAAG,SAAS;AAAA,QAAW,MAAM,MAAM,MAAM,KAAK,KAAK,CAAC;AAC7D;AAYA,SAAS,eAAe,UAAiB,UAAiB,OAAuC;AAC/F,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,SAAS,SAAS,SAAS,SAAS;AAG1C,QAAM,YAAY,wBAAwB,KAAK,UAAU,KAAK;AAM9D,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,IAAI,QAAQ;AAAE,kBAAY,KAAK,CAAC;AAAG;AAAA,IAAS;AAChD,UAAM,OAAO,SAAS,IAAI,MAAM;AAChC,UAAM,OAAO,SAAS,CAAC;AACvB,QAAI,SAAS,KAAK,OAAO,EAAG,aAAY,KAAK,CAAC;AAAA,EAChD;AAGA,QAAM,YAAsB,CAAC;AAC7B,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,YAAY,SAAS,CAAC,EAAG;AAC7B,UAAM,QAAQ,IAAI;AAClB,cAAU,KAAK,OAAO,KAAK,MAAM,WAAW,CAAC,CAAC,GAAG;AAAA,EACnD;AACA,QAAM,WAAW,UAAU,SAAS,IAAI,UAAU,KAAK,KAAK,IAAI;AAGhE,QAAM,SAAS,CAAC,UAAkB,KAAK,OAAO,QAAQ,CAAC;AACvD,QAAM,QAAkB,CAAC;AACzB,WAAS,QAAQ,GAAG,QAAQ,YAAY,QAAQ,SAAS;AACvD,UAAM,IAAI,YAAY,KAAK;AAC3B,UAAM,MAAM,SAAS,CAAC;AACtB,UAAM,KAAK,GAAG,OAAO,KAAK,CAAC,aAAa,CAAC,iBAAiB,CAAC,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU;AAAA,EAChG;AAEA,QAAM,eAAe,YAAY,IAAI,OAAK,IAAI,CAAC,MAAM,WAAW,CAAC,CAAC,GAAG;AACrE,QAAM,WAAW,aAAa,SAAS,IACnC,GAAG,QAAQ,MAAM,aAAa,KAAK,KAAK,CAAC,KACzC;AACJ,QAAM,KAAK,GAAG,OAAO,YAAY,MAAM,CAAC,aAAa,QAAQ,IAAI;AACjE,WAAS,QAAQ,YAAY,SAAS,GAAG,SAAS,GAAG,SAAS;AAC5D,UAAM,KAAK,GAAG,OAAO,KAAK,CAAC,GAAG;AAAA,EAChC;AAEA,QAAM,QAAQ,SAAS,WAAW,IAAI,IAAK,WAAW,CAAC,IAAK,SAAS,CAAC;AACtE,QAAM,WAAW,YAAY,WAAW,IACpC,eAAe,QAAQ,OACvB,MAAM,KAAK,IAAI;AAEnB,SAAO;AAAA,qDAC4C,UAAU,KAAK,CAAC;AAAA,6DACR,UAAU,KAAK,CAAC;AAAA,2BAClD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,SAAS;AAAA,YACC,UAAU,KAAK,CAAC,MAAM,UAAU,QAAQ,SAAU,UAAU,QAAQ,OAAO,IAAK;AAAA,EAC1F,QAAQ;AAAA;AAAA,GAEP,KAAK;AACR;;;ACzuBO,IAAM,WAAN,MAAe;AAAA,EACpB,YACmB,QACA,MACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAEnB,IAAI,MAA4B;AAC9B,UAAM,IAAI,KAAK,KAAK,IAAI,IAAI;AAC5B,QAAI,CAAC,GAAG;AACN,YAAM,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,CAAC,EAAE,KAAK,EAAE,KAAK,IAAI;AACpD,YAAM,SAAS,QAAQ,oBAAoB,KAAK,KAAK;AACrD,YAAM,IAAI,MAAM,kBAAkB,IAAI,kBAAkB,MAAM,EAAE;AAAA,IAClE;AACA,WAAO;AAAA,EACT;AAAA,EACA,QAAQ,MAAiC;AACvC,UAAM,IAAI,KAAK,OAAO,IAAI;AAC1B,QAAI,CAAC,GAAG;AACN,YAAM,QAAQ,OAAO,KAAK,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,IAAI,KAAK;AAC5D,YAAM,IAAI,MAAM,sBAAsB,IAAI,4BAA4B,KAAK,EAAE;AAAA,IAC/E;AACA,WAAO;AAAA,EACT;AAAA,EACA,IAAI,MAAuB;AAAE,WAAO,KAAK,KAAK,IAAI,IAAI;AAAA,EAAE;AAAA,EACxD,QAAkB;AAAE,WAAO,CAAC,GAAG,KAAK,KAAK,KAAK,CAAC,EAAE,KAAK;AAAA,EAAE;AAC1D;AAkHA,IAAM,aAAa,MAAmB,IAAmB;AACzD,IAAM,WAAW,IAAmB;;;AC5G7B,IAAM,OAAO;AAAA,EAClB,OAAO,CAAC,OAA2B,CAAC,OAAiB,EAAE,MAAM,SAAS,OAAO,KAAK,SAAS,KAAK;AAAA,EAChG,SAAS,CAAC,OAA0B,CAAC,MACnC,KAAK,SAAS,SAAY,EAAE,MAAM,WAAW,MAAM,KAAK,KAAK,IAAI,EAAE,MAAM,UAAU;AAAA,EACrF,SAAS,CAAC,UAAkC,EAAE,MAAM,WAAW,KAAK;AACtE;AAgBA,SAAS,YAAoB;AAC3B,SAAO,KAAK,KAAK,KAAK,KAAK,IAAI,KAAK,IAAI,OAAO,KAAK,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK,KAAK,OAAO,CAAC;AACxG;AAEA,SAAS,QAAQ,OAAuB;AACtC,SAAO,CAAC,SAAS;AACf,UAAM,MAAM,IAAI,aAAa,IAAI;AACjC,aAAS,IAAI,GAAG,IAAI,MAAM,IAAK,KAAI,CAAC,IAAI,UAAU,IAAI;AACtD,WAAO;AAAA,EACT;AACF;AAKA,SAAS,YAAY,MAAoC;AACvD,MAAI,CAAC,QAAQ,SAAS,QAAS,QAAO,QAAQ,IAAI;AAClD,MAAI,SAAS,QAAS,QAAO,CAAC,SAAS,IAAI,aAAa,IAAI;AAC5D,MAAI,SAAS,OAAQ,QAAO,CAAC,SAAS;AAAE,UAAM,IAAI,IAAI,aAAa,IAAI;AAAG,MAAE,KAAK,CAAC;AAAG,WAAO;AAAA,EAAE;AAC9F,UAAQ,KAAK,MAAM;AAAA,IACjB,KAAK;AAAS,aAAO,QAAQ,KAAK,KAAK;AAAA,IACvC,KAAK,WAAW;AACd,YAAM,OAAO,KAAK,QAAQ,KAAK,KAAK,CAAC;AACrC,aAAO,CAAC,MAAM,UAAU;AACtB,cAAM,QAAQ,MAAM,CAAC,KAAK;AAC1B,cAAM,MAAM,OAAO,KAAK,KAAK,KAAK;AAClC,cAAM,MAAM,IAAI,aAAa,IAAI;AACjC,iBAAS,IAAI,GAAG,IAAI,MAAM,IAAK,KAAI,CAAC,IAAI,UAAU,IAAI;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,KAAK,WAAW;AACd,YAAM,OAAO,KAAK;AAClB,aAAO,CAAC,SAAS;AACf,YAAI,KAAK,WAAW,MAAM;AACxB,gBAAM,IAAI,MAAM,6BAA6B,KAAK,MAAM,6BAA6B,IAAI,EAAE;AAAA,QAC7F;AACA,eAAO,IAAI,aAAa,IAAI;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AACF;AAKA,SAAS,aAAa,MAAyC;AAC7D,MAAI,MAAM,UAAU,OAAW,QAAO,KAAK;AAC3C,QAAM,OAAO,MAAM,QAAQ;AAC3B,SAAO,SAAS,WAAW,SAAS;AACtC;AAWA,IAAM,gBAAN,MAAoB;AAAA,EAClB,YACkB,OACA,OACA,QACA,OAChB;AAJgB;AACA;AACA;AACA;AAAA,EACf;AAAA,EAJe;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAEpB;AAMO,IAAe,SAAf,MAAsB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUjB,MAAM,OAAc,MAA6B;AACzD,UAAM,QAAQ,MAAM,SAAS;AAE7B,WAAO,IAAI,cAAc,OAAO,OAAO,YAAY,MAAM,IAAI,GAAG,aAAa,IAAI,CAAC;AAAA,EACpF;AACF;AAwBO,SAAS,kBAAkB,MAAkC;AAClE,QAAM,UAAkC,CAAC;AACzC,QAAM,UAAkC,CAAC;AACzC,QAAM,aAAsC,CAAC;AAC7C,QAAM,MAAM,IAAI,CAAC,MAAM,KAAK,OAAO,QAAQ;AACzC,QAAI,eAAe,eAAe;AAChC,YAAM,IAAI,WAAW,MAAM,IAAI,OAAO,IAAI,KAAK;AAC9C,MAAC,MAAc,GAAG,IAAI;AACvB,cAAQ,IAAI,IAAI;AAChB,cAAQ,IAAI,IAAI,IAAI;AACpB,iBAAW,IAAI,IAAI,IAAI;AAAA,IACzB;AAAA,EACF,CAAC;AACD,SAAO,EAAE,SAAS,SAAS,WAAW;AACxC;AAaA,SAAS,MAAM,MAAe,MAAc,SAAwB;AAClE,MAAI,SAAS,QAAQ,SAAS,OAAW;AACzC,MAAI,OAAO,SAAS,SAAU;AAE9B,MAAI,gBAAgB,QAAQ;AAC1B,eAAW,OAAO,OAAO,KAAK,IAAc,GAAG;AAC7C,YAAM,QAAS,KAAa,GAAG;AAC/B,YAAM,YAAY,OAAO,GAAG,IAAI,IAAI,GAAG,KAAK;AAC5C,iBAAW,OAAO,WAAW,MAAM,KAAK,OAAO;AAAA,IACjD;AACA;AAAA,EACF;AACA,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,SAAK,QAAQ,CAAC,MAAM,MAAM;AACxB,YAAM,YAAY,OAAO,GAAG,IAAI,IAAI,CAAC,KAAK,OAAO,CAAC;AAClD,iBAAW,MAAM,WAAW,MAA2B,GAAG,OAAO;AAAA,IACnE,CAAC;AACD;AAAA,EACF;AAGF;AAEA,SAAS,WAAW,OAAgB,MAAc,OAAe,KAAsB,SAAwB;AAC7G,MAAI,iBAAiB,UAAU,MAAM,QAAQ,KAAK,GAAG;AACnD,UAAM,OAAO,MAAM,OAAO;AAAA,EAC5B,OAAO;AACL,YAAQ,MAAM,OAAO,OAAO,GAAG;AAAA,EACjC;AACF;;;AC3FO,SAAS,sBACd,KACe;AACf,QAAM,MAAqB,CAAC;AAC5B,aAAW,KAAK,OAAO,OAAO,GAAG,EAAG,KAAI,KAAK,EAAE,MAAqB;AACpE,SAAO;AACT;AAaO,SAAS,kBAAkB,GAAqB;AACrD,QAAM,MAAM,IAAI,MAAM,EAAE,OAAO;AAC/B,MAAI,OAAO,EAAE;AACb,MAAI,QAAQ,EAAE;AACd,SAAO;AACT;;;ACvKO,IAAM,cAAN,MAAkB;AAAA,EACf;AAAA,EACA,SAAS;AAAA,EACT,UAAU,oBAAI,IAA6B;AAAA,EAC3C,aAAa;AAAA,EAErB,YAAY,cAAsB;AAChC,UAAM,OAAO,IAAI,KAAK,CAAC,YAAY,GAAG,EAAE,MAAM,yBAAyB,CAAC;AACxE,UAAM,MAAM,IAAI,gBAAgB,IAAI;AACpC,SAAK,SAAS,IAAI,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAIhD,QAAI,gBAAgB,GAAG;AAEvB,SAAK,OAAO,YAAY,CAAC,OAA0B;AACjD,YAAM,QAAQ,GAAG;AACjB,YAAM,WAAW,KAAK,QAAQ,IAAI,MAAM,EAAE;AAC1C,UAAI,CAAC,SAAU;AACf,WAAK,QAAQ,OAAO,MAAM,EAAE;AAC5B,UAAI,MAAM,GAAI,UAAS,QAAQ,MAAM,MAAM;AAAA,UACtC,UAAS,OAAO,kBAAkB,MAAM,KAAK,CAAC;AAAA,IACrD;AAEA,SAAK,OAAO,UAAU,CAAC,OAAmB;AACxC,YAAM,MAAM,IAAI,MAAM,4BAA4B,GAAG,WAAW,SAAS,EAAE;AAC3E,YAAM,OAAkB,EAAE,MAAM,eAAe,SAAS,IAAI,SAAS,OAAO,IAAI,SAAS,GAAG;AAE5F,iBAAW,YAAY,KAAK,QAAQ,OAAO,EAAG,UAAS,OAAO,kBAAkB,IAAI,CAAC;AACrF,WAAK,QAAQ,MAAM;AAAA,IACrB;AAAA,EACF;AAAA;AAAA;AAAA,EAIA,QAAW,KAAsB,WAA0B,CAAC,GAAe;AACzE,QAAI,KAAK,WAAY,QAAO,QAAQ,OAAO,IAAI,MAAM,wCAAwC,CAAC;AAC9F,UAAM,KAAK,KAAK;AAChB,WAAO,IAAI,QAAW,CAAC,SAAS,WAAW;AACzC,WAAK,QAAQ,IAAI,IAAI,EAAE,SAA0C,OAAO,CAAC;AACzE,WAAK,OAAO,YAAY,EAAE,GAAG,KAAK,GAAG,GAAU,QAAQ;AAAA,IACzD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA,EAIA,KAAK,KAAsB,WAA0B,CAAC,GAAS;AAC7D,QAAI,KAAK,WAAY;AACrB,UAAM,KAAK,KAAK;AAChB,SAAK,OAAO,YAAY,EAAE,GAAG,KAAK,GAAG,GAAU,QAAQ;AAAA,EACzD;AAAA,EAEA,YAAkB;AAChB,QAAI,KAAK,WAAY;AACrB,SAAK,aAAa;AAClB,SAAK,OAAO,UAAU;AACtB,UAAM,MAAM,IAAI,MAAM,+BAA+B;AACrD,eAAW,YAAY,KAAK,QAAQ,OAAO,EAAG,UAAS,OAAO,GAAG;AACjE,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;;;ACAO,SAAS,YAAY,SAAmC;AAC7D,QAAM,QAAQ,MAAM,OAAO;AAC3B,QAAM,EAAE,YAAY,KAAK,IAAI,WAAW,KAAK;AAC7C,QAAM,OAAO,YAAY,OAAO,UAAU;AAC1C,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,SAAO,EAAE,OAAO,YAAY,MAAM,MAAM,QAAQ;AAClD;AA6FA,eAAsB,cACpB,cACA,SACA,OAAgC,CAAC,GACL;AAE5B,QAAM,EAAE,OAAO,aAAa,IAAI,YAAY,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACpF,QAAM,EAAE,YAAY,KAAK,IAAI,WAAW,KAAK;AAC7C,QAAM,aAAa,KAAK,OACpB,WAAW,OAAO,YAAY,aAAa,SAAS,KAAK,MAAM,aAAa,UAAU,IACtF;AAEJ,QAAM,OAAO,YAAY,OAAO,YAAY,YAAY,cAAc,CAAC,CAAC;AACxE,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,QAAM,KAAiB,EAAE,OAAO,YAAY,MAAM,MAAM,QAAQ;AAIhE,QAAM,gBAAgB,mBAAmB,MAAM,aAAa,OAAO;AAGnE,QAAM,QAAQ,IAAI,YAAY,wroBAAiB;AAC/C,QAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAC9C,QAAM,WAAW,aAAa,eAAe,UAAU,IAAI;AAC3D,QAAM,YAAY,sBAAsB,aAAa;AAErD,MAAI;AACJ,MAAI;AACF,WAAO,MAAM,MAAM;AAAA,MACjB,EAAE,MAAM,iBAAiB,SAAS,EAAE,SAAS,GAAG,IAAI,QAAQ,eAAe,MAAM,SAAS,EAAE;AAAA,MAC5F;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,UAAM,UAAU;AAChB,UAAM;AAAA,EACR;AAEA,SAAO,IAAI;AAAA,IACT;AAAA;AAAA,IAAqB;AAAA,IAAG;AAAA,IAAI;AAAA,IAAM;AAAA;AAAA,IACpB,aAAa;AAAA;AAAA,IACT,EAAE,GAAG,EAAE;AAAA,EAC3B;AACF;AAOA,eAAsB,eACpB,cACA,SACA,OAAiC,CAAC,GACF;AAChC,QAAM,EAAE,OAAO,aAAa,IAAI,YAAY,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACpF,QAAM,eAAe,MAAM,QAAQ,MAAM,QAAQ,CAAC,CAAE;AACpD,QAAM,OAAO;AAAA,IAAY;AAAA;AAAA,IAAwB,CAAC;AAAA,EAAC;AACnD,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,QAAM,KAAiB,EAAE,OAAO,YAAY,CAAC,GAAG,MAAM,cAAc,MAAM,QAAQ;AAElF,QAAM,gBAAgB,mBAAmB,MAAM,aAAa,OAAO;AACnE,QAAM,QAAQ,IAAI,YAAY,wroBAAiB;AAC/C,QAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAC9C,QAAM,YAAY,sBAAsB,aAAa;AAErD,MAAI;AACJ,MAAI;AACF,WAAO,MAAM,MAAM;AAAA,MACjB,EAAE,MAAM,iBAAiB,SAAS,EAAE,SAAS,GAAG,IAAI,QAAQ,eAAe,MAAM,KAAK,EAAE;AAAA,MACxF;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,UAAM,UAAU;AAChB,UAAM;AAAA,EACR;AAEA,SAAO,IAAI;AAAA,IAA2B;AAAA;AAAA,IAAqB;AAAA,IAAG;AAAA,IAAI;AAAA;AAAA,IAAuB;AAAA,EAAI;AAC/F;AAMA,IAAM,sBAAN,MAAyE;AAAA,EACvE,YACmB,OACA,SACD,IACC,MACA,cAGA,SACA,aACjB;AATiB;AACA;AACD;AACC;AACA;AAGA;AACA;AAAA,EAChB;AAAA,EATgB;AAAA,EACA;AAAA,EACD;AAAA,EACC;AAAA,EACA;AAAA,EAGA;AAAA,EACA;AAAA,EAGnB,IAAI,cAAsB;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACzD,IAAI,cAAiC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACpE,IAAI,aAAgC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAW;AAAA,EAIlE,MAAM,KACJ,QACA,MAC8B;AAK9B,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACxG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,MAAM,EAAE,MAAM,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACrF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAIA,MAAM,IACJ,QACA,MACmC;AAEnC,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,OAAO,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACvG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,QAAQ,EAAE,QAAQ,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACzF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,aAAa,QAAsC,MAA2C;AAE5F,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,gBAAgB,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,SAAS,CAAC,CAAC,MAAM,QAAQ,EAAE;AAAA,IAC/F,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,iBAAwD;AAC5D,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,MAAM,qBAA4D;AAChE,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,sBAAsB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IACnE;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,MAAM,QAAuB;AAI3B,UAAM,gBAAgB,mBAAmB,KAAK,GAAG,MAAM,KAAK,OAAO;AACnE,UAAM,KAAK,aAAa,aAAa;AACrC,UAAM,KAAK,oBAAoB;AAAA,EACjC;AAAA,EAEA,sBAAqC;AACnC,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,eACJ,SACA,OAAuC,CAAC,GACR;AAChC,UAAM,EAAE,OAAO,cAAc,cAAc,IAAI,YAAY,KAAK,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACxG,UAAM,eAAe,MAAM,QAAQ,MAAM,QAAQ,CAAC,CAAE;AACpD,UAAM,OAAO;AAAA,MAAY;AAAA;AAAA,MAAwB,CAAC;AAAA,IAAC;AACnD,UAAM,UAAU,YAAY,OAAO,IAAI;AACvC,UAAM,KAAiB,EAAE,OAAO,YAAY,CAAC,GAAG,MAAM,cAAc,MAAM,QAAQ;AAElF,UAAM,eAAe,KAAK,YAAY;AACtC,UAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAE9C,UAAM,OAAO,MAAM,KAAK,MAAM;AAAA,MAC5B,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,cAAc,eAAe,KAAK,SAAS,IAAI,OAAO,EAAE;AAAA,IACxG;AAEA,WAAO,IAAI;AAAA,MAA2B,KAAK;AAAA,MAAO;AAAA,MAAc;AAAA,MAAI;AAAA;AAAA,MAAuB;AAAA,IAAK;AAAA,EAClG;AAAA,EAEA,UAAgB;AAGd,SAAK,MAAM,KAAK,EAAE,MAAM,WAAW,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE,CAAC;AACvE,SAAK,MAAM,UAAU;AAAA,EACvB;AACF;AAEA,IAAM,6BAAN,MAAkE;AAAA,EAChE,YACmB,OACA,SACD,IACC,MACA,YACjB;AALiB;AACA;AACD;AACC;AACA;AAAA,EAChB;AAAA,EALgB;AAAA,EACA;AAAA,EACD;AAAA,EACC;AAAA,EACA;AAAA,EAGnB,IAAI,cAAsB;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACzD,IAAI,cAAiC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACpE,IAAI,aAAgC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAW;AAAA,EAIlE,MAAM,IACJ,QACA,MACmC;AAEnC,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,OAAO,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACvG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,QAAQ,EAAE,QAAQ,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACzF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,aAAa,QAAsC,MAA2C;AAC5F,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,gBAAgB,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,SAAS,CAAC,CAAC,MAAM,QAAQ,EAAE;AAAA,IAC/F,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,iBAAwD;AAC5D,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,UAAgB;AACd,SAAK,MAAM,KAAK,EAAE,MAAM,WAAW,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE,CAAC;AACvE,QAAI,KAAK,WAAY,MAAK,MAAM,UAAU;AAAA,EAC5C;AACF;AAYA,SAAS,YACP,cACA,SACA,YACoD;AACpD,QAAM,QAAQ,aAAa;AAC3B,MAAI,eAAmC,EAAE,SAAS,CAAC,GAAG,SAAS,CAAC,GAAG,YAAY,CAAC,EAAE;AAClF,QAAM,QAAQ,MAAM,MAAM;AACxB,mBAAe,kBAAkB,KAAK;AACtC,UAAM,eAAuC,CAAC;AAC9C,eAAW,CAAC,MAAM,IAAI,KAAK,OAAO,QAAQ,UAAU,GAAG;AACrD,mBAAa,IAAI,IAAI,YAAY,MAAM,KAAK,OAAO,KAAK,SAAS,KAAK;AAAA,IACxE;AACA,WAAO,QAAQ,OAAO,YAAgC;AAAA,EACxD,CAAC;AACD,SAAO,EAAE,OAAO,aAAa;AAC/B;AAIA,SAAS,mBAAmB,MAAkB,SAA+D;AAC3G,QAAM,MAAoC,CAAC;AAC3C,aAAW,CAAC,MAAM,KAAK,KAAK,KAAK,cAAc;AAC7C,UAAM,QAAQ,KAAK,QAAQ,KAAK,EAAG;AACnC,UAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC5C,UAAM,SAAS,QAAQ,IAAI;AAC3B,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,+BAA+B,IAAI,GAAG;AACnE,QAAI,IAAI,IAAI,OAAO,MAAM,KAAK;AAAA,EAChC;AACA,SAAO;AACT;AAIA,SAAS,eAAe,GAA+B;AACrD,QAAM,IAAwB,EAAE;AAChC,SAAO;AAAA,IACL,IAAI,EAAE;AAAA,IACN,IAAI,EAAE;AAAA,IACN,IAAI,EAAE;AAAA,IACN,KAAK,EAAE;AAAA,IACP,aAAa,EAAE;AAAA,IACf,eAAe,EAAE;AAAA,IACjB,cAAc,EAAE;AAAA,IAChB,sBAAsB,EAAE;AAAA,EAC1B;AACF;AAIA,SAAS,aACP,UACA,eACU;AACV,QAAM,OAAO,oBAAI,IAA0B;AAC3C,MAAI,UAAU;AACZ,eAAW,CAAC,MAAM,GAAG,KAAK,OAAO,QAAQ,QAAQ,EAAG,MAAK,IAAI,MAAM,GAAG;AAAA,EACxE;AACA,SAAO,IAAI,SAAS,eAAe,IAAI;AACzC;;;AC1eA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA6BO,IAAM,SAAN,cAAqB,OAAO;AAAA,EAGjC,YAA4B,OAA+B,QAAgB,OAAsB,CAAC,GAAG;AACnG,UAAM;AADoB;AAA+B;AAEzD,SAAK,IAAI,KAAK,MAAM,CAAC,OAAO,MAAM,CAAC;AACnC,SAAK,IAAI,KAAK,SAAS,QAAQ,OAAO,KAAK,MAAM,CAAC,MAAM,GAAG,EAAE,MAAM,QAAQ,CAAC;AAAA,EAC9E;AAAA,EAJ4B;AAAA,EAA+B;AAAA,EAF3D;AAAA,EACA;AAAA,EAMA,IAAI,GAAmB;AACrB,UAAM,MAAM,OAAO,GAAG,KAAK,CAAC;AAC5B,WAAO,KAAK,IAAI,IAAI,KAAK,KAAK,CAAC,IAAI;AAAA,EACrC;AACF;AAMO,IAAM,YAAN,cAAwB,OAAO;AAAA,EAGpC,YAA4B,GAA2B,MAAc,MAAM;AACzE,UAAM;AADoB;AAA2B;AAErD,SAAK,IAAI,KAAK,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,OAAO,CAAC;AACzC,SAAK,IAAI,KAAK,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,QAAQ,CAAC;AAAA,EAC5C;AAAA,EAJ4B;AAAA,EAA2B;AAAA,EAFvD;AAAA,EACA;AAAA,EAMA,IAAI,GAAmB;AACrB,UAAM,IAAI,SAAS,CAAC;AACpB,UAAM,IAAI,IAAI,GAAG,CAAC;AAClB,UAAM,IAAI,SAAS,IAAI,GAAG,CAAC,CAAC;AAC5B,UAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,GAAG,CAAC;AACnC,WAAO,IAAI,IAAI,IAAI,GAAG,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC;AAAA,EAC/C;AACF;AAUO,SAAS,WAAW,GAAW,QAAwB;AAC5D,QAAM,OAAO,YAAY,YAAY;AACrC,QAAM,IAAI,EAAE,MAAM;AAClB,MAAI,IAAI,EAAG,OAAM,IAAI,WAAW,uCAAuC,CAAC,IAAI,IAAI;AAChF,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,MAAI,IAAI,WAAW,GAAG;AACpB,UAAM,IAAI,WAAW,wBAAwB,CAAC,4BAA4B,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,OAAO,EAAE,MAAM,MAAM,GAAG,IAAI,CAAC;AACnC,QAAM,WAAW,QAAQ,GAAG,CAAC,GAAG,MAAM,GAAG,QAAQ,IAAI,MAAM,CAAC;AAE5D,SAAO,SAAS,UAAU,KAAK,QAAQ,KAAK,SAAS,CAAC;AACxD;AAGO,SAAS,WAAW,GAAmB;AAC5C,QAAM,OAAO,YAAY,YAAY;AACrC,QAAM,IAAI,EAAE,MAAM;AAClB,MAAI,IAAI,EAAG,OAAM,IAAI,WAAW,uCAAuC,CAAC,IAAI,IAAI;AAChF,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,OAAO,EAAE,MAAM,MAAM,GAAG,IAAI,CAAC;AAEnC,QAAM,UAAU,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;AACxC,SAAO,QAAQ,SAAS,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC;AAC7C;AAOO,SAAS,aAAa,UAAoB,MAA8B;AAC7E,QAAM,OAAO,SAAS,IAAI,IAAI;AAC9B,QAAM,QAAQ,SAAS,QAAQ,IAAI;AACnC,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,IAAI,MAAM,kBAAkB,IAAI,iCAAiC,MAAM,KAAK,IAAI,CAAC,GAAG;AAAA,EAC5F;AAGA,QAAM,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,MAAM,CAAC,IAAI;AAC5C,QAAM,IAAI,EAAE,CAAC;AACb,MAAI,SAAS;AACb,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,WAAU,EAAE,CAAC;AAChD,QAAM,WAAW,IAAI;AACrB,MAAI,KAAK,WAAW,UAAU;AAC5B,UAAM,IAAI,MAAM,kBAAkB,IAAI,YAAY,KAAK,MAAM,gCAAgC,QAAQ,EAAE;AAAA,EACzG;AACA,SAAO,MAAM,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,GAAG,MAAM,KAAK,MAAM,IAAI,SAAS,IAAI,KAAK,MAAM,CAAC;AACrF;AAWO,SAAS,iBAAiB,QAAgB,SAAyB;AACxE,QAAM,OAAO,YAAY,kBAAkB;AAC3C,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,8CAA8C,QAAQ,KAAK,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,QAAQ,OAAO,MAAM,OAAO,MAAM,SAAS,CAAC;AAClD,QAAM,KAAK,eAAe,MAAM;AAChC,QAAM,WAAW,QAAQ,IAAI,IAAI,OAAO,SAAS,OAAO,KAAK,CAAC,CAAC;AAC/D,SAAO,IAAI,UAAU,EAAE;AACzB;",
+  "sourcesContent": ["// Intermediate representation for tensor computations.\n//\n// A `Graph` is a flat array of `OpNode`s in topological (= construction) order.\n// A `Tensor` is an opaque handle: shape + dtype + a pointer back to the OpNode\n// that produced it (or `null` for graph leaves \u2014 params and external inputs).\n//\n// This is the data structure everything else operates on:\n//   - tracing builds it (src/trace.ts)\n//   - autograd walks it in reverse to add backward nodes (src/grad.ts, later)\n//   - codegen reads it to emit WGSL kernels and a dispatch plan (src/codegen.ts, later)\n//\n// Design intent: keep this file boring. No tracing logic, no shape inference,\n// no codegen \u2014 those live in their own modules and consume `Graph` / `OpNode`.\n\nexport type Dtype = 'f32' | 'i32' | 'bool'\nexport type Shape = readonly number[]\n\n// A Tensor is just metadata + a unique id. The actual storage doesn't exist\n// until the graph is compiled and run on a device.\nexport interface Tensor {\n  readonly id: number\n  readonly shape: Shape\n  readonly dtype: Dtype\n  // null for leaves (params, external inputs); otherwise the index into Graph.ops.\n  readonly source: number | null\n  // Captured at op-call time so shape errors blame the user's frame, not the\n  // library's. Lazy: only formatted on demand.\n  readonly site: CallSite | null\n}\n\nexport interface CallSite {\n  readonly opName: string\n  // Full Error stack at the point of op invocation. Format on demand.\n  readonly stack: string\n}\n\n// Discriminated union over every op the IR knows about. Adding an op means:\n//   1. add a variant here,\n//   2. add a shape rule in src/shape.ts,\n//   3. add a transpose rule in src/grad.ts (later),\n//   4. add a kernel template in src/codegen.ts (later).\n// The kinds intentionally match the surface API in src/ops.ts one-to-one.\nexport type OpNode =\n  // ---- Leaves ----------------------------------------------------------------\n  // A trainable parameter, supplied by the caller as a Float32Array at runtime.\n  | { kind: 'param_input'; out: number; name: string }\n  // A non-trainable input (tokens, targets, constants). Bound at runtime.\n  | { kind: 'tensor_input'; out: number; name: string }\n  // Persistent state buffer (e.g. Adam's m/v). Allocated and zero-initialized\n  // at compile time; survives across step() calls. Updated via writebacks\n  // declared in the compile result.\n  | { kind: 'state_input'; out: number; name: string; initValue: number }\n\n  // ---- Element-wise --------------------------------------------------------\n  | { kind: 'add'; out: number; a: number; b: number }\n  | { kind: 'sub'; out: number; a: number; b: number }\n  | { kind: 'mul'; out: number; a: number; b: number }\n  | { kind: 'div'; out: number; a: number; b: number }\n  | { kind: 'mul_scalar'; out: number; a: number; scalar: number }\n  | { kind: 'add_scalar'; out: number; a: number; scalar: number }\n\n  // ---- Unary ---------------------------------------------------------------\n  | { kind: 'sqrt'; out: number; a: number }\n  | { kind: 'rsqrt'; out: number; a: number }\n  | { kind: 'log'; out: number; a: number }\n  | { kind: 'exp'; out: number; a: number }\n  | { kind: 'relu'; out: number; a: number }\n\n  // ---- Reductions (over last axis only; reshape if you need other axes) ----\n  | { kind: 'mean_last'; out: number; a: number }   // keepdims=true\n  | { kind: 'sum_last'; out: number; a: number }    // keepdims=false\n\n  // ---- Shape ---------------------------------------------------------------\n  | { kind: 'reshape'; out: number; a: number; newShape: Shape }\n  | { kind: 'transpose'; out: number; a: number; perm: readonly number[] }\n\n  // ---- Linear algebra -----------------------------------------------------\n  // matmul: a [..., M, K] \u00B7 b [K, N] -> [..., M, N]. b is unbatched.\n  // (Batched-on-both-sides matmul, e.g. for attention scores, is a separate kind\n  //  to keep autograd transpose rules simple.)\n  | { kind: 'matmul'; out: number; a: number; b: number }\n  // matmul_batched: a [..., M, K] \u00B7 b [..., K, N] -> [..., M, N]. Used by attention.\n  | { kind: 'matmul_batched'; out: number; a: number; b: number }\n\n  // ---- Indexing / casting --------------------------------------------------\n  | { kind: 'one_hot'; out: number; indices: number; depth: number; dtype: Dtype }\n  | { kind: 'arange'; out: number; n: number; dtype: Dtype }\n\n  // ---- ML primitives (fused for cleaner autograd) -------------------------\n  | { kind: 'softmax_causal_last'; out: number; a: number }\n  | { kind: 'log_softmax_last'; out: number; a: number }\n  // Sets cells where (i >= j) on the last two axes; for masking attention scores\n  // *before* softmax. Lower-triangle entries pass through; upper-triangle entries\n  // become `fillValue` (typically -inf or a large negative number).\n  | { kind: 'where_causal'; out: number; a: number; fillValue: number }\n\n  // ---- Comparisons + selection -------------------------------------------\n  // Element-wise comparison; result is bool (lowered to u32 in storage).\n  // Supports the same trailing-axis broadcast as element-wise binops.\n  | { kind: 'less'; out: number; a: number; b: number }\n  | { kind: 'greater'; out: number; a: number; b: number }\n  // Element-wise select: out[i] = cond[i] ? a[i] : b[i]. cond must be bool.\n  // a, b, cond all broadcast-compatible to out's shape.\n  | { kind: 'where'; out: number; cond: number; a: number; b: number }\n\n  // ---- Optimizer-fused ops (Adam) ----------------------------------------\n  // Each is a single kernel doing the full per-element math, baking in the\n  // hyperparameter constant. Used by appendAdam() to avoid decomposing the\n  // update into ~12 element-wise dispatches per param.\n  | { kind: 'adam_update_m'; out: number; m: number; g: number; b1: number }\n  | { kind: 'adam_update_v'; out: number; v: number; g: number; b2: number }\n  // adam_update_p: p_new = decayShrink * p - lrt[0] * m_new / (sqrt(v_new) + eps).\n  // `lrt` is a scalar tensor (provided as a tensor_input updated per step) that\n  // already includes Adam's bias-correction factor: lrt = lr * sqrt(1-b2^t) / (1-b1^t).\n  // `decayShrink` is the decoupled-weight-decay factor (Loshchilov & Hutter,\n  // \"AdamW\"): 1 - lr * weightDecay when the param is being decayed, 1 otherwise.\n  // It can be either a compile-time literal (number) for fixed-lr training, or a\n  // tensor id pointing at a scalar input that the runtime updates per step (used\n  // when the user supplies an lr schedule via `adam: { lr: (step) => ... }`).\n  | {\n      kind: 'adam_update_p'\n      out: number\n      p: number\n      mNew: number\n      vNew: number\n      lrt: number\n      eps: number\n      decayShrink: number               // literal (used when decayShrinkTensor is null)\n      decayShrinkTensor: number | null  // tensor id of a scalar input; takes precedence when set\n    }\n\n  // ---- Slicing / broadcasting / autograd infrastructure -------------------\n  // Slice [start, end) along the last axis. Output shape: input shape with\n  // last axis replaced by (end - start). Used for splitting Q/K/V from a\n  // single fused QKV matmul.\n  | { kind: 'slice_last_range'; out: number; a: number; start: number; end: number }\n  // Broadcast `a` to `targetShape`. Standard right-aligned NumPy broadcast.\n  // Used by autograd to expand cotangents back over reduced/broadcast axes.\n  | { kind: 'broadcast_to'; out: number; a: number; targetShape: Shape }\n  // Inverse of broadcast_to: sum-reduce `a` to `targetShape`. Used by autograd\n  // to \"un-broadcast\" a cotangent back to the smaller operand's shape.\n  | { kind: 'sum_to_shape'; out: number; a: number; targetShape: Shape }\n  // 0-d tensor with a constant value. Used to seed loss cotangent (1.0).\n  | { kind: 'const_scalar'; out: number; value: number; dtype: Dtype }\n  // ReLU's backward: passes `dy` through where `x > 0`, else 0. Output shape = x's.\n  | { kind: 'relu_grad'; out: number; x: number; dy: number }\n\n// A Graph collects ops and tensors during tracing, then becomes the input to\n// autograd and codegen. Once tracing is done it should be treated as immutable.\nexport interface Graph {\n  readonly ops: OpNode[]\n  readonly tensors: Tensor[]\n  // Names of tensors that should be exposed as outputs of the compiled function.\n  // Set by the trace driver; for a loss function, this is `[lossTensor]`.\n  readonly outputs: number[]\n  // Tensors registered for activation readback via `capture(name, t)`.\n  // Keyed by user-supplied name; insertion order preserved. Empty when no\n  // captures registered (the common training case \u2014 zero overhead).\n  readonly captures: Map<string, number>\n}\n\nexport function makeGraph(): Graph {\n  return { ops: [], tensors: [], outputs: [], captures: new Map() }\n}\n\n// Internal: register a fresh tensor in the graph and return its id.\nexport function addTensor(g: Graph, shape: Shape, dtype: Dtype, source: number | null, site: CallSite | null): Tensor {\n  const id = g.tensors.length\n  const t: Tensor = { id, shape, dtype, source, site }\n  g.tensors.push(t)\n  return t\n}\n\n// Internal: append an op and the tensor it produces. Returns the produced tensor.\n// Generic over the specific op kind so callers don't need `as any` casts.\n// `Extract<OpNode, { kind: K }>` narrows the union to the chosen variant, then\n// `Omit` strips the parts addOp itself supplies (the kind tag and out tensor id).\nexport function addOp<K extends OpNode['kind']>(\n  g: Graph,\n  kind: K,\n  shape: Shape,\n  dtype: Dtype,\n  site: CallSite | null,\n  fields: Omit<Extract<OpNode, { kind: K }>, 'kind' | 'out'>,\n): Tensor {\n  const opIndex = g.ops.length\n  const out = addTensor(g, shape, dtype, opIndex, site)\n  const node = { kind, out: out.id, ...fields } as Extract<OpNode, { kind: K }>\n  g.ops.push(node)\n  return out\n}\n\n// Capture a call site without paying full Error formatting cost up-front.\n// The stack is materialised but parsing/trimming is deferred to error reporting.\nexport function captureSite(opName: string): CallSite {\n  // Skip our own frame plus the op wrapper's frame; user's frame is what's left.\n  const stack = (new Error()).stack ?? ''\n  return { opName, stack }\n}\n\n// Format a CallSite for inclusion in a thrown error. Strips Tensorgrad frames\n// and library internals so the user sees their code first.\nexport function formatSite(site: CallSite): string {\n  const lines = site.stack.split('\\n')\n  // Stack starts with \"Error\" line; drop it. Then drop frames from this file\n  // and from src/ops.ts so the first surviving frame is user code.\n  const userFrames: string[] = []\n  for (const line of lines.slice(1)) {\n    if (line.includes('/tensorgrad/src/') || line.includes('\\\\tensorgrad\\\\src\\\\')) continue\n    userFrames.push(line.trim())\n    if (userFrames.length >= 3) break\n  }\n  if (userFrames.length === 0) return `[${site.opName}] (no user frame found)`\n  return `[${site.opName}]\\n  ${userFrames.join('\\n  ')}`\n}\n", "// Shape inference and validation for each op kind.\n//\n// Every op in src/ops.ts validates its inputs and computes its output shape\n// through helpers here. Errors throw with the captured call-site so the\n// stack trace points at the user's line, not into the library.\n//\n// Broadcasting rules (deliberately limited):\n//   * For element-wise binops (add/sub/mul/div), we support trailing-axis\n//     broadcasting: the smaller operand's shape must be a suffix of the\n//     larger's, with axes of size 1 broadcasting to any size. Examples\n//     ALLOWED:  [B, T, D] op [D]  \u2192  [B, T, D]\n//               [B, T, D] op [1, D]  \u2192 [B, T, D]\n//               [B, T, D] op [B, T, D]  \u2192 [B, T, D]\n//     Examples REJECTED:  [B, T, D] op [B]   (suffix mismatch)\n//                         [B, T, D] op [T, D] when T != B (legal numpy, banned here)\n//   The restriction makes codegen and autograd much simpler and covers every\n//   broadcast pattern in our transformer (biases, layernorm gain/bias, masks).\n\nimport type { Shape, CallSite } from './ir.js'\nimport { formatSite } from './ir.js'\n\n// ============================================================================\n// Errors\n// ============================================================================\n\nexport class ShapeError extends Error {\n  constructor(message: string, site: CallSite | null) {\n    const formatted = site ? `${message}\\n  at ${formatSite(site)}` : message\n    super(formatted)\n    this.name = 'ShapeError'\n  }\n}\n\nfunction fail(message: string, site: CallSite | null): never {\n  throw new ShapeError(message, site)\n}\n\n// ============================================================================\n// Shape utilities\n// ============================================================================\n\nexport function shapesEqual(a: Shape, b: Shape): boolean {\n  if (a.length !== b.length) return false\n  for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false\n  return true\n}\n\nexport function shapeSize(shape: Shape): number {\n  let n = 1\n  for (const d of shape) n *= d\n  return n\n}\n\nexport function showShape(shape: Shape): string {\n  return `[${shape.join(', ')}]`\n}\n\n// Standard right-aligned NumPy-style broadcasting. Pad the shorter shape with\n// leading 1s, then per-axis: equal dims unify, size-1 dims broadcast on either\n// side, otherwise incompatible. Returns the resulting shape or null.\nexport function broadcastTrailing(a: Shape, b: Shape): Shape | null {\n  const rank = Math.max(a.length, b.length)\n  const out: number[] = new Array(rank)\n  for (let i = 0; i < rank; i++) {\n    const ai = i - (rank - a.length)\n    const bi = i - (rank - b.length)\n    const av = ai < 0 ? 1 : a[ai]!\n    const bv = bi < 0 ? 1 : b[bi]!\n    if (av === bv) out[i] = av\n    else if (av === 1) out[i] = bv\n    else if (bv === 1) out[i] = av\n    else return null\n  }\n  return out\n}\n\n// ============================================================================\n// Per-op shape rules\n// ============================================================================\n//\n// Each rule takes the input shapes and returns the output shape, or throws.\n// All rules accept a `site` for error attribution.\n\nexport function inferElementwiseBinop(\n  opName: string, aShape: Shape, bShape: Shape, site: CallSite | null,\n): Shape {\n  const result = broadcastTrailing(aShape, bShape)\n  if (!result) {\n    fail(\n      `${opName}: incompatible shapes ${showShape(aShape)} and ${showShape(bShape)}. ` +\n      `Trailing-suffix broadcasting only \u2014 the smaller shape must be a suffix of the larger, ` +\n      `with size-1 axes broadcasting to any size.`,\n      site,\n    )\n  }\n  return result\n}\n\nexport function inferUnary(_opName: string, aShape: Shape, _site: CallSite | null): Shape {\n  return aShape\n}\n\nexport function inferMeanLast(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot reduce a 0-d tensor`, site)\n  // keepdims=true: replace last axis with 1.\n  return [...aShape.slice(0, -1), 1]\n}\n\nexport function inferSumLast(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot reduce a 0-d tensor`, site)\n  // keepdims=false: drop the last axis.\n  return aShape.slice(0, -1)\n}\n\nexport function inferReshape(opName: string, aShape: Shape, newShape: Shape, site: CallSite | null): Shape {\n  // Validate -1 placeholder (at most one allowed) and total size match.\n  let inferIdx = -1\n  let knownSize = 1\n  for (let i = 0; i < newShape.length; i++) {\n    const d = newShape[i]!\n    if (d === -1) {\n      if (inferIdx !== -1) fail(`${opName}: at most one -1 dim allowed in newShape ${showShape(newShape)}`, site)\n      inferIdx = i\n    } else if (d <= 0) {\n      fail(`${opName}: invalid dim ${d} in newShape ${showShape(newShape)}`, site)\n    } else {\n      knownSize *= d\n    }\n  }\n  const totalIn = shapeSize(aShape)\n  const out = [...newShape]\n  if (inferIdx !== -1) {\n    if (totalIn % knownSize !== 0) {\n      fail(`${opName}: cannot reshape ${showShape(aShape)} (size ${totalIn}) to ${showShape(newShape)} \u2014 known dims multiply to ${knownSize}`, site)\n    }\n    out[inferIdx] = totalIn / knownSize\n  } else if (knownSize !== totalIn) {\n    fail(`${opName}: size mismatch \u2014 input ${showShape(aShape)} has ${totalIn} elements but newShape ${showShape(newShape)} has ${knownSize}`, site)\n  }\n  return out\n}\n\nexport function inferTranspose(opName: string, aShape: Shape, perm: readonly number[], site: CallSite | null): Shape {\n  if (perm.length !== aShape.length) {\n    fail(`${opName}: perm length ${perm.length} must equal input rank ${aShape.length}`, site)\n  }\n  const seen = new Set<number>()\n  for (const p of perm) {\n    if (p < 0 || p >= aShape.length) fail(`${opName}: perm index ${p} out of range for rank ${aShape.length}`, site)\n    if (seen.has(p)) fail(`${opName}: perm has duplicate index ${p}`, site)\n    seen.add(p)\n  }\n  return perm.map(p => aShape[p]!)\n}\n\n// matmul: a [..., M, K] \u00B7 b [K, N]  \u2192  [..., M, N].  b is unbatched.\nexport function inferMatmul(opName: string, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2) fail(`${opName}: lhs must have rank >= 2, got ${showShape(aShape)}`, site)\n  if (bShape.length !== 2) fail(`${opName}: rhs must have rank 2, got ${showShape(bShape)} \u2014 use matmulBatched for batched rhs`, site)\n  const M = aShape[aShape.length - 2]!\n  const Ka = aShape[aShape.length - 1]!\n  const Kb = bShape[0]!\n  const N = bShape[1]!\n  if (Ka !== Kb) fail(`${opName}: inner dims don't match \u2014 ${showShape(aShape)} \u00B7 ${showShape(bShape)} (last axis of lhs = ${Ka}, first axis of rhs = ${Kb})`, site)\n  return [...aShape.slice(0, -2), M, N]\n}\n\n// matmul_batched: a [..., M, K] \u00B7 b [..., K, N]  \u2192  [..., M, N].  Both have leading batch dims.\nexport function inferMatmulBatched(opName: string, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2 || bShape.length < 2) {\n    fail(`${opName}: both inputs must have rank >= 2, got ${showShape(aShape)} and ${showShape(bShape)}`, site)\n  }\n  if (aShape.length !== bShape.length) {\n    fail(`${opName}: ranks must match (got ${aShape.length} vs ${bShape.length}). Reshape if you need different batch dims.`, site)\n  }\n  const aBatch = aShape.slice(0, -2)\n  const bBatch = bShape.slice(0, -2)\n  for (let i = 0; i < aBatch.length; i++) {\n    if (aBatch[i] !== bBatch[i]) {\n      fail(`${opName}: batch dims must match \u2014 ${showShape(aShape)} vs ${showShape(bShape)}`, site)\n    }\n  }\n  const M = aShape[aShape.length - 2]!\n  const Ka = aShape[aShape.length - 1]!\n  const Kb = bShape[bShape.length - 2]!\n  const N = bShape[bShape.length - 1]!\n  if (Ka !== Kb) fail(`${opName}: inner dims don't match \u2014 last axis of lhs = ${Ka}, second-to-last of rhs = ${Kb}`, site)\n  return [...aBatch, M, N]\n}\n\nexport function inferOneHot(opName: string, indicesShape: Shape, depth: number, site: CallSite | null): Shape {\n  if (depth <= 0) fail(`${opName}: depth must be positive, got ${depth}`, site)\n  return [...indicesShape, depth]\n}\n\n// where_causal preserves shape but requires the last two axes to be square.\nexport function inferWhereCausal(opName: string, aShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length < 2) fail(`${opName}: requires rank >= 2, got ${showShape(aShape)}`, site)\n  const m = aShape[aShape.length - 2]!\n  const n = aShape[aShape.length - 1]!\n  if (m !== n) fail(`${opName}: last two axes must be equal (square mask), got ${showShape(aShape)}`, site)\n  return aShape\n}\n\nexport function inferSliceLastRange(opName: string, aShape: Shape, start: number, end: number, site: CallSite | null): Shape {\n  if (aShape.length === 0) fail(`${opName}: cannot slice 0-d tensor`, site)\n  const last = aShape[aShape.length - 1]!\n  if (start < 0 || end > last || start >= end) {\n    fail(`${opName}: invalid range [${start}, ${end}) for last axis of size ${last}`, site)\n  }\n  return [...aShape.slice(0, -1), end - start]\n}\n\n// broadcast_to: validate that `aShape` can broadcast to `targetShape` under\n// right-aligned NumPy rules. Returns targetShape on success.\nexport function inferBroadcastTo(opName: string, aShape: Shape, targetShape: Shape, site: CallSite | null): Shape {\n  if (aShape.length > targetShape.length) {\n    fail(`${opName}: source rank ${aShape.length} > target rank ${targetShape.length}`, site)\n  }\n  const offset = targetShape.length - aShape.length\n  for (let i = 0; i < aShape.length; i++) {\n    const av = aShape[i]!\n    const tv = targetShape[offset + i]!\n    if (av !== tv && av !== 1) {\n      fail(`${opName}: cannot broadcast ${showShape(aShape)} to ${showShape(targetShape)} \u2014 axis ${i} (size ${av}) doesn't match target axis ${offset + i} (size ${tv}) and isn't 1`, site)\n    }\n  }\n  return targetShape\n}\n\n// sum_to_shape: validate that `targetShape` is a valid right-aligned reduction\n// of `aShape` (i.e., aShape can have been produced by broadcasting targetShape).\nexport function inferSumToShape(opName: string, aShape: Shape, targetShape: Shape, site: CallSite | null): Shape {\n  if (targetShape.length > aShape.length) {\n    fail(`${opName}: target rank ${targetShape.length} > source rank ${aShape.length}`, site)\n  }\n  const offset = aShape.length - targetShape.length\n  for (let i = 0; i < targetShape.length; i++) {\n    const av = aShape[offset + i]!\n    const tv = targetShape[i]!\n    if (av !== tv && tv !== 1) {\n      fail(`${opName}: cannot sum-reduce ${showShape(aShape)} to ${showShape(targetShape)} \u2014 target axis ${i} (size ${tv}) must be 1 or match source`, site)\n    }\n  }\n  return targetShape\n}\n\n// Three-way broadcast for `where(cond, a, b)`. All three shapes must broadcast\n// to a common shape under standard NumPy rules.\nexport function inferWhere(opName: string, condShape: Shape, aShape: Shape, bShape: Shape, site: CallSite | null): Shape {\n  const ab = broadcastTrailing(aShape, bShape)\n  if (!ab) fail(`${opName}: a/b incompatible: ${showShape(aShape)} vs ${showShape(bShape)}`, site)\n  const result = broadcastTrailing(condShape, ab)\n  if (!result) fail(`${opName}: cond ${showShape(condShape)} incompatible with broadcast(a, b) ${showShape(ab)}`, site)\n  return result\n}\n\nexport function inferReluGrad(opName: string, xShape: Shape, dyShape: Shape, site: CallSite | null): Shape {\n  if (!shapesEqual(xShape, dyShape)) {\n    fail(`${opName}: x and dy must have matching shapes, got ${showShape(xShape)} and ${showShape(dyShape)}`, site)\n  }\n  return xShape\n}\n", "// Trace driver. Holds the \"current graph\" in module-local state so user code\n// can call ops without threading a graph parameter through every function.\n//\n// Usage:\n//\n//   const graph = trace(() => {\n//     const x = tensorInput('x', [B, T], 'i32')\n//     const w = paramInput('w', [V, D], 'f32')\n//     // ... user computation building tensors ...\n//     return finalLossTensor\n//   })\n//\n// `trace` is single-threaded and re-entrant only via nested calls (which share\n// the outer graph \u2014 but we don't currently have a use for nesting). Calling an\n// op outside a `trace(...)` block is an error.\n\nimport type { Graph, Tensor, Shape, Dtype } from './ir.js'\nimport { makeGraph, addOp, captureSite } from './ir.js'\n\n// Module-local: the graph being built right now, or null if no trace is active.\nlet _current: Graph | null = null\n// Module-local: whether `capture(name, t)` calls should register on the current\n// graph. True only during the user's forward trace; false during `traceInto`\n// (autograd / optimizer ops shouldn't accidentally publish gradient tensors).\nlet _captureEnabled = false\n\nexport function currentGraph(): Graph {\n  if (!_current) {\n    throw new Error(\n      'tensorgrad: ops can only be called inside trace(). ' +\n      'Did you forget to wrap your forward pass?',\n    )\n  }\n  return _current\n}\n\nexport function isCaptureEnabled(): boolean {\n  return _captureEnabled\n}\n\n// Run `fn` with a fresh graph as the current one; capture and return the graph.\n// `fn` must return the tensor (or array of tensors) to mark as graph outputs.\nexport function trace(fn: () => Tensor | Tensor[]): Graph {\n  if (_current) {\n    throw new Error('tensorgrad: nested trace() is not supported')\n  }\n  const g = makeGraph()\n  _current = g\n  _captureEnabled = true\n  try {\n    const result = fn()\n    const outputs = Array.isArray(result) ? result : [result]\n    for (const t of outputs) {\n      ;(g.outputs as number[]).push(t.id)\n    }\n  } finally {\n    _current = null\n    _captureEnabled = false\n  }\n  return g\n}\n\n// Re-enter an existing graph to append more ops. Used by autograd to add\n// backward ops to a graph that's already been traced. `fn` runs with the\n// supplied graph as the current one; any ops it calls append to that graph.\n// Capture is intentionally disabled here \u2014 backward / optimizer rules\n// shouldn't publish their internal tensors via `capture()`.\n// Returns whatever `fn` returns.\nexport function traceInto<T>(g: Graph, fn: () => T): T {\n  if (_current) {\n    throw new Error('tensorgrad: traceInto() called while another trace is active')\n  }\n  _current = g\n  // _captureEnabled stays false (default) \u2014 explicit, but not toggled.\n  try {\n    return fn()\n  } finally {\n    _current = null\n  }\n}\n\n// ---- Leaf tensor builders --------------------------------------------------\n// Inputs are added to the graph as `param_input` or `tensor_input` op nodes.\n// Their .source on the Tensor points at that node so codegen knows where to\n// bind external data.\n\n// Param/tensor inputs share a namespace (a step() call passes both as keys in\n// the same dispatch object); state inputs have their own namespace.\ntype NamedInputKind = 'param_input' | 'tensor_input' | 'state_input'\nfunction assertNameUnused(g: Graph, name: string, kinds: NamedInputKind[], label: string): void {\n  if (g.ops.some(op => kinds.includes(op.kind as NamedInputKind) && (op as { name?: string }).name === name)) {\n    throw new Error(`tensorgrad: ${label} name '${name}' already used in this trace`)\n  }\n}\n\nexport function paramInput(name: string, shape: Shape, dtype: Dtype = 'f32'): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['param_input', 'tensor_input'], 'input')\n  const site = captureSite('paramInput')\n  return addOp(g, 'param_input', shape, dtype, site, { name } as any)\n}\n\nexport function tensorInput(name: string, shape: Shape, dtype: Dtype = 'f32'): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['param_input', 'tensor_input'], 'input')\n  const site = captureSite('tensorInput')\n  return addOp(g, 'tensor_input', shape, dtype, site, { name } as any)\n}\n\n// Persistent state buffer. Allocated at compile time, zero-(or initValue-)initialized,\n// and updated across step() calls via writebacks declared by the optimizer helper.\nexport function stateInput(name: string, shape: Shape, dtype: Dtype = 'f32', initValue = 0): Tensor {\n  const g = currentGraph()\n  assertNameUnused(g, name, ['state_input'], 'state')\n  const site = captureSite('stateInput')\n  return addOp(g, 'state_input', shape, dtype, site, { name, initValue } as any)\n}\n", "// Activation capture \u2014 opt-in readback of intermediate tensors at training step.\n//\n// Usage (inside the user's forward pass):\n//\n//   import { capture } from 'tensorgrad'\n//\n//   function attentionFwd(p, x) {\n//     const scores = mul(matmulBatched(q, kT), SCALE_QK)\n//     const attn = capture(`attn.${layerIdx}`, softmaxCausalLast(scores))\n//     return matmulBatched(attn, v)\n//   }\n//\n// Pass-through return type: `capture(name, t)` returns `t` unchanged so it\n// inlines at the point of computation. Behind the scenes it registers `t.id`\n// against `name` on the current graph; runtime exposes the registered tensors\n// via `step(inputs, { withCaptures: true })`.\n//\n// Outside the user's forward trace (during `appendGrad` / `appendAdam`'s\n// `traceInto` re-entry), `capture()` is a no-op \u2014 gradient and optimizer\n// internals shouldn't accidentally publish themselves to the UI.\n\nimport type { Tensor } from './ir.js'\nimport { currentGraph, isCaptureEnabled } from './trace.js'\n\nexport function capture<T extends Tensor>(name: string, t: T): T {\n  if (!isCaptureEnabled()) return t\n  const g = currentGraph()\n  if (g.captures.has(name)) {\n    throw new Error(\n      `capture: name '${name}' already registered. Use unique names ` +\n      `(e.g. \\`attn.\\${layerIdx}\\`) when capturing across a loop.`,\n    )\n  }\n  g.captures.set(name, t.id)\n  return t\n}\n", "// User-facing op surface.\n//\n// Each function here is a thin wrapper:\n//   1. capture the call site (for error attribution)\n//   2. validate input shapes via src/shape.ts (which throws on mismatch)\n//   3. compute the output shape and dtype\n//   4. append the op to the current Graph (held in module state by src/trace.ts)\n//   5. return the produced Tensor handle\n//\n// No actual numeric work happens here. These calls just build the IR.\n\nimport type { Tensor, Shape, Dtype, OpNode } from './ir.js'\nimport { addOp, captureSite } from './ir.js'\nimport { currentGraph } from './trace.js'\nimport {\n  inferElementwiseBinop, inferUnary, inferMeanLast, inferSumLast,\n  inferReshape, inferTranspose, inferMatmul, inferMatmulBatched,\n  inferOneHot, inferWhereCausal, inferSliceLastRange,\n  inferBroadcastTo, inferSumToShape, inferReluGrad, inferWhere,\n  ShapeError, showShape,\n} from './shape.js'\n\n// ----------------------------------------------------------------------------\n// Element-wise binops (add/sub/mul/div). Trailing-suffix broadcast.\n// ----------------------------------------------------------------------------\n\n/**\n * Build an element-wise binop op (forward declaration only \u2014 appends to the\n * graph). Used by both arithmetic ops (add/sub/mul/div, output dtype = input\n * dtype) and comparisons (less/greater, output dtype = bool).\n */\nfunction binopOp(\n  name: string,\n  kind: OpNode['kind'],\n  a: Tensor, b: Tensor,\n  outDtype: Dtype = a.dtype,\n): Tensor {\n  const site = captureSite(name)\n  if (a.dtype !== b.dtype) throw new ShapeError(`${name}: dtype mismatch (${a.dtype} vs ${b.dtype})`, site)\n  const outShape = inferElementwiseBinop(name, a.shape, b.shape, site)\n  return addOp(currentGraph(), kind, outShape, outDtype, site, { a: a.id, b: b.id })\n}\n\n// Element-wise binops. Second arg can be a Tensor or a JS number; the latter\n// dispatches to scalar-fused IR ops internally. `mul(x, 2)` and `mul(x, y)`\n// both work \u2014 matches every NumPy-shaped library.\nexport function add(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? addScalar(a, b) : binopOp('add', 'add', a, b)\n}\nexport function sub(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? addScalar(a, -b) : binopOp('sub', 'sub', a, b)\n}\nexport function mul(a: Tensor, b: Tensor | number): Tensor {\n  return typeof b === 'number' ? mulScalar(a, b) : binopOp('mul', 'mul', a, b)\n}\nexport function div(a: Tensor, b: Tensor | number): Tensor {\n  if (typeof b === 'number') {\n    if (b === 0) throw new ShapeError(`div: scalar divisor cannot be zero`, captureSite('div'))\n    return mulScalar(a, 1 / b)\n  }\n  return binopOp('div', 'div', a, b)\n}\n\n// ----------------------------------------------------------------------------\n// Element-wise scalar binops (mul/add by JS number). Used for things like\n// `scores * (1/sqrt(d))` and `logits + 1e-5` where allocating a 0-d tensor\n// for the scalar is wasteful.\n// ----------------------------------------------------------------------------\n\nexport function mulScalar(a: Tensor, scalar: number): Tensor {\n  const site = captureSite('mulScalar')\n  return addOp(currentGraph(), 'mul_scalar', a.shape, a.dtype, site, { a: a.id, scalar })\n}\n\nexport function addScalar(a: Tensor, scalar: number): Tensor {\n  const site = captureSite('addScalar')\n  return addOp(currentGraph(), 'add_scalar', a.shape, a.dtype, site, { a: a.id, scalar })\n}\n\n// ----------------------------------------------------------------------------\n// Unary ops.\n// ----------------------------------------------------------------------------\n\nfunction unary(name: 'sqrt' | 'rsqrt' | 'log' | 'exp' | 'relu', a: Tensor): Tensor {\n  const site = captureSite(name)\n  if (a.dtype !== 'f32') throw new ShapeError(`${name}: requires f32, got ${a.dtype}`, site)\n  return addOp(currentGraph(), name, inferUnary(name, a.shape, site), 'f32', site, { a: a.id })\n}\n\nexport const sqrt  = (a: Tensor): Tensor => unary('sqrt',  a)\nexport const rsqrt = (a: Tensor): Tensor => unary('rsqrt', a)\nexport const log   = (a: Tensor): Tensor => unary('log',   a)\nexport const exp   = (a: Tensor): Tensor => unary('exp',   a)\nexport const relu  = (a: Tensor): Tensor => unary('relu',  a)\n\n// ----------------------------------------------------------------------------\n// Reductions over the last axis. To reduce along other axes, transpose first.\n// (This is intentional \u2014 keeps codegen and autograd small.)\n// ----------------------------------------------------------------------------\n\nexport function meanLast(a: Tensor): Tensor {\n  const site = captureSite('meanLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`meanLast: requires f32, got ${a.dtype}`, site)\n  const outShape = inferMeanLast('meanLast', a.shape, site)\n  return addOp(currentGraph(), 'mean_last', outShape, a.dtype, site, { a: a.id })\n}\n\nexport function sumLast(a: Tensor): Tensor {\n  const site = captureSite('sumLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`sumLast: requires f32, got ${a.dtype}`, site)\n  const outShape = inferSumLast('sumLast', a.shape, site)\n  return addOp(currentGraph(), 'sum_last', outShape, a.dtype, site, { a: a.id })\n}\n\n/** Reduce all elements to a 0-d scalar. Composes `reshape` + `sumLast`. */\nexport function sumAll(a: Tensor): Tensor {\n  return sumLast(reshape(a, [-1]))\n}\n\n// ----------------------------------------------------------------------------\n// Shape ops.\n// ----------------------------------------------------------------------------\n\nexport function reshape(a: Tensor, newShape: Shape): Tensor {\n  const site = captureSite('reshape')\n  const outShape = inferReshape('reshape', a.shape, newShape, site)\n  return addOp(currentGraph(), 'reshape', outShape, a.dtype, site, { a: a.id, newShape: outShape })\n}\n\nexport function transpose(a: Tensor, perm: readonly number[]): Tensor {\n  const site = captureSite('transpose')\n  const outShape = inferTranspose('transpose', a.shape, perm, site)\n  return addOp(currentGraph(), 'transpose', outShape, a.dtype, site, { a: a.id, perm })\n}\n\n/** Swap two axes of a tensor. Negative indices count from the end (so\n *  `swapAxes(x, -1, -2)` swaps the last two \u2014 the common attention pattern).\n *  All other axes keep their position. Implemented as `transpose` with the\n *  permutation `[0, 1, ..., axis2, ..., axis1, ..., n-1]`. */\nexport function swapAxes(a: Tensor, axis1: number, axis2: number): Tensor {\n  const r = a.shape.length\n  const norm = (axis: number): number => axis < 0 ? r + axis : axis\n  const i1 = norm(axis1)\n  const i2 = norm(axis2)\n  const site = captureSite('swapAxes')\n  if (i1 < 0 || i1 >= r || i2 < 0 || i2 >= r) {\n    throw new ShapeError(`swapAxes: axis out of range \u2014 got (${axis1}, ${axis2}) for rank-${r} tensor`, site)\n  }\n  if (i1 === i2) return a\n  const perm = Array.from({ length: r }, (_, k) => k)\n  perm[i1] = i2\n  perm[i2] = i1\n  return transpose(a, perm)\n}\n\n// ----------------------------------------------------------------------------\n// Linear algebra.\n// ----------------------------------------------------------------------------\n\nexport function matmul(a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('matmul')\n  if (a.dtype !== 'f32' || b.dtype !== 'f32') {\n    throw new ShapeError(`matmul: requires f32, got ${a.dtype} and ${b.dtype}`, site)\n  }\n  const outShape = inferMatmul('matmul', a.shape, b.shape, site)\n  return addOp(currentGraph(), 'matmul', outShape, 'f32', site, { a: a.id, b: b.id })\n}\n\nexport function matmulBatched(a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('matmulBatched')\n  if (a.dtype !== 'f32' || b.dtype !== 'f32') {\n    throw new ShapeError(`matmulBatched: requires f32, got ${a.dtype} and ${b.dtype}`, site)\n  }\n  const outShape = inferMatmulBatched('matmulBatched', a.shape, b.shape, site)\n  return addOp(currentGraph(), 'matmul_batched', outShape, 'f32', site, { a: a.id, b: b.id })\n}\n\n// ----------------------------------------------------------------------------\n// Indexing / casting.\n// ----------------------------------------------------------------------------\n\nexport function oneHot(indices: Tensor, depth: number, dtype: Dtype = 'f32'): Tensor {\n  const site = captureSite('oneHot')\n  if (indices.dtype !== 'i32') {\n    throw new ShapeError(`oneHot: indices must be i32, got ${indices.dtype}`, site)\n  }\n  const outShape = inferOneHot('oneHot', indices.shape, depth, site)\n  return addOp(currentGraph(), 'one_hot', outShape, dtype, site, { indices: indices.id, depth, dtype })\n}\n\n/** Embedding lookup: pull rows from `table` indexed by `indices`. Decomposes\n *  to `oneHot(indices, vocab) @ table` so autograd works without a dedicated\n *  scatter-with-atomic-add backward \u2014 the matmul transpose rule handles it.\n *  `table` is `[vocab, dim]`; `indices` is any shape `[...]` of i32; result\n *  is `[..., dim]`. The vocab size is taken from `table.shape[0]`. */\nexport function embedding(table: Tensor, indices: Tensor): Tensor {\n  const site = captureSite('embedding')\n  if (table.shape.length !== 2) {\n    throw new ShapeError(`embedding: table must be 2-d [vocab, dim], got ${showShape(table.shape)}`, site)\n  }\n  if (indices.dtype !== 'i32') {\n    throw new ShapeError(`embedding: indices must be i32, got ${indices.dtype}`, site)\n  }\n  return matmul(oneHot(indices, table.shape[0]!, 'f32'), table)\n}\n\n// arange(n) \u2192 [n] of values [0, 1, ..., n-1]. Used for position embeddings.\nexport function arange(n: number, dtype: Dtype = 'i32'): Tensor {\n  const site = captureSite('arange')\n  if (n <= 0 || !Number.isInteger(n)) {\n    throw new ShapeError(`arange: n must be a positive integer, got ${n}`, site)\n  }\n  return addOp(currentGraph(), 'arange', [n], dtype, site, { n, dtype })\n}\n\n// ----------------------------------------------------------------------------\n// ML primitives. Fused so autograd's transpose rule is straightforward and the\n// kernels can be hand-tuned for our specific shapes.\n// ----------------------------------------------------------------------------\n\n// Causal-masked softmax along the last axis. Shape preserved. Last two axes\n// must be square (TxT attention scores).\nexport function softmaxCausalLast(a: Tensor): Tensor {\n  const site = captureSite('softmaxCausalLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`softmaxCausalLast: requires f32, got ${a.dtype}`, site)\n  inferWhereCausal('softmaxCausalLast', a.shape, site)  // shape check (square last 2 axes)\n  return addOp(currentGraph(), 'softmax_causal_last', a.shape, 'f32', site, { a: a.id })\n}\n\n// Numerically-stable log-softmax along the last axis. Shape preserved.\nexport function logSoftmaxLast(a: Tensor): Tensor {\n  const site = captureSite('logSoftmaxLast')\n  if (a.dtype !== 'f32') throw new ShapeError(`logSoftmaxLast: requires f32, got ${a.dtype}`, site)\n  return addOp(currentGraph(), 'log_softmax_last', a.shape, 'f32', site, { a: a.id })\n}\n\n// Pre-softmax causal mask. Sets cells where (i < j) on the last two axes to\n// `fillValue` (typically -1e30). Lower-triangle entries pass through.\n// Use this when you want the masked scores explicitly (e.g. for capture);\n// for the common case, prefer softmaxCausalLast which fuses both.\nexport function whereCausal(a: Tensor, fillValue: number): Tensor {\n  const site = captureSite('whereCausal')\n  if (a.dtype !== 'f32') throw new ShapeError(`whereCausal: requires f32, got ${a.dtype}`, site)\n  inferWhereCausal('whereCausal', a.shape, site)\n  return addOp(currentGraph(), 'where_causal', a.shape, 'f32', site, { a: a.id, fillValue })\n}\n\n// ----------------------------------------------------------------------------\n// Slicing.\n// ----------------------------------------------------------------------------\n\n// sliceLastRange(a, start, end): slice [start, end) along the last axis.\n// Used for splitting Q/K/V from a fused QKV matmul.\nexport function sliceLastRange(a: Tensor, start: number, end: number): Tensor {\n  const site = captureSite('sliceLastRange')\n  const outShape = inferSliceLastRange('sliceLastRange', a.shape, start, end, site)\n  return addOp(currentGraph(), 'slice_last_range', outShape, a.dtype, site, { a: a.id, start, end })\n}\n\n// ----------------------------------------------------------------------------\n// Broadcast / un-broadcast. Mostly used by autograd, but exposed in case user\n// code needs them (e.g. explicit broadcasting for clarity).\n// ----------------------------------------------------------------------------\n\nexport function broadcastTo(a: Tensor, targetShape: Shape): Tensor {\n  const site = captureSite('broadcastTo')\n  inferBroadcastTo('broadcastTo', a.shape, targetShape, site)\n  return addOp(currentGraph(), 'broadcast_to', targetShape, a.dtype, site, { a: a.id, targetShape })\n}\n\nexport function sumToShape(a: Tensor, targetShape: Shape): Tensor {\n  const site = captureSite('sumToShape')\n  inferSumToShape('sumToShape', a.shape, targetShape, site)\n  return addOp(currentGraph(), 'sum_to_shape', targetShape, a.dtype, site, { a: a.id, targetShape })\n}\n\n// ----------------------------------------------------------------------------\n// Constants.\n// ----------------------------------------------------------------------------\n\n// 0-d tensor with a constant value. Used by autograd to seed the loss cotangent.\nexport function constScalar(value: number, dtype: Dtype = 'f32'): Tensor {\n  const site = captureSite('constScalar')\n  return addOp(currentGraph(), 'const_scalar', [], dtype, site, { value, dtype })\n}\n\n// ----------------------------------------------------------------------------\n// Autograd-internal helpers (exposed for users writing custom transpose rules).\n// ----------------------------------------------------------------------------\n\n// ----------------------------------------------------------------------------\n// Comparisons and selection.\n// ----------------------------------------------------------------------------\n\n// Comparisons reuse the binop helper but return bool.\nexport const less    = (a: Tensor, b: Tensor): Tensor => binopOp('less',    'less',    a, b, 'bool')\nexport const greater = (a: Tensor, b: Tensor): Tensor => binopOp('greater', 'greater', a, b, 'bool')\n\n// where(cond, a, b): elementwise select. cond is bool; a and b can be any matching dtype.\nexport function where(cond: Tensor, a: Tensor, b: Tensor): Tensor {\n  const site = captureSite('where')\n  if (cond.dtype !== 'bool') throw new ShapeError(`where: cond must be bool, got ${cond.dtype}`, site)\n  if (a.dtype !== b.dtype) throw new ShapeError(`where: a/b dtype mismatch (${a.dtype} vs ${b.dtype})`, site)\n  const outShape = inferWhere('where', cond.shape, a.shape, b.shape, site)\n  return addOp(currentGraph(), 'where', outShape, a.dtype, site, { cond: cond.id, a: a.id, b: b.id })\n}\n\n// reluGrad(x, dy) = dy where x > 0, else 0. Same shape as x. This is the\n// transpose rule for relu, exposed as an op so codegen can emit it.\nexport function reluGrad(x: Tensor, dy: Tensor): Tensor {\n  const site = captureSite('reluGrad')\n  if (x.dtype !== 'f32' || dy.dtype !== 'f32') {\n    throw new ShapeError(`reluGrad: requires f32, got ${x.dtype} and ${dy.dtype}`, site)\n  }\n  const outShape = inferReluGrad('reluGrad', x.shape, dy.shape, site)\n  return addOp(currentGraph(), 'relu_grad', outShape, 'f32', site, { x: x.id, dy: dy.id })\n}\n\n// ----------------------------------------------------------------------------\n// Adam-fused ops. Each does its full per-element update in one kernel.\n// ----------------------------------------------------------------------------\n\nexport function adamUpdateM(m: Tensor, g: Tensor, b1: number): Tensor {\n  const site = captureSite('adamUpdateM')\n  if (m.dtype !== 'f32' || g.dtype !== 'f32') throw new ShapeError(`adamUpdateM: requires f32`, site)\n  if (m.shape.length !== g.shape.length || m.shape.some((d, i) => d !== g.shape[i])) {\n    throw new ShapeError(`adamUpdateM: shape mismatch`, site)\n  }\n  return addOp(currentGraph(), 'adam_update_m', m.shape, 'f32', site, { m: m.id, g: g.id, b1 })\n}\n\nexport function adamUpdateV(v: Tensor, g: Tensor, b2: number): Tensor {\n  const site = captureSite('adamUpdateV')\n  if (v.dtype !== 'f32' || g.dtype !== 'f32') throw new ShapeError(`adamUpdateV: requires f32`, site)\n  if (v.shape.length !== g.shape.length || v.shape.some((d, i) => d !== g.shape[i])) {\n    throw new ShapeError(`adamUpdateV: shape mismatch`, site)\n  }\n  return addOp(currentGraph(), 'adam_update_v', v.shape, 'f32', site, { v: v.id, g: g.id, b2 })\n}\n\nexport function adamUpdateP(\n  p: Tensor,\n  mNew: Tensor,\n  vNew: Tensor,\n  lrt: Tensor,\n  eps: number,\n  decayShrink: number | Tensor = 1,\n): Tensor {\n  const site = captureSite('adamUpdateP')\n  if (p.dtype !== 'f32') throw new ShapeError(`adamUpdateP: requires f32`, site)\n  if (lrt.dtype !== 'f32' || lrt.shape.length !== 0) {\n    throw new ShapeError(`adamUpdateP: lrt must be a 0-d f32 scalar`, site)\n  }\n  if (p.shape.length !== mNew.shape.length || p.shape.some((d, i) => d !== mNew.shape[i])) {\n    throw new ShapeError(`adamUpdateP: p/mNew shape mismatch`, site)\n  }\n  // decayShrink is either a literal (baked into the kernel) or a 0-d scalar\n  // tensor input the runtime updates per step. The kernel binds at most one,\n  // chosen by whichever the caller provided.\n  const isTensor = typeof decayShrink === 'object'\n  if (isTensor) {\n    if (decayShrink.dtype !== 'f32' || decayShrink.shape.length !== 0) {\n      throw new ShapeError(`adamUpdateP: decayShrink tensor must be a 0-d f32 scalar`, site)\n    }\n  }\n  return addOp(currentGraph(), 'adam_update_p', p.shape, 'f32', site, {\n    p: p.id,\n    mNew: mNew.id,\n    vNew: vNew.id,\n    lrt: lrt.id,\n    eps,\n    decayShrink: isTensor ? 1 : decayShrink,\n    decayShrinkTensor: isTensor ? decayShrink.id : null,\n  })\n}\n", "// Reverse-mode autograd over a traced Graph.\n//\n// Given a graph that ends in a scalar loss tensor, this module walks the ops\n// in reverse and appends backward ops to the same graph, computing dL/dT for\n// every Tensor T that descends from a `param_input`. The final cotangents on\n// the param_input tensors are the parameter gradients.\n//\n// Cotangent accumulation: a tensor with multiple consumers ends up with\n// contributions from each. We add them as we encounter them, so by the time\n// reverse iteration reaches a tensor's producer op, its cotangent is complete.\n//\n// Why this works as \"more graph nodes\": the transpose rule for an op like\n// mul(a, b)\u2192c is `da += dc * b; db += dc * a`. The right-hand sides are\n// expressible in terms of existing forward ops (mul) plus accumulation (add).\n// We just call those op functions, which append nodes to the current graph\n// because we run inside an active trace context.\n\nimport type { Graph, OpNode, Tensor, Shape } from './ir.js'\nimport {\n  add, sub, mul, div, mulScalar,\n  matmul, matmulBatched, transpose, swapAxes, reshape,\n  exp,\n  broadcastTo, sumToShape,\n  constScalar, reluGrad,\n  sumLast, where,\n} from './ops.js'\nimport { traceInto } from './trace.js'\nimport { shapesEqual } from './shape.js'\n\n// ============================================================================\n// Public API\n// ============================================================================\n\nexport interface GradResult {\n  // The graph, augmented with backward ops.\n  readonly graph: Graph\n  // Cotangents (gradients) for each param_input, keyed by param name.\n  readonly paramGrads: Record<string, Tensor>\n  // The loss output (unchanged from input).\n  readonly loss: Tensor\n}\n\n// `appendGrad(graph)` augments `graph` (which must have already been built by\n// `trace(...)` and must have a single scalar output = the loss) with backward\n// ops. Returns gradients for every param_input.\n//\n// Internally re-enters the graph as the active trace context, so backward ops\n// emitted by transpose rules append to it. The caller doesn't need to manage\n// trace state.\nexport function appendGrad(graph: Graph): GradResult {\n  if (graph.outputs.length !== 1) {\n    throw new Error(`autograd: expected graph with exactly 1 output (the loss); got ${graph.outputs.length}`)\n  }\n  const lossId = graph.outputs[0]!\n  const lossTensor = graph.tensors[lossId]!\n  if (lossTensor.shape.length !== 0) {\n    throw new Error(\n      `autograd: loss must be a rank-0 scalar; got shape [${lossTensor.shape.join(', ')}]. ` +\n      `Reduce with sumLast / mulScalar to a scalar before calling appendGrad.`,\n    )\n  }\n\n  // Snapshot the forward portion of the graph before we start emitting backward\n  // ops, so the reverse walk only iterates over forward ops.\n  const forwardOpCount = graph.ops.length\n  const forwardOps = graph.ops.slice(0, forwardOpCount)\n\n  // cotangents: tensorId -> the Tensor representing dL/dTensor in the graph.\n  const cotangents = new Map<number, Tensor>()\n\n  return traceInto(graph, () => {\n    // Seed: dL/dLoss = 1.0\n    cotangents.set(lossId, constScalar(1.0, 'f32'))\n\n    // Reverse walk.\n    for (let i = forwardOpCount - 1; i >= 0; i--) {\n      const op = forwardOps[i]!\n      const outCotan = cotangents.get(op.out)\n      if (!outCotan) continue\n      runTransposeRule(op, outCotan, graph, cotangents)\n    }\n\n    // Collect param gradients by name. Skip non-param leaves.\n    const paramGrads: Record<string, Tensor> = {}\n    for (const op of forwardOps) {\n      if (op.kind !== 'param_input') continue\n      // (state_input and tensor_input don't produce gradients we hand back.)\n      const cotan = cotangents.get(op.out)\n      if (!cotan) {\n        // No path from this param to the loss \u2014 emit explicit zeros so the\n        // caller gets a tensor with the right shape.\n        const t = graph.tensors[op.out]!\n        paramGrads[op.name] = broadcastTo(constScalar(0.0, t.dtype), t.shape)\n      } else {\n        paramGrads[op.name] = cotan\n      }\n    }\n\n    return { graph, paramGrads, loss: lossTensor }\n  })\n}\n\n// ============================================================================\n// Cotangent accumulation\n// ============================================================================\n\n// Add `contribution` to the cotangent of tensor `inputId`. If a cotangent\n// already exists, sum them (multiple consumers); otherwise initialize.\nfunction accumulate(cotangents: Map<number, Tensor>, inputId: number, contribution: Tensor): void {\n  const existing = cotangents.get(inputId)\n  if (existing) {\n    cotangents.set(inputId, add(existing, contribution))\n  } else {\n    cotangents.set(inputId, contribution)\n  }\n}\n\n// Reduce a cotangent to match the input's shape, undoing any broadcast that\n// occurred during forward. If `fromShape == toShape`, no-op.\nfunction unbroadcast(cotan: Tensor, toShape: Shape): Tensor {\n  if (shapesEqual(cotan.shape, toShape)) return cotan\n  return sumToShape(cotan, toShape)\n}\n\n\n// ============================================================================\n// Transpose rules\n// ============================================================================\n//\n// One per OpNode kind. Each rule:\n//   * receives the forward op + its output cotangent\n//   * builds the backward expression(s) in graph terms (calling ops.ts functions)\n//   * accumulates cotangent contributions onto each input tensor\n\nfunction runTransposeRule(\n  op: OpNode,\n  outCotan: Tensor,\n  graph: Graph,\n  cotangents: Map<number, Tensor>,\n): void {\n  const tensorOf = (id: number) => graph.tensors[id]!\n\n  switch (op.kind) {\n    // ---- Leaves: no inputs to accumulate into. -----------------------------\n    case 'param_input':\n    case 'tensor_input':\n    case 'state_input':\n    case 'arange':\n    case 'const_scalar':\n      return\n\n    // ---- Element-wise binops (with broadcast) ------------------------------\n    // c = a op b; reduce cotan back to each operand's shape.\n    case 'add': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(outCotan, a.shape))\n      accumulate(cotangents, op.b, unbroadcast(outCotan, b.shape))\n      return\n    }\n    case 'sub': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(outCotan, a.shape))\n      accumulate(cotangents, op.b, unbroadcast(mulScalar(outCotan, -1), b.shape))\n      return\n    }\n    case 'mul': {\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      // dC/dA = b ; dC/dB = a. Both are forward tensors still alive in the graph.\n      // We must NOT consume the forward tensors \u2014 they're referenced by id.\n      // The mul() helper allocates fresh tensors, so referencing a/b multiple\n      // times in different mul() calls is fine: we just emit fresh ops.\n      accumulate(cotangents, op.a, unbroadcast(mul(outCotan, b), a.shape))\n      accumulate(cotangents, op.b, unbroadcast(mul(outCotan, a), b.shape))\n      return\n    }\n    case 'div': {\n      // c = a/b. dc/da = 1/b. dc/db = -a/b^2.\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, unbroadcast(div(outCotan, b), a.shape))\n      // -outCotan * a / (b*b)\n      const numer = mul(outCotan, a)\n      const bSq = mul(b, b)\n      accumulate(cotangents, op.b, unbroadcast(mulScalar(div(numer, bSq), -1), b.shape))\n      return\n    }\n\n    // ---- Element-wise scalar binops (scalar is a JS number, not a tensor) -\n    case 'mul_scalar': {\n      // c = a * s. dc/da = s.\n      accumulate(cotangents, op.a, mulScalar(outCotan, op.scalar))\n      return\n    }\n    case 'add_scalar': {\n      // c = a + s. dc/da = 1.\n      accumulate(cotangents, op.a, outCotan)\n      return\n    }\n\n    // ---- Unary -------------------------------------------------------------\n    case 'sqrt': {\n      // c = sqrt(a). dc/da = 1/(2*sqrt(a)) = 1/(2*c).\n      const c = tensorOf(op.out)\n      accumulate(cotangents, op.a, mulScalar(div(outCotan, c), 0.5))\n      return\n    }\n    case 'rsqrt': {\n      // c = a^(-0.5). dc/da = -0.5 * a^(-1.5) = -0.5 * c^3.\n      const c = tensorOf(op.out)\n      const c3 = mul(mul(c, c), c)\n      accumulate(cotangents, op.a, mulScalar(mul(outCotan, c3), -0.5))\n      return\n    }\n    case 'log': {\n      // c = log(a). dc/da = 1/a.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, div(outCotan, a))\n      return\n    }\n    case 'exp': {\n      // c = exp(a). dc/da = exp(a) = c.\n      const c = tensorOf(op.out)\n      accumulate(cotangents, op.a, mul(outCotan, c))\n      return\n    }\n    case 'relu': {\n      // c = relu(a). dc/da = (a > 0 ? 1 : 0). Use the fused relu_grad op.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, reluGrad(a, outCotan))\n      return\n    }\n\n    // ---- Reductions over last axis ---------------------------------------\n    case 'mean_last': {\n      // c[..., 1] = mean over last axis of a[..., D]. da[..., d] = dc[..., 0] / D.\n      // outCotan has shape [..., 1]; broadcast to a's shape and divide by D.\n      const a = tensorOf(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const expanded = broadcastTo(outCotan, a.shape)\n      accumulate(cotangents, op.a, mulScalar(expanded, 1 / D))\n      return\n    }\n    case 'sum_last': {\n      // c[...] = sum over last axis (keepdims=false). da[..., d] = dc[...].\n      // outCotan has rank one less than a; broadcast to a's shape (which inserts\n      // back the last axis with a's last-axis size).\n      const a = tensorOf(op.a)\n      // First reshape outCotan to add a trailing 1, then broadcast to a's shape.\n      const withKeep = reshape(outCotan, [...outCotan.shape, 1])\n      accumulate(cotangents, op.a, broadcastTo(withKeep, a.shape))\n      return\n    }\n\n    // ---- Shape ------------------------------------------------------------\n    case 'reshape': {\n      // c = reshape(a, ...). Backward: reshape outCotan back to a's shape.\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, reshape(outCotan, a.shape))\n      return\n    }\n    case 'transpose': {\n      // c = transpose(a, perm). Backward: transpose outCotan with inverse perm.\n      const inv = invertPerm(op.perm)\n      accumulate(cotangents, op.a, transpose(outCotan, inv))\n      return\n    }\n\n    // ---- Linear algebra ---------------------------------------------------\n    case 'matmul': {\n      // c = a @ b, where a: [..., M, K], b: [K, N], c: [..., M, N].\n      // dA = dC @ B^T  (matmul, since b is unbatched)\n      // dB = sum_over_batch( A^T @ dC )\n      //\n      // Implementation note: dA uses the same `matmul` (a [...,M,N] \u00B7 b [N,K])\n      // because b is rank-2. dB needs A^T which has shape [..., K, M], then\n      // matmul with dC ([..., M, N]) gives [..., K, N], which we sum over\n      // leading batch dims to get [K, N].\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      // dA = dC @ B^T\n      accumulate(cotangents, op.a, matmul(outCotan, swapAxes(b, -1, -2)))\n      // dB: per-batch A^T @ dC, then sum over batch dims.\n      // A is [..., M, K]; transpose last two axes.\n      const aT = swapAxes(a, -1, -2)  // [..., K, M]\n      // matmul_batched needs same rank on both sides. dC has rank `a.rank`;\n      // aT has rank `a.rank`; use matmul_batched if rank > 2, else matmul.\n      let perBatchDb: Tensor\n      if (a.shape.length > 2) {\n        perBatchDb = matmulBatched(aT, outCotan)  // [..., K, N]\n      } else {\n        perBatchDb = matmul(aT, outCotan)  // [K, N]\n      }\n      // Sum over leading batch dims to collapse to b's shape [K, N].\n      accumulate(cotangents, op.b, sumToShape(perBatchDb, b.shape))\n      return\n    }\n    case 'matmul_batched': {\n      // c = a @ b, both [..., M, K] \u00B7 [..., K, N] -> [..., M, N].\n      // dA = dC @ B^T   (per-batch, all batch dims preserved)\n      // dB = A^T @ dC   (per-batch)\n      const a = tensorOf(op.a), b = tensorOf(op.b)\n      accumulate(cotangents, op.a, matmulBatched(outCotan, swapAxes(b, -1, -2)))\n      accumulate(cotangents, op.b, matmulBatched(swapAxes(a, -1, -2), outCotan))\n      return\n    }\n\n    // ---- Indexing / casting (no gradient through integer indices) --------\n    case 'one_hot':\n      // The output is float, but the input (indices) is integer-valued \u2014 no\n      // continuous gradient flows through it. Stop here.\n      return\n\n    // ---- Slicing ---------------------------------------------------------\n    case 'slice_last_range': {\n      // c = a[..., start:end]. Backward: pad outCotan with zeros to a's shape.\n      // We construct this as: zeros at left, outCotan in middle, zeros at right,\n      // concatenated along the last axis. We don't have concat or generic pad\n      // ops; the simplest expression here is a sparse expansion via broadcasting\n      // and addition of zero tensors. For Phase 2 we punt: slice's autograd is\n      // implemented by emitting a single fused op that scatters the cotangent.\n      // For now: signal that slice's backward needs a dedicated op kind.\n      const a = tensorOf(op.a)\n      // Build a zeros tensor of a's shape, then add via... no, we can't do\n      // additive scatter without an index_put. Easiest path: add a dedicated\n      // backward op kind. For this pass, throw until we extend the IR.\n      throw new Error(\n        `autograd: slice_last_range backward not implemented yet ` +\n        `(would need a scatter-style op or a Concat op). ` +\n        `Workaround for now: avoid taking gradients through slices by using ` +\n        `separate matmuls for Q/K/V instead of a fused W_qkv. ` +\n        `Tensor: ${a.shape} -> ${tensorOf(op.out).shape}`,\n      )\n    }\n\n    // ---- Broadcast / un-broadcast (autograd infrastructure) ---------------\n    case 'broadcast_to': {\n      // c = broadcast(a, target). da = sum_to_shape(dc, a.shape).\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, sumToShape(outCotan, a.shape))\n      return\n    }\n    case 'sum_to_shape': {\n      // c = sum_to_shape(a, target). da = broadcast_to(dc, a.shape).\n      const a = tensorOf(op.a)\n      accumulate(cotangents, op.a, broadcastTo(outCotan, a.shape))\n      return\n    }\n\n    // ---- ML primitives ---------------------------------------------------\n    case 'log_softmax_last': {\n      // c = log_softmax(a, axis=-1). softmax(a) = exp(c).\n      // dL/dA = dL/dC - softmax(a) * sum_last_keepdims(dL/dC)\n      const c = tensorOf(op.out)\n      const sm = exp(c)  // softmax(a)\n      // sum_last with keepdims via reshape: sum_last drops the dim, then\n      // reshape to add a trailing 1 back, then broadcast multiplies.\n      const sumDc = sumLast(outCotan)            // shape: [..., ] (rank-1 less)\n      const sumDcKeep = reshape(sumDc, [...sumDc.shape, 1])\n      const term = mul(sm, broadcastTo(sumDcKeep, c.shape))\n      accumulate(cotangents, op.a, sub(outCotan, term))\n      return\n    }\n    case 'softmax_causal_last': {\n      // c = softmax_causal(a, axis=-1). The causal mask zeros the upper triangle\n      // of c; for the backward, the same mask zeros out dx_upper because both\n      // paths through softmax depend on c-values that are 0 there.\n      // dL/dA = (dL/dC - sum_last_keep(dL/dC * c)) * c\n      const c = tensorOf(op.out)\n      const dcXc = mul(outCotan, c)\n      const s = sumLast(dcXc)\n      const sKeep = reshape(s, [...s.shape, 1])\n      const inner = sub(outCotan, broadcastTo(sKeep, c.shape))\n      accumulate(cotangents, op.a, mul(inner, c))\n      return\n    }\n    // ---- Comparisons + select ---------------------------------------------\n    case 'less':\n    case 'greater':\n      // No gradient flows through bool comparisons. Stop here.\n      return\n\n    case 'where': {\n      // c = where(cond, a, b).\n      // dC flows to a where cond is true, to b where cond is false.\n      // Need broadcast-aware unreduction back to a's and b's original shapes.\n      const cond = tensorOf(op.cond)\n      const a = tensorOf(op.a)\n      const b = tensorOf(op.b)\n      // Build zero tensors via broadcasting a 0-d const scalar.\n      const zeroA = broadcastTo(constScalar(0, a.dtype), outCotan.shape)\n      const zeroB = broadcastTo(constScalar(0, b.dtype), outCotan.shape)\n      accumulate(cotangents, op.a, unbroadcast(where(cond, outCotan, zeroA), a.shape))\n      accumulate(cotangents, op.b, unbroadcast(where(cond, zeroB, outCotan), b.shape))\n      return\n    }\n\n    case 'where_causal': {\n      // c = where(causal_mask, a, fillValue). Upper triangle becomes constant\n      // (no gradient); lower triangle passes a through. So da_lower = dc_lower,\n      // da_upper = 0. We can't easily express this with current ops; punt.\n      throw new Error(\n        `autograd: where_causal backward not yet implemented. ` +\n        `Use softmax_causal_last (which fuses the mask + softmax) instead.`,\n      )\n    }\n\n    // ---- Adam ops are post-autograd; no backward through them. ----------\n    case 'adam_update_m':\n    case 'adam_update_v':\n    case 'adam_update_p':\n      throw new Error(`autograd: cannot differentiate through ${op.kind}`)\n\n    // ---- relu_grad has no further backward (autograd-internal) ----------\n    case 'relu_grad': {\n      // We don't double-differentiate. If someone tries, this will blow up \u2014\n      // intentional. Phase 2 doesn't need 2nd-order gradients.\n      throw new Error(\n        `autograd: cannot take second-order gradient through relu_grad. ` +\n        `Phase 2 does not support higher-order autodiff.`,\n      )\n    }\n\n    default: {\n      // Exhaustiveness check at type level.\n      const _exhaustive: never = op\n      void _exhaustive\n      throw new Error(`autograd: unhandled op kind ${(op as OpNode).kind}`)\n    }\n  }\n}\n\n// ============================================================================\n// Helpers\n// ============================================================================\n\nfunction invertPerm(perm: readonly number[]): number[] {\n  const inv: number[] = new Array(perm.length)\n  for (let i = 0; i < perm.length; i++) inv[perm[i]!] = i\n  return inv\n}\n", "// Adam / AdamW optimizer, in-graph.\n//\n// `appendAdam` extends a graph that already has a forward pass + autograd-emitted\n// backward (i.e., has paramGrads from `appendGrad`) with the Adam update math.\n//\n// Per parameter P with gradient g:\n//   m_new = b1 * m + (1 - b1) * g\n//   v_new = b2 * v + (1 - b2) * g\u00B2\n//   p_new = decayShrink * p - lrt * m_new / (sqrt(v_new) + eps)\n//\n// `decayShrink = 1 - lr * weightDecay` when the param is being decayed\n// (Loshchilov & Hutter, \"AdamW\") and 1 otherwise \u2014 at which point the\n// multiply folds out and you're left with plain Adam. `lrt` is supplied\n// per-step from CPU and includes the bias-correction factor\n// `sqrt(1-b2^t)/(1-b1^t)`; that's why convergence isn't affected by the\n// first-step warmup that bias-correction-free Adam suffers.\n//\n// **Static vs scheduled lr.** When `config.lr` is a number, decayShrink is\n// baked into the kernel as a literal. When it's a function `(step) => lr`,\n// decayShrink for decayed params becomes a per-step scalar input that the\n// runtime updates each call (computed from the current step's lr). lrt is\n// always per-step; the bias-correction factor changes every step regardless.\n//\n// Returns writeback declarations the buffer planner uses to wire up the\n// \"after step, copy the new value into the persistent home\" path. m and v\n// are state_inputs (zero-initialized, persistent across steps); the param\n// updates are aliased back to the param buffers.\n\nimport type { Tensor } from './ir.js'\nimport type { Graph } from './ir.js'\nimport type { WritebackDecl } from './buffers.js'\nimport { traceInto, stateInput, tensorInput } from './trace.js'\nimport { adamUpdateM, adamUpdateV, adamUpdateP } from './ops.js'\n\n/** Per-step learning-rate schedule. Either a fixed number or one of the\n *  serializable shape forms below. Functions/closures are not supported \u2014\n *  the schedule needs to cross thread boundaries and survive serialization\n *  for the worker-internal runtime, and every realistic LR pattern (constant,\n *  linear decay, cosine, warmup-then-decay) maps to a finite set of shapes.\n *  Use the `lr` helper namespace to construct shapes ergonomically. */\nexport type LRSchedule =\n  | number\n  | { readonly kind: 'constant'; readonly value: number }\n  | { readonly kind: 'linearDecay'; readonly peak: number; readonly final: number; readonly steps: number }\n  | { readonly kind: 'cosineDecay'; readonly peak: number; readonly final: number; readonly steps: number }\n  | { readonly kind: 'warmup'; readonly peakLr: number; readonly warmupSteps: number; readonly after: LRSchedule }\n\n/** Ergonomic constructors for LRSchedule shapes. */\nexport const lr = {\n  constant: (value: number): LRSchedule => ({ kind: 'constant', value }),\n  /** Linearly interpolate from `peak` at step 1 to `final` at step `steps`,\n   *  then hold at `final`. Matches `peak + (final - peak) * min(step/steps, 1)`. */\n  linearDecay: (opts: { peak: number; final: number; steps: number }): LRSchedule =>\n    ({ kind: 'linearDecay', ...opts }),\n  /** Half-cosine from `peak` at step 1 down to `final` at step `steps`,\n   *  then hold at `final`. */\n  cosineDecay: (opts: { peak: number; final: number; steps: number }): LRSchedule =>\n    ({ kind: 'cosineDecay', ...opts }),\n  /** Linear ramp from 0 to `peakLr` over `warmupSteps` steps, then hand off\n   *  to `after` (offset so step 1 of `after` = first post-warmup step). */\n  warmup: (opts: { peakLr: number; warmupSteps: number; after: LRSchedule }): LRSchedule =>\n    ({ kind: 'warmup', ...opts }),\n}\n\n/** Resolve a schedule to its scalar value at a given 1-based step. */\nexport function resolveLR(schedule: LRSchedule, step: number): number {\n  if (typeof schedule === 'number') return schedule\n  switch (schedule.kind) {\n    case 'constant': return schedule.value\n    case 'linearDecay': {\n      const f = Math.min(step / schedule.steps, 1)\n      return schedule.peak + (schedule.final - schedule.peak) * f\n    }\n    case 'cosineDecay': {\n      const f = Math.min(step / schedule.steps, 1)\n      return schedule.final + 0.5 * (schedule.peak - schedule.final) * (1 + Math.cos(Math.PI * f))\n    }\n    case 'warmup': {\n      if (step <= schedule.warmupSteps) return schedule.peakLr * (step / schedule.warmupSteps)\n      return resolveLR(schedule.after, step - schedule.warmupSteps)\n    }\n  }\n}\n\n/** True for shapes that produce different values at different steps (so the\n *  AdamW decayShrink scalar must be a per-step input rather than baked).\n *  Numbers and `{kind:'constant'}` are static; everything else varies. */\nexport function isLRDynamic(schedule: LRSchedule): boolean {\n  if (typeof schedule === 'number') return false\n  return schedule.kind !== 'constant'\n}\n\nexport interface AdamConfig {\n  /** Learning rate schedule. Pass a number for fixed lr, or a shape from\n   *  the `lr` helpers (e.g., `lr.linearDecay({ peak: 0.005, final: 0.0005, steps: 1500 })`). */\n  lr: LRSchedule\n  b1?: number   // default 0.9\n  b2?: number   // default 0.999\n  eps?: number  // default 1e-8\n  /** AdamW: decoupled weight decay coefficient. Default 0 (plain Adam).\n   *  When non-zero, every step shrinks each decayed param by a factor of\n   *  `1 - lr * weightDecay` before the gradient update. */\n  weightDecay?: number\n  /** Filter deciding which params get weight decay. Only consulted when\n   *  weightDecay > 0. Default: decay every param. Override for the standard\n   *  transformer convention (decay weights/embeddings, skip biases + LN gains).\n   *  Example: `(name) => name.includes('.W') || name.endsWith('_emb')`. */\n  decayFilter?: (paramName: string) => boolean\n}\n\n/** Resolved hyperparameters with all fields populated. `lr` stays as the\n *  shape (not pre-resolved) so the runtime can compute per-step values. */\nexport interface AdamResolvedConfig {\n  lr: LRSchedule\n  b1: number\n  b2: number\n  eps: number\n  weightDecay: number\n  decayFilter: (name: string) => boolean\n  /** True iff the lr shape varies with step (linearDecay, cosineDecay,\n   *  warmup). When false, decayShrink is baked at compile time. */\n  lrIsScheduled: boolean\n}\n\nexport interface AdamResult {\n  /** Writebacks the buffer planner should wire into the runtime. */\n  writebacks: WritebackDecl[]\n  /** Name of the per-step scalar tensor_input. The runtime fills this each call\n   * with `lr * sqrt(1-b2^t)/(1-b1^t)` (Adam's bias-corrected effective LR). */\n  lrtInputName: string\n  /** Name of the per-step decayShrink scalar tensor_input, or null when lr is\n   *  static (decayShrink baked into the kernel) or no params are decayed. */\n  decayShrinkInputName: string | null\n  /** Hyperparameters as captured (so the runtime can compute lrt and decayShrink). */\n  config: AdamResolvedConfig\n}\n\n/**\n * Append Adam update ops to `graph`. Must be called inside an active trace\n * context (or after a trace, since traceInto re-enters the graph).\n *\n * @param graph the graph (already containing forward + backward)\n * @param paramGrads param name -> gradient tensor (output of `appendGrad`)\n * @param paramTensors param name -> the param's leaf Tensor (the param_input).\n *                     Needed because the param_input lives in the graph but we\n *                     don't have a direct map by name in `Graph` \u2014 caller passes it.\n * @param config Adam hyperparameters. Set `weightDecay > 0` for AdamW; an\n *               optional `decayFilter` selects which params receive decay.\n */\nexport function appendAdam(\n  graph: Graph,\n  paramGrads: Record<string, Tensor>,\n  paramTensors: Record<string, Tensor>,\n  config: AdamConfig,\n  /** Per-param decay flags from `materializeParams`. When supplied, overrides\n   *  `config.decayFilter` for any name in the map; falls back to `decayFilter`\n   *  for names not present (e.g., for low-level callers using `compile()`\n   *  directly without a Module). */\n  decayFlags?: Record<string, boolean>,\n): AdamResult {\n  const lrIsScheduled = isLRDynamic(config.lr)\n  const initialLr = resolveLR(config.lr, 1)\n  const fullConfig: AdamResolvedConfig = {\n    lr: config.lr,\n    b1: config.b1 ?? 0.9,\n    b2: config.b2 ?? 0.999,\n    eps: config.eps ?? 1e-8,\n    weightDecay: config.weightDecay ?? 0,\n    decayFilter: config.decayFilter ?? (() => true),\n    lrIsScheduled,\n  }\n  const writebacks: WritebackDecl[] = []\n  const lrtInputName = '_adam_lrt'\n  // Tensor input for runtime-updated decayShrink (only created when lr is a\n  // schedule fn AND at least one param will receive weight decay).\n  let decayShrinkInputName: string | null = null\n\n  return traceInto(graph, () => {\n    const lrt = tensorInput(lrtInputName, [], 'f32')\n\n    // Up-front: which params receive weight decay? Per-param decayFlags (set\n    // by Module.param's options) wins; falls back to decayFilter for names\n    // not in the map. Empty when weightDecay = 0 so the rest of the function\n    // can just ask \"is this name in the set?\".\n    const decayedNames = new Set<string>(\n      fullConfig.weightDecay > 0\n        ? Object.keys(paramGrads).filter(name =>\n            (decayFlags && name in decayFlags) ? decayFlags[name]! : fullConfig.decayFilter(name))\n        : [],\n    )\n\n    // We only need a runtime decayShrink scalar when lr varies per step AND\n    // at least one param is being decayed. Otherwise the value is constant\n    // and bakes into the kernel as a literal.\n    let decayShrinkScalar: Tensor | null = null\n    if (lrIsScheduled && decayedNames.size > 0) {\n      decayShrinkInputName = '_adam_decay_shrink'\n      decayShrinkScalar = tensorInput(decayShrinkInputName, [], 'f32')\n    }\n\n    for (const name of Object.keys(paramGrads)) {\n      const p = paramTensors[name]\n      const g = paramGrads[name]\n      if (!p) throw new Error(`appendAdam: missing param tensor for '${name}'`)\n      if (!g) throw new Error(`appendAdam: missing gradient for '${name}'`)\n\n      const mState = stateInput(`adam_m_${name}`, p.shape, 'f32', 0)\n      const vState = stateInput(`adam_v_${name}`, p.shape, 'f32', 0)\n\n      // Choose the decayShrink form per param:\n      //   - non-decayed params: literal 1 (kernel multiply folds out).\n      //   - decayed + scheduled lr: tensor input updated per step.\n      //   - decayed + static lr: literal `1 - lr * wd` baked at compile.\n      const decayShrink: number | Tensor =\n        !decayedNames.has(name) ? 1\n        : decayShrinkScalar !== null ? decayShrinkScalar\n        : 1 - initialLr * fullConfig.weightDecay\n\n      // Three fused kernels per parameter \u2014 one for each of m_new / v_new / p_new.\n      const newM = adamUpdateM(mState, g, fullConfig.b1)\n      const newV = adamUpdateV(vState, g, fullConfig.b2)\n      const newP = adamUpdateP(p, newM, newV, lrt, fullConfig.eps, decayShrink)\n\n      writebacks.push({ source: newM, destName: `adam_m_${name}`, destKind: 'state' })\n      writebacks.push({ source: newV, destName: `adam_v_${name}`, destKind: 'state' })\n      writebacks.push({ source: newP, destName: name,             destKind: 'param' })\n    }\n    return { writebacks, lrtInputName, decayShrinkInputName, config: fullConfig }\n  })\n}\n", "// Buffer planning: walk a Graph and decide which GPU buffer each Tensor maps to.\n//\n// v1 strategy: one GPU buffer per IR Tensor. Static shapes mean every buffer's\n// size is known at compile time and lifetimes don't overlap between steps \u2014\n// so no pooling needed. Total memory is the sum of every intermediate tensor.\n// For our transformer at B=256: ~30 MB of activations + grads. Easily fits.\n//\n// Categorization is what the runtime cares about:\n//   * param        \u2014 uploaded by user via uploadParams; persistent across steps\n//   * param_grad   \u2014 written each step by the backward pass; readable for inspection\n//   * tensor_input \u2014 uploaded each step (tokens, targets, masks)\n//   * intermediate \u2014 produced by an op; lifetime = within a single step\n//   * output       \u2014 special intermediate that should be made readable (loss)\n\nimport type { Graph, Tensor, Dtype, Shape, OpNode } from './ir.js'\nimport { shapeSize } from './shape.js'\n\nexport interface BufferSpec {\n  /** Matches tensor.id. */\n  id: number\n  byteSize: number\n  dtype: Dtype\n  shape: Shape\n  kind: 'param' | 'param_grad' | 'tensor_input' | 'state' | 'intermediate' | 'output'\n  /** External name for param/param_grad/tensor_input/state bindings. null otherwise. */\n  name: string | null\n  /** For state buffers: the value to fill on initial allocation. 0 by default. */\n  initValue?: number\n}\n\n/**\n * After step(), copy `source`'s buffer into `dest`'s buffer.\n * Used to write back updated optimizer state and updated parameters into\n * their persistent home buffers.\n */\nexport interface Writeback {\n  source: number  // buffer id of the tensor holding the new value\n  dest: number    // buffer id of the persistent state/param to overwrite\n  bytes: number\n}\n\nexport interface BufferPlan {\n  buffers: BufferSpec[]\n  /** Tensor id -> buffer id (currently 1:1 but kept opaque for future pooling). */\n  tensorToBuffer: Map<number, number>\n  /** Easy lookup tables for the runtime. */\n  paramsByName: Map<string, number>           // name -> buffer id\n  inputsByName: Map<string, number>           // name -> buffer id\n  paramGradsByName: Map<string, number>       // name -> buffer id\n  statesByName: Map<string, number>           // name -> buffer id (persistent state homes)\n  capturesByName: Map<string, number>         // name -> buffer id (activation captures)\n  outputBufferIds: number[]                   // graph.outputs mapped through\n  /** End-of-step writebacks (Adam updates for params, m, v, etc.) */\n  writebacks: Writeback[]\n}\n\nconst dtypeBytes: Record<Dtype, number> = { f32: 4, i32: 4, bool: 4 }\n\n/**\n * Caller-supplied writeback declarations: \"after each step, copy this Tensor's\n * buffer into the persistent home of this param/state.\"\n */\nexport interface WritebackDecl {\n  /** The Tensor (output of some op) holding the new value to write back. */\n  source: Tensor\n  /** Either a param name (writes to that param's home buffer) or a state name. */\n  destName: string\n  destKind: 'param' | 'state'\n}\n\n/**\n * Build a BufferPlan from a graph + the param-grad map produced by appendGrad.\n * @param graph the full graph (forward + backward + any optimizer ops)\n * @param paramGrads map from param name -> the Tensor that holds its gradient\n * @param writebackDecls list of end-of-step writebacks (e.g. from appendAdam).\n *                       Empty when there's no optimizer in the graph.\n */\nexport function planBuffers(\n  graph: Graph,\n  paramGrads: Record<string, Tensor>,\n  writebackDecls: WritebackDecl[] = [],\n): BufferPlan {\n  const buffers: BufferSpec[] = []\n  const tensorToBuffer = new Map<number, number>()\n  const paramsByName = new Map<string, number>()\n  const inputsByName = new Map<string, number>()\n  const paramGradsByName = new Map<string, number>()\n  const statesByName = new Map<string, number>()\n\n  // Build a quick reverse map: tensorId -> param name (for grads).\n  const gradTensorIdToName = new Map<number, string>()\n  for (const [name, tensor] of Object.entries(paramGrads)) {\n    gradTensorIdToName.set(tensor.id, name)\n  }\n  // ...and tensorId -> param/input op (so we can name the buffer correctly).\n  const opByOutId = new Map<number, OpNode>()\n  for (const op of graph.ops) opByOutId.set(op.out, op)\n\n  const outputSet = new Set(graph.outputs)\n\n  // Walk all tensors in id order. Categorize each.\n  for (const t of graph.tensors) {\n    const op = opByOutId.get(t.id)\n    let kind: BufferSpec['kind'] = 'intermediate'\n    let name: string | null = null\n    let initValue: number | undefined\n\n    if (op?.kind === 'param_input') {\n      kind = 'param'\n      name = op.name\n    } else if (op?.kind === 'tensor_input') {\n      kind = 'tensor_input'\n      name = op.name\n    } else if (op?.kind === 'state_input') {\n      kind = 'state'\n      name = op.name\n      initValue = op.initValue\n    } else if (gradTensorIdToName.has(t.id)) {\n      kind = 'param_grad'\n      name = gradTensorIdToName.get(t.id)!\n    } else if (outputSet.has(t.id)) {\n      kind = 'output'\n    }\n\n    const spec: BufferSpec = {\n      id: t.id,\n      byteSize: Math.max(4, shapeSize(t.shape) * dtypeBytes[t.dtype]),\n      dtype: t.dtype,\n      shape: t.shape,\n      kind,\n      name,\n      ...(initValue !== undefined ? { initValue } : {}),\n    }\n    buffers.push(spec)\n    tensorToBuffer.set(t.id, t.id)  // 1:1 for v1\n\n    if (kind === 'param') paramsByName.set(name!, t.id)\n    if (kind === 'tensor_input') inputsByName.set(name!, t.id)\n    if (kind === 'param_grad') paramGradsByName.set(name!, t.id)\n    if (kind === 'state') statesByName.set(name!, t.id)\n  }\n\n  const outputBufferIds = graph.outputs.map(id => tensorToBuffer.get(id)!)\n\n  // Resolve writeback declarations to (source, dest) buffer-id pairs.\n  const writebacks: Writeback[] = writebackDecls.map(decl => {\n    const sourceBufId = tensorToBuffer.get(decl.source.id)\n    if (sourceBufId === undefined) {\n      throw new Error(`planBuffers: writeback source tensor #${decl.source.id} not in graph`)\n    }\n    const destBufId = decl.destKind === 'param'\n      ? paramsByName.get(decl.destName)\n      : statesByName.get(decl.destName)\n    if (destBufId === undefined) {\n      throw new Error(`planBuffers: writeback dest ${decl.destKind}:'${decl.destName}' not found`)\n    }\n    const sourceSpec = buffers[sourceBufId]!\n    const destSpec = buffers[destBufId]!\n    if (sourceSpec.byteSize !== destSpec.byteSize) {\n      throw new Error(\n        `planBuffers: writeback size mismatch for ${decl.destKind}:'${decl.destName}' ` +\n        `(source ${sourceSpec.byteSize} bytes vs dest ${destSpec.byteSize})`,\n      )\n    }\n    return { source: sourceBufId, dest: destBufId, bytes: sourceSpec.byteSize }\n  })\n\n  // Resolve graph.captures (name -> tensor id) to (name -> buffer id).\n  // No pinning needed at the planner level: each tensor already has its own\n  // buffer (see \"v1 strategy\" comment at top \u2014 no pooling yet).\n  const capturesByName = new Map<string, number>()\n  for (const [name, tensorId] of graph.captures) {\n    const bufId = tensorToBuffer.get(tensorId)\n    if (bufId === undefined) {\n      throw new Error(`planBuffers: capture '${name}' references unknown tensor #${tensorId}`)\n    }\n    capturesByName.set(name, bufId)\n  }\n\n  return { buffers, tensorToBuffer, paramsByName, inputsByName, paramGradsByName, statesByName, capturesByName, outputBufferIds, writebacks }\n}\n", "// WGSL codegen: one kernel per IR op.\n//\n// All shapes are baked into the WGSL as compile-time constants \u2014 no shape\n// uniforms. This means each shape combination produces a distinct shader\n// (so `add([B, T, D], [D])` and `add([B, T, D], [B, T, D])` get different\n// kernels), which is fine for our static-shape model and gives the WGSL\n// compiler full freedom to specialize.\n//\n// Most kernels are direct ports of `transformer-gpu.bulb.md`'s WGSL \u2014 those\n// are already debugged and tuned. The autograd ops (broadcast_to, sum_to_shape,\n// relu_grad, etc.) are new.\n\nimport type { Graph, OpNode, Tensor, Shape } from './ir.js'\nimport type { BufferPlan } from './buffers.js'\nimport { shapeSize } from './shape.js'\n\n// Workgroup size of 256 means even our biggest kernel (~8M threads in\n// matmul_bwd_dW) needs only ~32K workgroups, well under WebGPU's 65535-per-dim\n// dispatch cap. Smaller WG_SIZE forced 2D dispatch with significant over-dispatch.\nconst WG_SIZE = 256\n\n// Global thread index, packed across the 2D dispatch grid that lets us route\n// past WebGPU's 65535-per-dim cap. Every kernel uses this exact line \u2014 keep\n// the formula consistent with the dispatch-stride math in runtime.ts (MAX_X\n// = 65535, so per-row stride = 65535 * WG_SIZE = 16776960). Inlined into\n// each WGSL string via interpolation rather than a function so the WGSL\n// compiler still sees a literal constant.\nconst GID_LINE = 'let i = gid.x + gid.y * 16776960u;'\n\nexport interface KernelSpec {\n  /** Index into graph.ops. */\n  opIndex: number\n  /** Op kind (for debugging / pipeline cache key). */\n  opKind: OpNode['kind']\n  /** Generated WGSL source. Empty string for \"logical\" ops with no kernel. */\n  wgsl: string\n  /**\n   * Buffer ids in binding-index order. The runtime creates a bind group with\n   * these in @binding(0..N) on @group(0). Inputs come first (read), output last\n   * (read_write).\n   */\n  bindings: number[]\n  /** Number of threads to dispatch (1-D). 0 means \"skip\" (e.g. reshape no-op). */\n  threads: number\n  /** Workgroup size; usually WG_SIZE. */\n  workgroupSize: number\n}\n\n// ============================================================================\n// Public entry point\n// ============================================================================\n\n/** Generate a KernelSpec per compute op in graph.ops (in dispatch order). */\nexport function emitKernels(graph: Graph, plan: BufferPlan): KernelSpec[] {\n  const out: KernelSpec[] = []\n  for (let i = 0; i < graph.ops.length; i++) {\n    const op = graph.ops[i]!\n    const spec = emitKernel(op, graph, plan, i)\n    out.push(spec)\n  }\n  return out\n}\n\nfunction emitKernel(op: OpNode, graph: Graph, plan: BufferPlan, opIndex: number): KernelSpec {\n  const tof = (id: number) => graph.tensors[id]!\n  const buf = (tensorId: number) => plan.tensorToBuffer.get(tensorId)!\n  const empty = (): KernelSpec => ({ opIndex, opKind: op.kind, wgsl: '', bindings: [], threads: 0, workgroupSize: WG_SIZE })\n\n  switch (op.kind) {\n    // ---- Leaves: data is supplied externally; no kernel ---------------------\n    case 'param_input':\n    case 'tensor_input':\n    case 'state_input':\n      return empty()\n\n    // ---- arange / const_scalar: kernel that fills the buffer once -----------\n    case 'arange': {\n      const out = tof(op.out)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read_write> buf : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${op.n}u) { return; }\n  buf[i] = ${castFromI32('i32(i)', out.dtype)};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.out)], threads: op.n, workgroupSize: WG_SIZE }\n    }\n    case 'const_scalar': {\n      const wgsl = `\n@group(0) @binding(0) var<storage, read_write> buf : array<${wgslDtype(op.dtype)}>;\n@compute @workgroup_size(1)\nfn main() {\n  buf[0] = ${wgslLiteral(op.value, op.dtype)};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.out)], threads: 1, workgroupSize: 1 }\n    }\n\n    // ---- Element-wise binops with broadcast --------------------------------\n    case 'add':\n    case 'sub':\n    case 'mul':\n    case 'div': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const opStr = { add: '+', sub: '-', mul: '*', div: '/' }[op.kind]\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(2) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = a[aIdx] ${opStr} b[bIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Element-wise scalar binops (scalar baked into WGSL) ---------------\n    case 'mul_scalar':\n    case 'add_scalar': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const opStr = op.kind === 'mul_scalar' ? '*' : '+'\n      const total = shapeSize(out.shape)\n      const lit = wgslLiteral(op.scalar, out.dtype)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = a[i] ${opStr} ${lit};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Unary -------------------------------------------------------------\n    case 'sqrt':\n    case 'rsqrt':\n    case 'log':\n    case 'exp':\n    case 'relu': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const expr =\n        op.kind === 'sqrt'  ? 'sqrt(x)' :\n        op.kind === 'rsqrt' ? '1.0 / sqrt(x)' :\n        op.kind === 'log'   ? 'log(x)' :\n        op.kind === 'exp'   ? 'exp(x)' :\n        /* relu */            'max(x, 0.0)'\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let x = a[i];\n  out[i] = ${expr};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Comparisons + select --------------------------------------------\n    case 'less':\n    case 'greater': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const opStr = op.kind === 'less' ? '<' : '>'\n      const total = shapeSize(out.shape)\n      // bool tensors lower to u32 in storage (1 if true, 0 if false).\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(2) var<storage, read_write> out : array<u32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = select(0u, 1u, a[aIdx] ${opStr} b[bIdx]);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'where': {\n      const out = tof(op.out)\n      const cond = tof(op.cond)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> cond : array<u32>;\n@group(0) @binding(1) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(2) var<storage, read> b : array<${wgslDtype(b.dtype)}>;\n@group(0) @binding(3) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, cond.shape, 'cIdx')}\n${broadcastIndexBlock('i', out.shape, a.shape, 'aIdx')}\n${broadcastIndexBlock('i', out.shape, b.shape, 'bIdx')}\n  out[i] = select(b[bIdx], a[aIdx], cond[cIdx] != 0u);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.cond), buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'relu_grad': {\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> x : array<f32>;\n@group(0) @binding(1) var<storage, read> dy : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = select(0.0, dy[i], x[i] > 0.0);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.x), buf(op.dy), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Reductions over last axis -----------------------------------------\n    case 'mean_last':\n    case 'sum_last': {\n      const a = tof(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const outerSize = shapeSize(a.shape) / D\n      const divisor = op.kind === 'mean_last' ? `f32(${D}u)` : '1.0'\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let base = i * ${D}u;\n  var s : f32 = 0.0;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    s = s + a[base + j];\n  }\n  out[i] = s / ${divisor};\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Shape ---------------------------------------------------------------\n    // reshape: no kernel needed if buffers can alias (shape change only). For\n    // v1 simplicity we emit a memcpy-style kernel rather than aliasing buffers,\n    // because aliasing complicates the buffer plan and we have memory headroom.\n    case 'reshape': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = a[i];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'transpose': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      // Emit per-axis index computation. For each output flat index i, decompose\n      // into per-axis output indices, then use op.perm to find the source axis order.\n      // Source flat index = sum(outIdx[perm.invert()[k]] * a_stride[k] for k).\n      const aStrides = computeStrides(a.shape)\n      const outDimDecls = decomposeFlatIndexBlock('i', out.shape, 'oIdx')\n      const srcExpr: string[] = []\n      for (let k = 0; k < a.shape.length; k++) {\n        const srcAxis = op.perm.indexOf(k)  // which output axis came from input axis k\n        srcExpr.push(`oIdx_${srcAxis} * ${aStrides[k]}u`)\n      }\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${outDimDecls}\n  let srcIdx = ${srcExpr.join(' + ')};\n  out[i] = a[srcIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Linear algebra ----------------------------------------------------\n    // matmul: a [..., M, K] \u00B7 b [K, N] -> [..., M, N]. b is unbatched.\n    case 'matmul': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const M = a.shape[a.shape.length - 2]!\n      const K = a.shape[a.shape.length - 1]!\n      const N = b.shape[1]!\n      const batch = shapeSize(a.shape) / (M * K)\n      const total = batch * M * N\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read> b : array<f32>;\n@group(0) @binding(2) var<storage, read_write> c : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let bi = i / ${M * N}u;          // batch index\n  let mn = i % ${M * N}u;\n  let m = mn / ${N}u;\n  let n = mn % ${N}u;\n  let aBase = bi * ${M * K}u + m * ${K}u;\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k < ${K}u; k = k + 1u) {\n    s = s + a[aBase + k] * b[k * ${N}u + n];\n  }\n  c[i] = s;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'matmul_batched': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const b = tof(op.b)\n      const M = a.shape[a.shape.length - 2]!\n      const K = a.shape[a.shape.length - 1]!\n      const N = b.shape[b.shape.length - 1]!\n      const batch = shapeSize(a.shape) / (M * K)\n      const total = batch * M * N\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read> b : array<f32>;\n@group(0) @binding(2) var<storage, read_write> c : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let bi = i / ${M * N}u;\n  let mn = i % ${M * N}u;\n  let m = mn / ${N}u;\n  let n = mn % ${N}u;\n  let aBase = bi * ${M * K}u + m * ${K}u;\n  let bBase = bi * ${K * N}u;\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k < ${K}u; k = k + 1u) {\n    s = s + a[aBase + k] * b[bBase + k * ${N}u + n];\n  }\n  c[i] = s;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.b), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- One-hot ------------------------------------------------------------\n    case 'one_hot': {\n      const out = tof(op.out)\n      const indices = tof(op.indices)\n      const total = shapeSize(out.shape)\n      const depth = op.depth\n      const zeroLit = wgslLiteral(0, out.dtype)\n      const oneLit = wgslLiteral(1, out.dtype)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> indices : array<i32>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let outerIdx = i / ${depth}u;\n  let depthIdx = i % ${depth}u;\n  let tgt = u32(indices[outerIdx]);\n  out[i] = select(${zeroLit}, ${oneLit}, tgt == depthIdx);\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.indices), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- ML primitives -----------------------------------------------------\n    case 'log_softmax_last': {\n      const a = tof(op.a)\n      const D = a.shape[a.shape.length - 1]!\n      const outerSize = shapeSize(a.shape) / D\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let base = i * ${D}u;\n  var m : f32 = -1.0e30;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    let v = a[base + j];\n    if (v > m) { m = v; }\n  }\n  var s : f32 = 0.0;\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    s = s + exp(a[base + j] - m);\n  }\n  let logZ = m + log(s);\n  for (var j : u32 = 0u; j < ${D}u; j = j + 1u) {\n    out[base + j] = a[base + j] - logZ;\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    case 'softmax_causal_last': {\n      const a = tof(op.a)\n      const T = a.shape[a.shape.length - 1]!  // == second-to-last (square)\n      // Outer size = (everything except last 2 axes) * (second-to-last axis)\n      const outerSize = shapeSize(a.shape) / T\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  // Each thread handles one (..., qpos)-row, softmaxing over kpos\u2208[0..qpos].\n  ${GID_LINE}\n  if (i >= ${outerSize}u) { return; }\n  let qpos = i % ${T}u;\n  let base = i * ${T}u;\n  var m : f32 = -1.0e30;\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    let v = a[base + k];\n    if (v > m) { m = v; }\n  }\n  var s : f32 = 0.0;\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    let e = exp(a[base + k] - m);\n    out[base + k] = e;\n    s = s + e;\n  }\n  for (var k : u32 = 0u; k <= qpos; k = k + 1u) {\n    out[base + k] = out[base + k] / s;\n  }\n  for (var k : u32 = qpos + 1u; k < ${T}u; k = k + 1u) {\n    out[base + k] = 0.0;\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: outerSize, workgroupSize: WG_SIZE }\n    }\n\n    case 'where_causal': {\n      const a = tof(op.a)\n      const T = a.shape[a.shape.length - 1]!\n      const total = shapeSize(a.shape)\n      const fillLit = wgslLiteral(op.fillValue, 'f32')\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<f32>;\n@group(0) @binding(1) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let kpos = i % ${T}u;\n  let qpos = (i / ${T}u) % ${T}u;\n  if (kpos > qpos) {\n    out[i] = ${fillLit};\n  } else {\n    out[i] = a[i];\n  }\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Slicing -----------------------------------------------------------\n    case 'slice_last_range': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const D_in = a.shape[a.shape.length - 1]!\n      const D_out = op.end - op.start\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let outer = i / ${D_out}u;\n  let inner = i % ${D_out}u;\n  out[i] = a[outer * ${D_in}u + ${op.start}u + inner];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Broadcast / un-broadcast (autograd infrastructure) ----------------\n    case 'broadcast_to': {\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const total = shapeSize(out.shape)\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(a.dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(out.dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${broadcastIndexBlock('i', out.shape, a.shape, 'srcIdx')}\n  out[i] = a[srcIdx];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n\n    // ---- Adam (fused per-element) -----------------------------------------\n    case 'adam_update_m': {\n      // m_new = b1 * m + (1 - b1) * g\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const b1 = op.b1\n      const oneMinusB1 = 1 - b1\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> m : array<f32>;\n@group(0) @binding(1) var<storage, read> g : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = ${wgslLiteral(b1, 'f32')} * m[i] + ${wgslLiteral(oneMinusB1, 'f32')} * g[i];\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.m), buf(op.g), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'adam_update_v': {\n      // v_new = b2 * v + (1 - b2) * g\u00B2\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const b2 = op.b2\n      const oneMinusB2 = 1 - b2\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> v : array<f32>;\n@group(0) @binding(1) var<storage, read> g : array<f32>;\n@group(0) @binding(2) var<storage, read_write> out : array<f32>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  let gv = g[i];\n  out[i] = ${wgslLiteral(b2, 'f32')} * v[i] + ${wgslLiteral(oneMinusB2, 'f32')} * gv * gv;\n}`.trim()\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.v), buf(op.g), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n    case 'adam_update_p': {\n      // p_new = decayShrink * p - lrt[0] * m_new / (sqrt(v_new) + eps).\n      // lrt is supplied per-step from CPU (already includes bias correction).\n      // decayShrink is either baked as a literal (no schedule, fixed lr) or\n      // bound as a per-step scalar input (when the user supplies an lr\n      // schedule via `adam: { lr: (step) => ... }`). When literal=1 the WGSL\n      // compiler folds the multiply away.\n      const out = tof(op.out)\n      const total = shapeSize(out.shape)\n      const dynamicShrink = op.decayShrinkTensor !== null\n      const shrinkExpr = dynamicShrink ? 'decayShrink[0]' : wgslLiteral(op.decayShrink, 'f32')\n      const shrinkBinding = dynamicShrink\n        ? `@group(0) @binding(4) var<storage, read> decayShrink : array<f32>;\\n` +\n          `@group(0) @binding(5) var<storage, read_write> out : array<f32>;`\n        : `@group(0) @binding(4) var<storage, read_write> out : array<f32>;`\n      const wgsl = `\n@group(0) @binding(0) var<storage, read> p : array<f32>;\n@group(0) @binding(1) var<storage, read> mNew : array<f32>;\n@group(0) @binding(2) var<storage, read> vNew : array<f32>;\n@group(0) @binding(3) var<storage, read> lrt : array<f32>;\n${shrinkBinding}\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n  out[i] = ${shrinkExpr} * p[i] - lrt[0] * mNew[i] / (sqrt(vNew[i]) + ${wgslLiteral(op.eps, 'f32')});\n}`.trim()\n      const bindings = dynamicShrink\n        ? [buf(op.p), buf(op.mNew), buf(op.vNew), buf(op.lrt), buf(op.decayShrinkTensor!), buf(op.out)]\n        : [buf(op.p), buf(op.mNew), buf(op.vNew), buf(op.lrt), buf(op.out)]\n      return { opIndex, opKind: op.kind, wgsl, bindings, threads: total, workgroupSize: WG_SIZE }\n    }\n\n    case 'sum_to_shape': {\n      // Sum-reduce src down to target by summing over each axis where target=1\n      // or where target is missing (offset-prefix axes that get fully summed).\n      const out = tof(op.out)\n      const a = tof(op.a)\n      const wgsl = emitSumToShape(a.shape, out.shape, a.dtype)\n      const total = shapeSize(out.shape)\n      return { opIndex, opKind: op.kind, wgsl, bindings: [buf(op.a), buf(op.out)], threads: total, workgroupSize: WG_SIZE }\n    }\n  }\n}\n\n// ============================================================================\n// WGSL helpers\n// ============================================================================\n\nfunction wgslDtype(d: 'f32' | 'i32' | 'bool'): string {\n  // bool can't be in storage buffers in WGSL; we lower bool-typed tensors to\n  // u32 (0/1). For Phase 3a there are no bool-typed storage buffers in the\n  // forward+backward graph (causal mask is built inline in softmax kernels),\n  // so this only matters if the user explicitly creates a bool tensor.\n  if (d === 'bool') return 'u32'\n  return d\n}\n\nfunction wgslLiteral(value: number, dtype: 'f32' | 'i32' | 'bool'): string {\n  if (dtype === 'f32') {\n    if (Number.isFinite(value)) {\n      // WGSL requires `.` in float literals; force decimal form.\n      return value.toString().includes('.') || value.toString().includes('e')\n        ? `${value}f`\n        : `${value}.0f`\n    }\n    return value > 0 ? '1.0e30f' : '-1.0e30f'\n  }\n  if (dtype === 'i32') return `${Math.trunc(value)}i`\n  return value ? '1u' : '0u'\n}\n\nfunction castFromI32(expr: string, dtype: 'f32' | 'i32' | 'bool'): string {\n  if (dtype === 'f32') return `f32(${expr})`\n  if (dtype === 'i32') return `i32(${expr})`\n  return `u32(${expr})`\n}\n\nfunction computeStrides(shape: Shape): number[] {\n  const strides: number[] = new Array(shape.length).fill(1)\n  for (let i = shape.length - 2; i >= 0; i--) {\n    strides[i] = strides[i + 1]! * shape[i + 1]!\n  }\n  return strides\n}\n\n/**\n * Generate WGSL that decomposes a flat index `flatVar` into per-axis indices\n * `outVar_0, outVar_1, ...` according to `shape`.\n */\nfunction decomposeFlatIndexBlock(flatVar: string, shape: Shape, outVar: string): string {\n  if (shape.length === 0) return `  let ${outVar}_0 : u32 = 0u;`  // not used but parser-safe\n  const strides = computeStrides(shape)\n  const lines: string[] = []\n  let remaining = flatVar\n  for (let i = 0; i < shape.length; i++) {\n    if (i === shape.length - 1) {\n      lines.push(`  let ${outVar}_${i} = ${remaining};`)\n    } else {\n      lines.push(`  let ${outVar}_${i} = ${remaining} / ${strides[i]}u;`)\n      const newRem = `${outVar}_rem${i}`\n      lines.push(`  let ${newRem} = ${remaining} % ${strides[i]}u;`)\n      remaining = newRem\n    }\n  }\n  return lines.join('\\n')\n}\n\n/**\n * Generate WGSL that computes the source flat index in `srcVar` for an output\n * flat index `flatVar`, given output shape `outShape` and source shape `srcShape`\n * under right-aligned NumPy-style broadcasting (size-1 axes broadcast).\n *\n * Strategy:\n *   1. Decompose flat output index into per-axis output indices.\n *   2. For each output axis that maps onto a source axis (right-aligned), use\n *      the output index there if src.dim != 1, else 0 (broadcast).\n *   3. Drop output-only axes (those with no corresponding source axis).\n *   4. Combine source indices with source strides.\n */\nfunction broadcastIndexBlock(flatVar: string, outShape: Shape, srcShape: Shape, srcVar: string): string {\n  // Name the per-axis decomposition vars after `srcVar` so multiple\n  // broadcastIndexBlock calls in the same WGSL function don't collide.\n  const prefix = `${srcVar}_ax`\n  const decompose = decomposeFlatIndexBlock(flatVar, outShape, prefix)\n  const offset = outShape.length - srcShape.length\n  if (srcShape.length === 0) {\n    return `${decompose}\\n  let ${srcVar} : u32 = 0u;`\n  }\n  const srcStrides = computeStrides(srcShape)\n  const terms: string[] = []\n  for (let i = 0; i < srcShape.length; i++) {\n    const outAxis = i + offset\n    const srcDim = srcShape[i]!\n    const term = srcDim === 1 ? '0u' : `${prefix}_${outAxis} * ${srcStrides[i]}u`\n    terms.push(term)\n  }\n  return `${decompose}\\n  let ${srcVar} = ${terms.join(' + ')};`\n}\n\n/**\n * sum_to_shape: each output cell sums over the source axes that are reduced.\n * For source shape S and target shape T (right-aligned):\n *   - Axes in S not in T (leading prefix): fully reduced (sum over whole axis).\n *   - Axes where T=1 but S>1: reduced (sum over that axis).\n *   - Axes where T=S: passed through.\n *\n * Implementation: each thread = one output cell. It iterates over the reduced\n * axes via nested-loop unrolling (we generate explicit nested for-loops).\n */\nfunction emitSumToShape(srcShape: Shape, tgtShape: Shape, dtype: 'f32' | 'i32' | 'bool'): string {\n  const srcStrides = computeStrides(srcShape)\n  const tgtStrides = computeStrides(tgtShape)\n  const offset = srcShape.length - tgtShape.length\n\n  // Decompose flat output index into per-axis target indices.\n  const decompose = decomposeFlatIndexBlock('i', tgtShape, 'tgt')\n\n  // Identify reduced axes of the SOURCE: axis k in src is reduced if either\n  // it's in the leading prefix (k < offset) or its corresponding target axis\n  // has size 1. For non-reduced axes (k >= offset and tgt=src), the source\n  // index is the target index along that axis.\n  const reducedAxes: number[] = []\n  for (let k = 0; k < srcShape.length; k++) {\n    if (k < offset) { reducedAxes.push(k); continue }\n    const tDim = tgtShape[k - offset]!\n    const sDim = srcShape[k]!\n    if (tDim === 1 && sDim > 1) reducedAxes.push(k)\n  }\n\n  // Build the source flat index expression. Initialize from the non-reduced axes.\n  const baseTerms: string[] = []\n  for (let k = 0; k < srcShape.length; k++) {\n    if (reducedAxes.includes(k)) continue  // contributed by loop var instead\n    const tAxis = k - offset\n    baseTerms.push(`tgt_${tAxis} * ${srcStrides[k]}u`)\n  }\n  const baseExpr = baseTerms.length > 0 ? baseTerms.join(' + ') : '0u'\n\n  // Emit nested for loops over the reduced axes.\n  const indent = (depth: number) => '  '.repeat(depth + 1)\n  const loops: string[] = []\n  for (let depth = 0; depth < reducedAxes.length; depth++) {\n    const k = reducedAxes[depth]!\n    const dim = srcShape[k]!\n    loops.push(`${indent(depth)}for (var r${k} : u32 = 0u; r${k} < ${dim}u; r${k} = r${k} + 1u) {`)\n  }\n  // Inside innermost loop, compute source index.\n  const reducedTerms = reducedAxes.map(k => `r${k} * ${srcStrides[k]}u`)\n  const fullExpr = reducedTerms.length > 0\n    ? `${baseExpr} + ${reducedTerms.join(' + ')}`\n    : baseExpr\n  loops.push(`${indent(reducedAxes.length)}s = s + a[${fullExpr}];`)\n  for (let depth = reducedAxes.length - 1; depth >= 0; depth--) {\n    loops.push(`${indent(depth)}}`)\n  }\n\n  const total = tgtShape.length === 0 ? 1 : (tgtStrides[0]! * tgtShape[0]!)\n  const loopBody = reducedAxes.length === 0\n    ? `  s = s + a[${baseExpr}];`\n    : loops.join('\\n')\n\n  return `\n@group(0) @binding(0) var<storage, read> a : array<${wgslDtype(dtype)}>;\n@group(0) @binding(1) var<storage, read_write> out : array<${wgslDtype(dtype)}>;\n@compute @workgroup_size(${WG_SIZE})\nfn main(@builtin(global_invocation_id) gid : vec3<u32>) {\n  ${GID_LINE}\n  if (i >= ${total}u) { return; }\n${decompose}\n  var s : ${wgslDtype(dtype)} = ${dtype === 'f32' ? '0.0f' : (dtype === 'i32' ? '0i' : '0u')};\n${loopBody}\n  out[i] = s;\n}`.trim()\n}\n", "// WebGPU runtime. Reads a BufferPlan + KernelSpec[] (produced by codegen),\r\n// allocates real GPU buffers and pipelines, and provides a `step()` method\r\n// that uploads inputs, dispatches all kernels, and reads back outputs.\r\n//\r\n// Browser-only: this module needs `navigator.gpu` at runtime.\r\n\r\nimport type { BufferPlan } from './buffers.js'\r\nimport type { KernelSpec } from './codegen.js'\r\n\r\n// TS lib.dom defines WebGPU types but not the GPUMapMode runtime constant.\r\n// Provided by the browser per WebGPU spec; declare just what we use.\r\ndeclare const GPUMapMode: { readonly READ: number; readonly WRITE: number }\r\n\r\nexport interface UploadParamsOptions {\r\n  /** Skip the \"missing param\" check, allowing the caller to update only some\r\n   *  params and leave the rest at their current GPU values. Extra (unknown)\r\n   *  keys are still rejected \u2014 that's always a typo. Default: false. */\r\n  partial?: boolean\r\n}\r\n\r\n/**\r\n * Activation readbacks for one `step()`/`run()` call. Keyed by the names\r\n * passed to `capture(name, t)` during the trace. `get(name)` throws if the\r\n * name isn't registered or wasn't read back this call (i.e., the call was\r\n * made without `{ withCaptures: true }`); use `has(name)` if you need to\r\n * branch. `shapeOf(name)` returns the static-after-compile shape and works\r\n * regardless of whether captures were read back.\r\n */\r\nexport class Captures {\r\n  constructor(\r\n    private readonly shapes: Record<string, readonly number[]>,\r\n    private readonly data: Map<string, Float32Array>,\r\n  ) {}\r\n  get(name: string): Float32Array {\r\n    const d = this.data.get(name)\r\n    if (!d) {\r\n      const known = [...this.data.keys()].sort().join(', ')\r\n      const detail = known ? `Known this call: ${known}` : `(call run/step with { withCaptures: true } to populate)`\r\n      throw new Error(`Captures.get: '${name}' not present. ${detail}`)\r\n    }\r\n    return d\r\n  }\r\n  shapeOf(name: string): readonly number[] {\r\n    const s = this.shapes[name]\r\n    if (!s) {\r\n      const known = Object.keys(this.shapes).sort().join(', ') || '(none registered)'\r\n      throw new Error(`Captures.shapeOf: '${name}' not registered. Known: ${known}`)\r\n    }\r\n    return s\r\n  }\r\n  has(name: string): boolean { return this.data.has(name) }\r\n  names(): string[] { return [...this.data.keys()].sort() }\r\n}\r\n\r\nexport interface RunResult {\r\n  output: Float32Array\r\n  captures: Captures\r\n}\r\n\r\nexport interface StepResult {\r\n  loss: number\r\n  captures: Captures\r\n}\r\n\r\nexport interface RunOptions {\r\n  /** Read back tensors registered via `capture(name, t)` during the trace.\r\n   *  Default false. When false, the returned `captures` is empty (calling\r\n   *  `.get` throws); when true, captures are read back and accessible. */\r\n  withCaptures?: boolean\r\n}\r\n\r\nexport interface StepOptions extends RunOptions {\r\n  /** If false, the training submit is queued but the JS thread does not\r\n   *  await `mapAsync` of the loss buffer. Returns `void` immediately.\r\n   *  Use `runtime.readLoss()` to read the latest loss explicitly when\r\n   *  you want it (e.g., every Nth step for UI display).\r\n   *\r\n   *  Why: each `mapAsync` round-trip is ~1 ms on desktop but 10\u201330 ms on\r\n   *  Android Chrome. A training loop that awaits per step pays N \u00D7 that\r\n   *  on the main thread, which on mobile starves the OS compositor and\r\n   *  causes visible UI sluggishness. With `readLoss: false` plus a\r\n   *  `requestAnimationFrame` yield between steps, the main thread stays\r\n   *  responsive while training runs at GPU speed.\r\n   *\r\n   *  Implies `withCaptures: false`. Default: true. */\r\n  readLoss?: boolean\r\n}\r\n\r\n/** Common surface for both training and forward-only compiled runtimes. */\r\nexport interface CompiledBase {\r\n  /** The GPUDevice this runtime is bound to. Pass to sibling compiles to\r\n   *  share the device, or use directly for other GPU work. */\r\n  device: GPUDevice\r\n  /** Param name -> the underlying GPUBuffer. Pass to a sibling compile via\r\n   *  `sharedParams` to share without copies. */\r\n  params: Map<string, GPUBuffer>\r\n  /** Shape of the graph's output (loss scalar `[]` for training; the user's\r\n   *  returned tensor for forward-only compiles). */\r\n  outputShape: number[]\r\n  /** Upload parameter Float32Arrays to their GPU buffers. By default, requires\r\n   *  *all* params to be present; throws on any unknown or missing key. Pass\r\n   *  `{ partial: true }` to skip the missing-key check. */\r\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): void\r\n  /** Read all parameters back as Float32Arrays \u2014 used for UI panels. */\r\n  downloadParams(): Promise<Record<string, Float32Array>>\r\n  /** Free GPU resources. */\r\n  destroy(): void\r\n}\r\n\r\n/** Run a dispatch and read back the full output tensor. Default returns the\r\n *  output as a `Float32Array`; with `{ withCaptures: true }` returns\r\n *  `{ output, captures }`. Same shape as `step()`'s overloads. */\r\nexport interface RunFn {\r\n  (inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\r\n  (inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\r\n  (inputs: Record<string, Int32Array | Float32Array>, opts: RunOptions): Promise<Float32Array | RunResult>\r\n}\r\n\r\nexport interface CompiledRuntime extends CompiledBase {\r\n  /** Read all parameter gradients back. Mostly for verification / debugging. */\r\n  downloadParamGrads(): Promise<Record<string, Float32Array>>\r\n  /**\r\n   * One full forward+backward step.\r\n   *   1. Uploads `inputs` (tokens, targets, masks) to input buffers.\r\n   *   2. Dispatches every kernel in order.\r\n   *   3. Reads back the loss scalar (and any registered captures, if requested).\r\n   * Default returns the loss as a JS number; with `{ withCaptures: true }`\r\n   * returns `{ loss, captures }`.\r\n   */\r\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { readLoss: false }): Promise<void>\r\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: StepOptions): Promise<number | StepResult | void>\r\n  /** Same dispatch as step() but returns the full output Float32Array \u2014 for\r\n   *  training graphs the output is a scalar loss, so step() is usually more\r\n   *  convenient. Provided for parity with `compileForward`. */\r\n  run: RunFn\r\n  /** Read the latest loss value from the GPU. Pair with `step({ readLoss: false })`\r\n   *  fire-and-forget training: every Nth iteration, call `readLoss()` for the\r\n   *  UI, but most iterations don't pay the `mapAsync` cost. */\r\n  readLoss(): Promise<number>\r\n  /** Re-zero all optimizer state buffers (Adam's m/v) in place. Pair with\r\n   *  `uploadInitialParams()` for a full training reset without recompile. */\r\n  resetOptimizerState(): void\r\n}\r\n\r\n/** Forward-only compiled runtime \u2014 produced by `compileForward`. No optimizer,\r\n *  no backward. Returns the output tensor (not just a scalar) per `run()` call. */\r\nexport interface CompiledForward extends CompiledBase {\r\n  run: RunFn\r\n}\r\n\r\nexport interface RuntimeOpts {\r\n  /** Pre-acquired GPUDevice. If omitted, runtime requests its own. */\r\n  device?: GPUDevice\r\n  /** External param buffers to bind in place of allocating fresh ones, keyed\r\n   *  by param name. Used to share params between a training compile and a\r\n   *  sibling forward-only compile (e.g., a B=1 inference graph). When a name\r\n   *  is in this map, the runtime reuses the provided GPUBuffer; otherwise it\r\n   *  allocates as usual. */\r\n  sharedParams?: Map<string, GPUBuffer>\r\n}\r\n\r\n// Inlined numeric values (per WebGPU spec) so this module is importable in Node\r\n// for codegen-only usage. The browser provides GPUBufferUsage as a global, but\r\n// referencing it at module scope would crash before any browser code runs.\r\nconst STORAGE_RW = 0x80 /*STORAGE*/ | 0x8 /*COPY_DST*/ | 0x4 /*COPY_SRC*/\r\nconst READBACK = 0x1 /*MAP_READ*/ | 0x8 /*COPY_DST*/\r\n\r\nexport async function createRuntime(\r\n  plan: BufferPlan,\r\n  kernels: KernelSpec[],\r\n  lossBufferId: number,\r\n  opts: RuntimeOpts = {},\r\n): Promise<CompiledRuntime> {\r\n  const device = opts.device ?? await acquireDevice()\r\n  const queue = device.queue\r\n\r\n  // ---- Allocate one GPUBuffer per BufferSpec --------------------------------\r\n  // State buffers also get filled with their initValue at allocation time.\r\n  // Param buffers may be supplied externally via opts.sharedParams; in that\r\n  // case we reuse the provided GPUBuffer instead of allocating, and the\r\n  // sibling compile that owns it is responsible for upload + lifetime.\r\n  // ownedBufferIds tracks which buffers we allocated ourselves (and so must\r\n  // destroy on .destroy()) vs which were handed in by a sibling compile.\r\n  const buffers = new Map<number, GPUBuffer>()\r\n  const ownedBufferIds = new Set<number>()\r\n  const sharedParams = opts.sharedParams\r\n  for (const spec of plan.buffers) {\r\n    const shared = spec.kind === 'param' ? sharedParams?.get(spec.name!) : undefined\r\n    if (shared) {\r\n      if (shared.size !== spec.byteSize) {\r\n        throw new Error(\r\n          `sharedParams: size mismatch for '${spec.name}' \u2014 supplied ${shared.size} bytes, ` +\r\n          `compiled graph expects ${spec.byteSize}.`,\r\n        )\r\n      }\r\n      buffers.set(spec.id, shared)\r\n      continue\r\n    }\r\n    const buf = device.createBuffer({\r\n      size: spec.byteSize,\r\n      usage: STORAGE_RW,\r\n      label: spec.name ?? `t${spec.id}-${spec.kind}`,\r\n    })\r\n    buffers.set(spec.id, buf)\r\n    ownedBufferIds.add(spec.id)\r\n    if (spec.kind === 'state') fillStateBuffer(spec, buf)\r\n  }\r\n\r\n  // ---- Compile pipelines per kernel; cache by WGSL source -------------------\r\n  // Push an error scope around each shader+pipeline creation so we can surface\r\n  // the actual compile error rather than the cryptic \"previous error\" that\r\n  // comes from using an invalid pipeline at dispatch time.\r\n  const moduleCache = new Map<string, GPUShaderModule>()\r\n  const pipelines: (GPUComputePipeline | null)[] = []\r\n  type ErrorProbe = Promise<{ k: KernelSpec; module: GPUShaderModule; err: GPUError } | null>\r\n  const probes: ErrorProbe[] = []\r\n  for (const k of kernels) {\r\n    if (!k.wgsl) { pipelines.push(null); continue }\r\n    let module = moduleCache.get(k.wgsl)\r\n    if (!module) {\r\n      module = device.createShaderModule({ code: k.wgsl, label: k.opKind })\r\n      moduleCache.set(k.wgsl, module)\r\n    }\r\n    device.pushErrorScope('validation')\r\n    const pipeline = device.createComputePipeline({\r\n      layout: 'auto',\r\n      compute: { module, entryPoint: 'main' },\r\n      label: k.opKind,\r\n    })\r\n    pipelines.push(pipeline)\r\n    probes.push(device.popErrorScope().then(err => err ? { k, module: module!, err } : null))\r\n  }\r\n  const probeResults = await Promise.all(probes)\r\n  const failures = probeResults.filter((p): p is { k: KernelSpec; module: GPUShaderModule; err: GPUError } => p != null)\r\n  if (failures.length > 0) {\r\n    const reports: string[] = []\r\n    for (const { k, module, err } of failures) {\r\n      const info = await module.getCompilationInfo()\r\n      const messages = info.messages\r\n        .map(m => `  L${m.lineNum}:${m.linePos} [${m.type}] ${m.message}`)\r\n        .join('\\n')\r\n      reports.push(\r\n        `[shader compile error] ${k.opKind} (op #${k.opIndex}): ${err.message}\\n` +\r\n        (messages || '  (no compilation messages)') +\r\n        `\\n--- WGSL ---\\n${k.wgsl}\\n-----------`,\r\n      )\r\n    }\r\n    // eslint-disable-next-line no-console\r\n    console.error(reports.join('\\n\\n'))\r\n    throw new Error(`tensorgrad: ${failures.length} shader(s) failed to compile (see console).`)\r\n  }\r\n\r\n  // ---- Pre-build bind groups (static \u2014 buffer ids don't change per step) ---\r\n  const bindGroups: (GPUBindGroup | null)[] = kernels.map((k, i) => {\r\n    const pipeline = pipelines[i]\r\n    if (!pipeline) return null\r\n    return device.createBindGroup({\r\n      layout: pipeline.getBindGroupLayout(0),\r\n      entries: k.bindings.map((bufId, idx) => ({\r\n        binding: idx,\r\n        resource: { buffer: buffers.get(bufId)! },\r\n      })),\r\n    })\r\n  })\r\n\r\n  // ---- Output readback staging buffer ---------------------------------------\r\n  // `outputBufferId` is the graph's main output (loss for training, the user's\r\n  // returned tensor for forward-only). step() reads back its first element;\r\n  // run() reads back the full Float32Array.\r\n  const outputSpec = plan.buffers[lossBufferId]!\r\n  const outputReadback = device.createBuffer({ size: outputSpec.byteSize, usage: READBACK })\r\n\r\n  // ---- Capture readback staging buffer (lazy, single concatenated) ---------\r\n  // One buffer for ALL captures, with each capture occupying a slice. Matters\r\n  // on mobile: each `mapAsync` round-trip on Android Chrome adds significant\r\n  // GPU-fence latency (~10\u201330 ms vs ~1 ms on desktop). With N captures, the\r\n  // per-call mobile cost is N \u00D7 that latency on the main thread. Concatenating\r\n  // and reading back via one `mapAsync` collapses N stalls into one. Allocated\r\n  // on first `step({ withCaptures: true })` call.\r\n  type CaptureLayout = {\r\n    buffer: GPUBuffer\r\n    slices: { name: string; bufId: number; offset: number; byteSize: number }[]\r\n  }\r\n  let captureStaging: CaptureLayout | null = null\r\n  function ensureCaptureStaging(): CaptureLayout {\r\n    if (captureStaging) return captureStaging\r\n    let totalBytes = 0\r\n    const slices: CaptureLayout['slices'] = []\r\n    for (const [name, bufId] of plan.capturesByName) {\r\n      const spec = plan.buffers[bufId]!\r\n      // copyBufferToBuffer offsets must be 4-aligned. Capture byteSizes are\r\n      // always shape-product \u00D7 4 (f32/i32/bool all 4 bytes), so cumulative\r\n      // offsets stay aligned.\r\n      slices.push({ name, bufId, offset: totalBytes, byteSize: spec.byteSize })\r\n      totalBytes += spec.byteSize\r\n    }\r\n    const buffer = device.createBuffer({ size: totalBytes, usage: READBACK, label: 'captures-staging' })\r\n    captureStaging = { buffer, slices }\r\n    return captureStaging\r\n  }\r\n\r\n  // ---- dispatch() \u2014 shared core for step() and run() -----------------------\r\n  // Uploads inputs, dispatches all kernels (in order), queues writebacks, copies\r\n  // the output buffer into its staging, optionally copies captures into theirs,\r\n  // submits, and reads back. Returns the full output Float32Array; step() takes\r\n  // [0] for scalar loss, run() returns it whole.\r\n  //\r\n  // **Concurrent calls auto-serialize.** Two `step()`/`run()` calls on the same\r\n  // runtime would otherwise both try to `mapAsync` the shared output staging\r\n  // buffer at the same time and trip \"Buffer already has an outstanding map\r\n  // pending.\" We chain each new dispatch onto the prior one's promise so they\r\n  // run sequentially even when fired from independent async paths (e.g., a\r\n  // training loop's auxiliary `refreshPrediction()` + `writeDiagnostic()`).\r\n  let pending: Promise<unknown> = Promise.resolve()\r\n  type DispatchOpts = { wantCaptures: boolean; readback: boolean }\r\n  type DispatchResult = { output: Float32Array; captures: Map<string, Float32Array> } | null\r\n  async function dispatch(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts: DispatchOpts,\r\n  ): Promise<DispatchResult> {\r\n    const turn = pending.catch(() => {}).then(() => dispatchUnsynchronized(inputs, opts))\r\n    pending = turn\r\n    return turn\r\n  }\r\n  async function dispatchUnsynchronized(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts: DispatchOpts,\r\n  ): Promise<DispatchResult> {\r\n    const wantCaptures = opts.wantCaptures\r\n    if (wantCaptures && plan.capturesByName.size === 0) {\r\n      throw new Error(\r\n        `withCaptures=true but no capture(...) calls were registered during ` +\r\n        `the trace. Add capture('name', tensor) inside your forward pass for ` +\r\n        `the intermediates you want read back.`,\r\n      )\r\n    }\r\n    for (const [name, bufId] of plan.inputsByName) {\r\n      const data = inputs[name]\r\n      if (!data) throw new Error(`tensorgrad: missing input '${name}'`)\r\n      const expectedBytes = plan.buffers[bufId]!.byteSize\r\n      if (data.byteLength !== expectedBytes) {\r\n        throw new Error(`tensorgrad: input '${name}' has ${data.byteLength} bytes, expected ${expectedBytes}`)\r\n      }\r\n      // Cast to BufferSource: typed arrays are accepted by writeBuffer at runtime\r\n      // but TS may infer ArrayBufferLike (vs ArrayBuffer) under strict configs.\r\n      queue.writeBuffer(buffers.get(bufId)!, 0, data as unknown as BufferSource)\r\n    }\r\n\r\n    const encoder = device.createCommandEncoder({ label: 'tensorgrad-step' })\r\n    for (let i = 0; i < kernels.length; i++) {\r\n      const k = kernels[i]!\r\n      if (!k.wgsl || k.threads === 0) continue\r\n      const pipeline = pipelines[i]!\r\n      const bindGroup = bindGroups[i]!\r\n      const pass = encoder.beginComputePass({ label: k.opKind })\r\n      pass.setPipeline(pipeline)\r\n      pass.setBindGroup(0, bindGroup)\r\n      // WebGPU caps each dispatch dimension at 65535 workgroups. Split into 2D\r\n      // when a kernel needs more than that on the X axis. Kernels compute their\r\n      // global index as `gid.x + gid.y * (65535 * workgroup_size)`, matching the\r\n      // stride we set here. For dispatches that fit in one row, gid.y is 0.\r\n      const wgCount = Math.max(1, Math.ceil(k.threads / k.workgroupSize))\r\n      const MAX_X = 65535\r\n      const wgX = Math.min(wgCount, MAX_X)\r\n      const wgY = Math.ceil(wgCount / MAX_X)\r\n      pass.dispatchWorkgroups(wgX, wgY, 1)\r\n      pass.end()\r\n    }\r\n    // After all dispatches: writebacks (Adam state, updated params). Empty for\r\n    // forward-only compiles.\r\n    for (const wb of plan.writebacks) {\r\n      encoder.copyBufferToBuffer(buffers.get(wb.source)!, 0, buffers.get(wb.dest)!, 0, wb.bytes)\r\n    }\r\n    encoder.copyBufferToBuffer(buffers.get(lossBufferId)!, 0, outputReadback, 0, outputSpec.byteSize)\r\n    // Capture readbacks (only when opted in). All captures concatenate into\r\n    // a single staging buffer so we mapAsync once instead of N times.\r\n    let layout: CaptureLayout | null = null\r\n    if (wantCaptures) {\r\n      layout = ensureCaptureStaging()\r\n      for (const s of layout.slices) {\r\n        encoder.copyBufferToBuffer(buffers.get(s.bufId)!, 0, layout.buffer, s.offset, s.byteSize)\r\n      }\r\n    }\r\n    queue.submit([encoder.finish()])\r\n\r\n    // readback=false: training fire-and-forget. The encoder still copied\r\n    // loss \u2192 outputReadback (and captures \u2192 staging), but we don't await\r\n    // mapAsync. The caller can read the latest loss later via readLoss()\r\n    // when it actually wants to display it.\r\n    if (!opts.readback) return null\r\n\r\n    await outputReadback.mapAsync(GPUMapMode.READ)\r\n    const output = new Float32Array(outputReadback.getMappedRange().slice(0))\r\n    outputReadback.unmap()\r\n\r\n    const captures = new Map<string, Float32Array>()\r\n    if (layout) {\r\n      await layout.buffer.mapAsync(GPUMapMode.READ)\r\n      const range = layout.buffer.getMappedRange()\r\n      for (const s of layout.slices) {\r\n        // Copy out (slice) before unmap \u2014 the underlying ArrayBuffer is\r\n        // detached when the buffer unmaps.\r\n        captures.set(s.name, new Float32Array(range, s.offset, s.byteSize / 4).slice())\r\n      }\r\n      layout.buffer.unmap()\r\n    }\r\n    return { output, captures }\r\n  }\r\n\r\n  // ---- step() \u2014 training-mode wrapper, returns scalar [0] of output ---------\r\n  function step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: { readLoss: false }): Promise<void>\r\n  function step(inputs: Record<string, Int32Array | Float32Array>, opts: StepOptions): Promise<number | StepResult | void>\r\n  async function step(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts?: StepOptions,\r\n  ): Promise<number | StepResult | void> {\r\n    if (opts?.readLoss === false) {\r\n      await dispatch(inputs, { wantCaptures: false, readback: false })\r\n      return\r\n    }\r\n    const r = (await dispatch(inputs, { wantCaptures: opts?.withCaptures === true, readback: true }))!\r\n    if (opts?.withCaptures) return { loss: r.output[0]!, captures: new Captures(captureShapes, r.captures) }\r\n    return r.output[0]!\r\n  }\r\n\r\n  // ---- readLoss() \u2014 explicit late readback for fire-and-forget training -----\r\n  // Maps the output buffer (which step() always copies the latest loss into,\r\n  // even when readLoss:false) and returns the value. Goes through the same\r\n  // serialization chain as step()/run() so two readLoss() calls don't both\r\n  // try to mapAsync the same buffer.\r\n  async function readLoss(): Promise<number> {\r\n    const turn = pending.catch(() => {}).then(async () => {\r\n      await outputReadback.mapAsync(GPUMapMode.READ)\r\n      const v = new Float32Array(outputReadback.getMappedRange())[0]!\r\n      outputReadback.unmap()\r\n      return v\r\n    })\r\n    pending = turn\r\n    return turn\r\n  }\r\n\r\n  // ---- run() \u2014 forward-mode wrapper, returns Float32Array by default -------\r\n  // Same overloaded shape as step(): scalar-shaped result (here Float32Array,\r\n  // there a JS number) is the default; { ..., captures } is the opt-in form.\r\n  function run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\r\n  function run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\r\n  function run(inputs: Record<string, Int32Array | Float32Array>, opts: RunOptions): Promise<Float32Array | RunResult>\r\n  async function run(\r\n    inputs: Record<string, Int32Array | Float32Array>,\r\n    opts?: RunOptions,\r\n  ): Promise<Float32Array | RunResult> {\r\n    const r = (await dispatch(inputs, { wantCaptures: opts?.withCaptures === true, readback: true }))!\r\n    if (opts?.withCaptures) return { output: r.output, captures: new Captures(captureShapes, r.captures) }\r\n    return r.output\r\n  }\r\n\r\n  // ---- uploadParams ---------------------------------------------------------\r\n  function uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions) {\r\n    const partial = opts?.partial ?? false\r\n    for (const name of Object.keys(params)) {\r\n      if (!plan.paramsByName.has(name)) {\r\n        throw new Error(\r\n          `uploadParams: unknown param '${name}'. ` +\r\n          `Known: ${[...plan.paramsByName.keys()].sort().join(', ')}`,\r\n        )\r\n      }\r\n    }\r\n    if (!partial) {\r\n      for (const name of plan.paramsByName.keys()) {\r\n        if (!(name in params)) {\r\n          throw new Error(\r\n            `uploadParams: missing param '${name}'. ` +\r\n            `Pass { partial: true } if you mean to update only some params.`,\r\n          )\r\n        }\r\n      }\r\n    }\r\n    for (const [name, bufId] of plan.paramsByName) {\r\n      const data = params[name]\r\n      if (!data) continue\r\n      const expected = plan.buffers[bufId]!.byteSize / 4\r\n      if (data.length !== expected) {\r\n        throw new Error(`uploadParams: '${name}' has ${data.length} elements, expected ${expected}`)\r\n      }\r\n      queue.writeBuffer(buffers.get(bufId)!, 0, data as unknown as BufferSource)\r\n    }\r\n  }\r\n\r\n  // ---- download helpers -----------------------------------------------------\r\n  async function downloadFromMap(map: Map<string, number>): Promise<Record<string, Float32Array>> {\r\n    const stagings: { name: string; buf: GPUBuffer; bytes: number }[] = []\r\n    const encoder = device.createCommandEncoder({ label: 'tensorgrad-download' })\r\n    for (const [name, bufId] of map) {\r\n      const spec = plan.buffers[bufId]!\r\n      const staging = device.createBuffer({ size: spec.byteSize, usage: READBACK })\r\n      encoder.copyBufferToBuffer(buffers.get(bufId)!, 0, staging, 0, spec.byteSize)\r\n      stagings.push({ name, buf: staging, bytes: spec.byteSize })\r\n    }\r\n    queue.submit([encoder.finish()])\r\n    const out: Record<string, Float32Array> = {}\r\n    for (const s of stagings) {\r\n      await s.buf.mapAsync(GPUMapMode.READ)\r\n      out[s.name] = new Float32Array(s.buf.getMappedRange().slice(0))\r\n      s.buf.unmap()\r\n      s.buf.destroy()\r\n    }\r\n    return out\r\n  }\r\n\r\n  // Fill a state buffer with its declared initValue (typically 0). Float and\r\n  // int both serialize to 4 bytes per element. Used at allocation time and on\r\n  // resetOptimizerState() \u2014 same logic, two callers.\r\n  function fillStateBuffer(spec: { byteSize: number; dtype: 'f32' | 'i32' | 'bool'; initValue?: number }, target: GPUBuffer): void {\r\n    const elements = spec.byteSize / 4\r\n    const init = spec.dtype === 'f32'\r\n      ? new Float32Array(elements).fill(spec.initValue ?? 0)\r\n      : new Int32Array(elements).fill(Math.trunc(spec.initValue ?? 0))\r\n    queue.writeBuffer(target, 0, init as unknown as BufferSource)\r\n  }\r\n\r\n  function resetOptimizerState() {\r\n    for (const spec of plan.buffers) {\r\n      if (spec.kind === 'state') fillStateBuffer(spec, buffers.get(spec.id)!)\r\n    }\r\n  }\r\n\r\n  // Build the params map AFTER buffer allocation so it points at the actual\r\n  // GPUBuffers (shared or freshly allocated).\r\n  const params = new Map<string, GPUBuffer>()\r\n  for (const [name, bufId] of plan.paramsByName) {\r\n    params.set(name, buffers.get(bufId)!)\r\n  }\r\n  // Static-after-compile shape metadata so users don't have to recompute\r\n  // strides to interpret a flat capture readback.\r\n  const captureShapes: Record<string, number[]> = {}\r\n  for (const [name, bufId] of plan.capturesByName) {\r\n    captureShapes[name] = [...plan.buffers[bufId]!.shape]\r\n  }\r\n  const outputShape = [...plan.buffers[lossBufferId]!.shape]\r\n\r\n  const destroy = () => {\r\n    for (const [id, b] of buffers) {\r\n      if (ownedBufferIds.has(id)) b.destroy()\r\n    }\r\n    outputReadback.destroy()\r\n    if (captureStaging) captureStaging.buffer.destroy()\r\n  }\r\n\r\n  return {\r\n    device,\r\n    params,\r\n    outputShape,\r\n    uploadParams,\r\n    downloadParams: () => downloadFromMap(plan.paramsByName),\r\n    downloadParamGrads: () => downloadFromMap(plan.paramGradsByName),\r\n    step,\r\n    run,\r\n    readLoss,\r\n    resetOptimizerState,\r\n    destroy,\r\n  }\r\n}\r\n\r\n/** Same machinery as `createRuntime`, narrower public type: a forward-only\r\n *  graph exposes `run()` instead of `step()` (no optimizer state, no scalar-\r\n *  loss readback). The full runtime object is built once and projected by\r\n *  `compileForward` to the public shape. */\r\nexport async function createForwardRuntime(\r\n  plan: BufferPlan,\r\n  kernels: KernelSpec[],\r\n  outputBufferId: number,\r\n  opts: RuntimeOpts = {},\r\n): Promise<CompiledForward> {\r\n  return await createRuntime(plan, kernels, outputBufferId, opts)\r\n}\r\n\r\nasync function acquireDevice(): Promise<GPUDevice> {\r\n  if (typeof navigator === 'undefined' || !navigator.gpu) {\r\n    throw new Error('tensorgrad: WebGPU not available in this environment')\r\n  }\r\n  const adapter = await navigator.gpu.requestAdapter()\r\n  if (!adapter) throw new Error('tensorgrad: no WebGPU adapter')\r\n  return await adapter.requestDevice()\r\n}\r\n", "// Module abstraction \u2014 a Domeleon-style component layer for parameter trees.\n//\n// User code defines a model as nested classes:\n//\n//   class Linear extends Module {\n//     W: Tensor; b: Tensor\n//     constructor(inDim: number, outDim: number) {\n//       super()\n//       this.W = this.param([inDim, outDim])               // randn, scale 0.02\n//       this.b = this.param([outDim], { init: 'zeros' })\n//     }\n//   }\n//   class Block extends Module {\n//     attn = new Attention(D)\n//     mlp  = new MLP(D, 4 * D)\n//   }\n//   class Model extends Module {\n//     embed = new Linear(VOCAB, D)\n//     layers = range(N).map(() => new Block())\n//   }\n//\n// The param tree is discovered automatically at compile time by walking\n// enumerable instance properties. Each parameter gets a name auto-derived\n// from its path (`layers.0.attn.W_q`); names are used for upload/download\n// and writeback wiring. Forward functions are pure and stateless \u2014 they\n// take the materialized model and inputs, return a Tensor.\n\nimport type { Tensor, Shape, Dtype } from './ir.js'\nimport { paramInput } from './trace.js'\n\n// ============================================================================\n// Init metadata\n// ============================================================================\n\n/** How a parameter's initial values are produced. Serializable shape \u2014 no\n *  closures, since the initial values cross the worker boundary at compile\n *  time. Use the `init` helpers for ergonomic construction.\n *\n *  String shorthands:\n *  - `'randn'` \u2014 Gaussian with std 0.02 (the common weight-matrix init).\n *  - `'zeros'` \u2014 fill with 0 (biases, LayerNorm beta).\n *  - `'ones'`  \u2014 fill with 1 (LayerNorm gain).\n *\n *  Object shapes:\n *  - `{ kind: 'randn', scale }` \u2014 randn with explicit std.\n *  - `{ kind: 'kaiming', gain? }` \u2014 `std = gain / sqrt(fan_in)`. Default\n *    gain `sqrt(2)` (good for ReLU). `fan_in = shape[0]`.\n *  - `{ kind: 'literal', data }` \u2014 explicit Float32Array; length must\n *    match the parameter's element count.\n */\nexport type InitSpec =\n  | 'randn'\n  | 'zeros'\n  | 'ones'\n  | { readonly kind: 'randn'; readonly scale: number }\n  | { readonly kind: 'kaiming'; readonly gain?: number }\n  | { readonly kind: 'literal'; readonly data: Float32Array }\n\n/** Ergonomic constructors for InitSpec object shapes. */\nexport const init = {\n  randn: (opts: { scale?: number } = {}): InitSpec => ({ kind: 'randn', scale: opts.scale ?? 0.02 }),\n  kaiming: (opts: { gain?: number } = {}): InitSpec =>\n    opts.gain !== undefined ? { kind: 'kaiming', gain: opts.gain } : { kind: 'kaiming' },\n  literal: (data: Float32Array): InitSpec => ({ kind: 'literal', data }),\n}\n\nexport interface ParamOptions {\n  dtype?: Dtype\n  /** Init shape. Default: `'randn'` (std 0.02). */\n  init?: InitSpec\n  /** Whether AdamW (when `weightDecay > 0`) should apply decoupled weight\n   *  decay to this param. Default: `true` for randn/kaiming/literal init\n   *  (weight matrices, embeddings); `false` for zeros/ones (biases, LN\n   *  gains). Override to force or skip. Replaces `adam.decayFilter` for\n   *  the common case. */\n  decay?: boolean\n}\n\ntype InitFn = (size: number, shape: readonly number[]) => Float32Array\n\nfunction boxMuller(): number {\n  return Math.sqrt(-2 * Math.log(Math.max(1e-10, Math.random()))) * Math.cos(2 * Math.PI * Math.random())\n}\n\nfunction randnFn(scale: number): InitFn {\n  return (size) => {\n    const arr = new Float32Array(size)\n    for (let i = 0; i < size; i++) arr[i] = boxMuller() * scale\n    return arr\n  }\n}\n\n/** Compile-time-only: resolve an InitSpec shape into the closure that\n *  generates the initial Float32Array for a given parameter shape. Runs\n *  on the main thread before initial values are transferred to the worker. */\nfunction resolveInit(spec: InitSpec | undefined): InitFn {\n  if (!spec || spec === 'randn') return randnFn(0.02)\n  if (spec === 'zeros') return (size) => new Float32Array(size)\n  if (spec === 'ones') return (size) => { const a = new Float32Array(size); a.fill(1); return a }\n  switch (spec.kind) {\n    case 'randn': return randnFn(spec.scale)\n    case 'kaiming': {\n      const gain = spec.gain ?? Math.sqrt(2)\n      return (size, shape) => {\n        const fanIn = shape[0] ?? size\n        const std = gain / Math.sqrt(fanIn)\n        const arr = new Float32Array(size)\n        for (let i = 0; i < size; i++) arr[i] = boxMuller() * std\n        return arr\n      }\n    }\n    case 'literal': {\n      const data = spec.data\n      return (size) => {\n        if (data.length !== size) {\n          throw new Error(`init.literal: data length ${data.length} doesn't match param size ${size}`)\n        }\n        return new Float32Array(data)\n      }\n    }\n  }\n}\n\n/** Resolve the decay default for a param. Weight-shaped inits (randn,\n *  kaiming, literal) default to decay=true; ones/zeros default to false\n *  (biases, LN gains). Explicit `decay` opt overrides. */\nfunction resolveDecay(opts: ParamOptions | undefined): boolean {\n  if (opts?.decay !== undefined) return opts.decay\n  const spec = opts?.init ?? 'randn'\n  return spec !== 'zeros' && spec !== 'ones'\n}\n\n// ============================================================================\n// Internals: param sentinel\n// ============================================================================\n//\n// `this.param(shape)` returns a placeholder that's replaced by a real Tensor\n// during `materializeParams`. We type-cheat by declaring the return type as\n// `Tensor` so user code can write `this.W` and have TS happy; the cheat is\n// only valid post-materialization (which is always before forward runs).\n\nclass ParamSentinel {\n  constructor(\n    public readonly shape: Shape,\n    public readonly dtype: Dtype,\n    public readonly initFn: InitFn,\n    public readonly decay: boolean,\n  ) {}\n}\n\n// ============================================================================\n// Module base class\n// ============================================================================\n\nexport abstract class Module {\n  /**\n   * Declare a learnable parameter at this module. Must be called from inside\n   * the constructor (typically as a field assignment). Returns a placeholder\n   * that gets replaced with a real Tensor at compile time.\n   *\n   * The parameter's name is auto-derived from its property path in the model\n   * tree (e.g. `layers.0.attn.W_q`). Init metadata travels with the param;\n   * call `compiled.uploadInitialParams()` to apply it after compile.\n   */\n  protected param(shape: Shape, opts?: ParamOptions): Tensor {\n    const dtype = opts?.dtype ?? 'f32'\n    // Lie to TypeScript: the sentinel becomes a Tensor at materialize time.\n    return new ParamSentinel(shape, dtype, resolveInit(opts?.init), resolveDecay(opts)) as unknown as Tensor\n  }\n}\n\n// ============================================================================\n// Tree walking\n// ============================================================================\n\nexport interface MaterializedParams {\n  /** Map from auto-derived path (e.g. `layers.0.attn.W_q`) to its Tensor. */\n  tensors: Record<string, Tensor>\n  /** Init function per param path. Used by `uploadInitialParams`. */\n  initFns: Record<string, InitFn>\n  /** Whether this param should receive AdamW weight decay. Resolved at\n   *  `param()` time from `ParamOptions.decay` (with init-based default). */\n  decayFlags: Record<string, boolean>\n}\n\n/**\n * Walk the module tree and replace every ParamSentinel with a real Tensor\n * created via `paramInput(autoName, ...)`. Must be called inside an active\n * trace context (paramInput appends to the current graph).\n *\n * Returns the param tensors keyed by path, plus init functions for use by\n * `uploadInitialParams`.\n */\nexport function materializeParams(root: Module): MaterializedParams {\n  const tensors: Record<string, Tensor> = {}\n  const initFns: Record<string, InitFn> = {}\n  const decayFlags: Record<string, boolean> = {}\n  visit(root, '', (path, val, owner, key) => {\n    if (val instanceof ParamSentinel) {\n      const t = paramInput(path, val.shape, val.dtype)\n      ;(owner as any)[key] = t\n      tensors[path] = t\n      initFns[path] = val.initFn\n      decayFlags[path] = val.decay\n    }\n  })\n  return { tensors, initFns, decayFlags }\n}\n\n// ----------------------------------------------------------------------------\n// Visitor\n// ----------------------------------------------------------------------------\n//\n// Walks enumerable own properties recursively, building a path string. Recurses\n// into nested Modules and arrays of Modules (or arrays of arrays, etc.).\n// Calls `visitor` on every leaf \u2014 including ParamSentinels (pre-materialize)\n// and real Tensor leaves (post-materialize).\n\ntype Visitor = (path: string, val: unknown, owner: object, key: string | number) => void\n\nfunction visit(node: unknown, path: string, visitor: Visitor): void {\n  if (node === null || node === undefined) return\n  if (typeof node !== 'object') return\n\n  if (node instanceof Module) {\n    for (const key of Object.keys(node as object)) {\n      const child = (node as any)[key]\n      const childPath = path ? `${path}.${key}` : key\n      visitChild(child, childPath, node, key, visitor)\n    }\n    return\n  }\n  if (Array.isArray(node)) {\n    node.forEach((item, i) => {\n      const childPath = path ? `${path}.${i}` : String(i)\n      visitChild(item, childPath, node as unknown as object, i, visitor)\n    })\n    return\n  }\n  // Plain leaf object (sentinel / tensor / something else): visitor decides.\n  // No deeper recursion.\n}\n\nfunction visitChild(child: unknown, path: string, owner: object, key: string | number, visitor: Visitor): void {\n  if (child instanceof Module || Array.isArray(child)) {\n    visit(child, path, visitor)\n  } else {\n    visitor(path, child, owner, key)\n  }\n}\n", "// Wire format for the main-thread \u2194 worker postMessage channel.\n//\n// All requests carry a numeric `id` assigned by the main thread; responses\n// echo it back so the proxy can match concurrent in-flight calls. Every\n// response is either `{ ok: true, result }` or `{ ok: false, error }`.\n// Errors carry serialized name/message/stack so the proxy can reconstitute\n// an Error with a working `instanceof` check on the receiving side.\n//\n// Inputs (typed arrays) and outputs (typed arrays, captures) are transferred\n// rather than copied \u2014 see the per-request notes for which fields go on the\n// transfer list. A single worker may host multiple compiled graphs (a train\n// graph plus sibling forward graphs); each has a `graphId` issued by the\n// main thread at compile time.\n\nimport type { Graph } from './ir.js'\nimport type { BufferPlan } from './buffers.js'\nimport type { KernelSpec } from './codegen.js'\nimport type { LRSchedule } from './adam.js'\n\n// ============================================================================\n// Serializable config (subset of AdamResolvedConfig that crosses the wire).\n// `decayFilter` (a function, used only at compile time) is NOT part of this \u2014\n// the per-param decay decision is already baked into the IR by appendAdam\n// before the IR ships to the worker.\n// ============================================================================\n\nexport interface WireAdamConfig {\n  lr: LRSchedule\n  b1: number\n  b2: number\n  eps: number\n  weightDecay: number\n  lrIsScheduled: boolean\n  /** Names of the per-step scalar inputs the worker must populate before\n   *  every step (`_adam_lrt`, optionally `_adam_decay_shrink`). Mirrors\n   *  AdamResult so the worker can update them without re-deriving. */\n  lrtInputName: string\n  decayShrinkInputName: string | null\n}\n\n/** Compile output that crosses to the worker. Same fields as CompiledIR\n *  minus the `loss` tensor (carried by graph.outputs[0]). */\nexport interface WireIR {\n  graph: Graph\n  plan: BufferPlan\n  kernels: KernelSpec[]\n}\n\n// ============================================================================\n// Requests (main \u2192 worker)\n// ============================================================================\n\nexport type Req =\n  | { id: number; kind: 'createRuntime'; payload: CreateRuntimePayload }\n  | { id: number; kind: 'compileForward'; payload: CompileForwardPayload }\n  | { id: number; kind: 'step'; payload: StepPayload }\n  | { id: number; kind: 'run'; payload: RunPayload }\n  | { id: number; kind: 'uploadParams'; payload: UploadParamsPayload }\n  | { id: number; kind: 'downloadParams'; payload: { graphId: number } }\n  | { id: number; kind: 'downloadParamGrads'; payload: { graphId: number } }\n  | { id: number; kind: 'resetOptimizer'; payload: { graphId: number } }\n  | { id: number; kind: 'destroy'; payload: { graphId: number } }\n\n/** Build the training runtime. Always graphId=0 for a fresh worker. */\nexport interface CreateRuntimePayload {\n  graphId: number\n  ir: WireIR\n  /** Initial param values per name. Transferred (zero-copy) \u2014 the main\n   *  thread loses access after postMessage. */\n  initialParams: Record<string, Float32Array>\n  /** Adam config when training; absent for forward-only compiles. */\n  adam: WireAdamConfig | null\n}\n\n/** Build a sibling forward-only graph that shares param buffers with an\n *  existing graph (typically the training graph at graphId=0). */\nexport interface CompileForwardPayload {\n  graphId: number\n  parentGraphId: number\n  ir: WireIR\n}\n\n/** One training step. Inputs are transferred; the caller's typed arrays\n *  become detached after postMessage. */\nexport interface StepPayload {\n  graphId: number\n  inputs: Record<string, Int32Array | Float32Array>\n  withCaptures: boolean\n}\n\n/** Forward-only run. Same transfer semantics as `step`. */\nexport interface RunPayload {\n  graphId: number\n  inputs: Record<string, Int32Array | Float32Array>\n  withCaptures: boolean\n}\n\nexport interface UploadParamsPayload {\n  graphId: number\n  params: Record<string, Float32Array>  // transferred\n  partial: boolean\n}\n\n// ============================================================================\n// Responses (worker \u2192 main)\n// ============================================================================\n\nexport type Res<R = unknown> =\n  | { id: number; ok: true; result: R }\n  | { id: number; ok: false; error: WireError }\n\nexport interface WireError {\n  name: string\n  message: string\n  stack: string\n}\n\n// Per-request result shapes:\n\nexport interface CreateRuntimeResult {\n  paramNames: string[]\n  outputShape: number[]\n  kernelCount: number\n  captureShapes: Record<string, number[]>\n}\n\nexport interface CompileForwardResult {\n  paramNames: string[]\n  outputShape: number[]\n  kernelCount: number\n  captureShapes: Record<string, number[]>\n}\n\n/** Step without `withCaptures` returns just `loss`. With captures, also\n *  populates `captures` (per-name Float32Array, all transferred back). */\nexport interface StepResultWire {\n  loss: number\n  captures: Record<string, Float32Array> | null\n}\n\n/** Run without `withCaptures` returns `{ output, captures: null }`.\n *  With captures, also populates `captures`. */\nexport interface RunResultWire {\n  output: Float32Array\n  captures: Record<string, Float32Array> | null\n}\n\nexport interface DownloadParamsResult {\n  params: Record<string, Float32Array>  // transferred\n}\n\n// ============================================================================\n// Transfer-list helpers\n// ============================================================================\n\n/** Collect the underlying ArrayBuffers from a Record of typed arrays so we\n *  can pass them on `postMessage`'s transfer list. The values themselves\n *  stay in the Record; only their backing buffers move. */\nexport function transferablesOfRecord(\n  rec: Record<string, Int32Array | Float32Array>,\n): ArrayBuffer[] {\n  const out: ArrayBuffer[] = []\n  for (const v of Object.values(rec)) out.push(v.buffer as ArrayBuffer)\n  return out\n}\n\n/** Serialize an Error to a wire-friendly shape, preserving stack + name so\n *  the receiving side can reconstitute an Error that an `instanceof`-aware\n *  caller (e.g., for `ShapeError`) can still pattern-match by name. */\nexport function wireError(e: unknown): WireError {\n  if (e instanceof Error) {\n    return { name: e.name, message: e.message, stack: e.stack ?? '' }\n  }\n  return { name: 'Error', message: String(e), stack: '' }\n}\n\n/** Reconstitute an Error from the wire shape on the receiving (main) side. */\nexport function reconstituteError(w: WireError): Error {\n  const err = new Error(w.message)\n  err.name = w.name\n  err.stack = w.stack\n  return err\n}\n", "// Main-thread half of the worker channel: request/response correlation,\n// promise wiring, error reconstitution. Knows nothing about Adam, captures,\n// IR, etc. \u2014 just shuttles typed messages.\n\nimport type { Req, Res, WireError } from './worker-protocol.js'\nimport { reconstituteError } from './worker-protocol.js'\n\ninterface PendingHandlers {\n  resolve: (v: unknown) => void\n  reject: (e: Error) => void\n}\n\n/** Spawn a worker from an inlined source string and provide a typed\n *  request/response channel. One WorkerProxy = one Worker = one GPUDevice\n *  on the worker side. Sibling graphs share the same WorkerProxy. */\nexport class WorkerProxy {\n  private worker: Worker\n  private nextId = 1\n  private pending = new Map<number, PendingHandlers>()\n  private terminated = false\n\n  constructor(workerSource: string) {\n    const blob = new Blob([workerSource], { type: 'application/javascript' })\n    const url = URL.createObjectURL(blob)\n    this.worker = new Worker(url, { type: 'module' })\n    // The Blob URL keeps memory alive as long as it's referenced; revoke\n    // once the worker has loaded its source. Browsers tolerate revoke\n    // immediately after construction in practice.\n    URL.revokeObjectURL(url)\n\n    this.worker.onmessage = (ev: MessageEvent<Res>) => {\n      const reply = ev.data\n      const handlers = this.pending.get(reply.id)\n      if (!handlers) return  // stale reply; ignore\n      this.pending.delete(reply.id)\n      if (reply.ok) handlers.resolve(reply.result)\n      else handlers.reject(reconstituteError(reply.error))\n    }\n\n    this.worker.onerror = (ev: ErrorEvent) => {\n      const err = new Error(`tensorgrad worker error: ${ev.message || 'unknown'}`)\n      const wire: WireError = { name: 'WorkerError', message: err.message, stack: err.stack ?? '' }\n      // Reject everything in flight; subsequent calls will fail too.\n      for (const handlers of this.pending.values()) handlers.reject(reconstituteError(wire))\n      this.pending.clear()\n    }\n  }\n\n  /** Send a request and await its matching response. `transfer` lists the\n   *  ArrayBuffers to move (zero-copy) into the worker. */\n  request<R>(req: Omit<Req, 'id'>, transfer: ArrayBuffer[] = []): Promise<R> {\n    if (this.terminated) return Promise.reject(new Error('tensorgrad: worker has been terminated'))\n    const id = this.nextId++\n    return new Promise<R>((resolve, reject) => {\n      this.pending.set(id, { resolve: resolve as (v: unknown) => void, reject })\n      this.worker.postMessage({ ...req, id } as Req, transfer)\n    })\n  }\n\n  /** Fire-and-forget variant for cases where the caller doesn't need a reply\n   *  (currently unused; keep for symmetry / future use). */\n  send(req: Omit<Req, 'id'>, transfer: ArrayBuffer[] = []): void {\n    if (this.terminated) return\n    const id = this.nextId++\n    this.worker.postMessage({ ...req, id } as Req, transfer)\n  }\n\n  terminate(): void {\n    if (this.terminated) return\n    this.terminated = true\n    this.worker.terminate()\n    const err = new Error('tensorgrad: worker terminated')\n    for (const handlers of this.pending.values()) handlers.reject(err)\n    this.pending.clear()\n  }\n}\n", "// Top-level compile(): trace \u2192 autograd \u2192 buffer plan \u2192 codegen \u2192 runtime.\n//\n// Two entry points:\n//   * `compile(traceFn)`        \u2014 low-level. User declares params via\n//                                 paramInput() inside the trace.\n//   * `compileModule(model, \u2026)` \u2014 high-level. User defines the model as a\n//                                 Module tree; the library auto-discovers\n//                                 params, traces the forward, appends grad\n//                                 and Adam, and returns a runtime.\n//\n// As of the worker-architecture refactor: compile-time work (trace, autograd,\n// buffer planning, codegen) runs on the main thread. createRuntime and all\n// dispatch/mapAsync work runs in a Web Worker spawned per top-level compile;\n// the returned `CompiledModule` is a thin proxy over the worker channel.\n// See specs/WorkerArchitecture.md.\n\nimport type { Tensor, Shape, Dtype } from './ir.js'\nimport { trace, tensorInput } from './trace.js'\nimport { appendGrad, type GradResult } from './grad.js'\nimport {\n  appendAdam, resolveLR,\n  type AdamConfig, type AdamResult, type AdamResolvedConfig,\n} from './adam.js'\nimport { planBuffers, type BufferPlan } from './buffers.js'\nimport { emitKernels, type KernelSpec } from './codegen.js'\nimport {\n  Captures, type RunResult, type StepResult, type RunOptions, type UploadParamsOptions,\n} from './runtime.js'\nimport { Module, materializeParams, type MaterializedParams } from './module.js'\nimport { WorkerProxy } from './worker-proxy.js'\nimport {\n  transferablesOfRecord,\n  type Req, type WireIR, type WireAdamConfig,\n  type CreateRuntimeResult, type CompileForwardResult,\n  type StepResultWire, type RunResultWire, type DownloadParamsResult,\n} from './worker-protocol.js'\n\n// `__WORKER_SOURCE__` is replaced at build time by scripts/build.mjs with the\n// stringified contents of the bundled src/worker.ts. Declared here so TS is\n// happy; substituted as a string literal by esbuild's `define` during\n// `npm run build:js`. See scripts/build.mjs.\ndeclare const __WORKER_SOURCE__: string\n\n// ============================================================================\n// Public types\n// ============================================================================\n\n/** Declares one input tensor of the model's forward function. The name is the\n *  key in the `inputs:` Record at compile time and the key on the `step()`/\n *  `run()` data object at runtime. */\nexport interface InputDecl {\n  shape: Shape\n  dtype?: Dtype\n}\n\n/** Inputs declaration: a Record from input name to its shape/dtype. */\nexport type InputDecls = Record<string, InputDecl>\n\n/** Maps an `InputDecls` Record to its forward-time tensor counterpart \u2014\n *  same keys, each value is a Tensor. */\nexport type InputsTensors<I extends InputDecls> = { [K in keyof I]: Tensor }\n\n/** Forward function shape. */\nexport type ForwardFn<M extends Module, I extends InputDecls = InputDecls> =\n  (m: M, inputs: InputsTensors<I>) => Tensor\n\nexport interface CompiledIR {\n  graph: GradResult['graph']\n  paramGrads: GradResult['paramGrads']\n  loss: Tensor\n  plan: BufferPlan\n  kernels: KernelSpec[]\n}\n\n/** Trace + autograd + buffer-plan + codegen, without touching WebGPU. */\nexport function compileToIR(traceFn: () => Tensor): CompiledIR {\n  const graph = trace(traceFn)\n  const { paramGrads, loss } = appendGrad(graph)\n  const plan = planBuffers(graph, paramGrads)\n  const kernels = emitKernels(graph, plan)\n  return { graph, paramGrads, loss, plan, kernels }\n}\n\n// ============================================================================\n// CompiledModule / CompiledForwardModule \u2014 main-thread proxy surface\n// ============================================================================\n\nexport interface CompileModuleOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n  adam?: AdamConfig\n}\n\nexport interface CompileForwardOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n}\n\nexport interface CompileForwardMethodOptions<I extends InputDecls = InputDecls> {\n  inputs?: I\n}\n\n/** Returned by `compileModule`. Proxies all GPU work to a worker held\n *  internally; user code awaits Promises and never sees the worker. */\nexport interface CompiledModule<M extends Module> {\n  readonly ir: CompiledIR\n  readonly kernelCount: number\n  readonly outputShape: readonly number[]\n  /** Names of the model's parameters, in materialization order. The actual\n   *  GPUBuffers live in the worker; use `downloadParams()` for values. */\n  readonly paramNames: readonly string[]\n\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void>\n  downloadParams(): Promise<Record<string, Float32Array>>\n  downloadParamGrads(): Promise<Record<string, Float32Array>>\n\n  /** Re-initialize all params + zero optimizer state. */\n  reset(): Promise<void>\n  resetOptimizerState(): Promise<void>\n\n  /** Compile a sibling forward-only graph that shares this runtime's worker\n   *  (and therefore its param GPUBuffers). */\n  compileForward<I extends InputDecls>(\n    forward: ForwardFn<M, I>,\n    opts?: CompileForwardMethodOptions<I>,\n  ): Promise<CompiledForwardModule>\n\n  /** Free the runtime's GPU resources and terminate the worker. */\n  destroy(): void\n}\n\n/** Returned by `compileForward` (and by the `compileForward` method). */\nexport interface CompiledForwardModule {\n  readonly ir: CompiledIR\n  readonly kernelCount: number\n  readonly outputShape: readonly number[]\n  readonly paramNames: readonly string[]\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void>\n  downloadParams(): Promise<Record<string, Float32Array>>\n\n  destroy(): void\n}\n\n// ============================================================================\n// compileModule / compileForward\n// ============================================================================\n\n/**\n * Compile a Module-based model. Pass a *factory* `() => new Model()`, not the\n * model instance itself: compilation mutates the tree (every `ParamSentinel`\n * field becomes a real `Tensor`), so the instance is consumed and shouldn't be\n * referenced afterwards.\n *\n * The forward function takes the materialized model and a Record of named\n * input tensors, returns the loss tensor:\n *\n *   inputs: {\n *     tokens:  { shape: [B, T], dtype: 'i32' },\n *     targets: { shape: [B, T], dtype: 'i32' },\n *   }\n *   forward: (m, { tokens, targets }) => \u2026\n *\n * Returns a `CompiledModule` proxy. All GPU work (createRuntime, step, run,\n * mapAsync) happens in an internal worker; calls return Promises that resolve\n * when the worker replies.\n */\nexport async function compileModule<M extends Module, I extends InputDecls = InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  opts: CompileModuleOptions<I> = {},\n): Promise<CompiledModule<M>> {\n  // ---- Compile-time work (main thread) ------------------------------------\n  const { graph, materialized } = traceModule(modelFactory, forward, opts.inputs ?? {})\n  const { paramGrads, loss } = appendGrad(graph)\n  const adamResult = opts.adam\n    ? appendAdam(graph, paramGrads, materialized.tensors, opts.adam, materialized.decayFlags)\n    : undefined\n\n  const plan = planBuffers(graph, paramGrads, adamResult?.writebacks ?? [])\n  const kernels = emitKernels(graph, plan)\n  const ir: CompiledIR = { graph, paramGrads, loss, plan, kernels }\n\n  // Initial params: resolve init shapes to Float32Arrays now (main thread).\n  // These transfer (zero-copy) to the worker as part of createRuntime.\n  const initialParams = buildInitialParams(plan, materialized.initFns)\n\n  // ---- Spawn worker, send IR + initial params -----------------------------\n  const proxy = new WorkerProxy(__WORKER_SOURCE__)\n  const wireIR: WireIR = { graph, plan, kernels }\n  const wireAdam = adamResult ? wireAdamConfig(adamResult) : null\n  const transfers = transferablesOfRecord(initialParams)\n\n  let meta: CreateRuntimeResult\n  try {\n    meta = await proxy.request<CreateRuntimeResult>(\n      { kind: 'createRuntime', payload: { graphId: 0, ir: wireIR, initialParams, adam: wireAdam } },\n      transfers,\n    )\n  } catch (e) {\n    proxy.terminate()\n    throw e\n  }\n\n  return new CompiledModuleProxy<M>(\n    proxy, /* graphId */ 0, ir, meta, modelFactory,\n    /* initFns */ materialized.initFns,\n    /* nextGraphId */ { v: 1 },\n  )\n}\n\n/**\n * Forward-only compile. Spawns its own worker. For sibling graphs that share\n * params with a training graph, prefer the `compileForward` method on the\n * CompiledModule returned by `compileModule()`.\n */\nexport async function compileForward<M extends Module, I extends InputDecls = InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  opts: CompileForwardOptions<I> = {},\n): Promise<CompiledForwardModule> {\n  const { graph, materialized } = traceModule(modelFactory, forward, opts.inputs ?? {})\n  const outputTensor = graph.tensors[graph.outputs[0]!]!\n  const plan = planBuffers(graph, /* paramGrads */ {})\n  const kernels = emitKernels(graph, plan)\n  const ir: CompiledIR = { graph, paramGrads: {}, loss: outputTensor, plan, kernels }\n\n  const initialParams = buildInitialParams(plan, materialized.initFns)\n  const proxy = new WorkerProxy(__WORKER_SOURCE__)\n  const wireIR: WireIR = { graph, plan, kernels }\n  const transfers = transferablesOfRecord(initialParams)\n\n  let meta: CreateRuntimeResult\n  try {\n    meta = await proxy.request<CreateRuntimeResult>(\n      { kind: 'createRuntime', payload: { graphId: 0, ir: wireIR, initialParams, adam: null } },\n      transfers,\n    )\n  } catch (e) {\n    proxy.terminate()\n    throw e\n  }\n\n  return new CompiledForwardModuleProxy(proxy, /* graphId */ 0, ir, meta, /* ownsWorker */ true)\n}\n\n// ============================================================================\n// Proxy implementations\n// ============================================================================\n\nclass CompiledModuleProxy<M extends Module> implements CompiledModule<M> {\n  constructor(\n    private readonly proxy: WorkerProxy,\n    private readonly graphId: number,\n    public readonly ir: CompiledIR,\n    private readonly meta: CreateRuntimeResult,\n    private readonly modelFactory: () => M,\n    /** Init closures captured from materializeParams at compile time. Used\n     *  by reset() to regenerate initial param values. */\n    private readonly initFns: Record<string, InitFn>,\n    private readonly nextGraphId: { v: number },\n  ) {}\n\n  get kernelCount(): number { return this.meta.kernelCount }\n  get outputShape(): readonly number[] { return this.meta.outputShape }\n  get paramNames(): readonly string[] { return this.meta.paramNames }\n\n  step(inputs: Record<string, Int32Array | Float32Array>): Promise<number>\n  step(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<StepResult>\n  async step(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<number | StepResult> {\n    // Note: inputs are copied (not transferred) into the worker. Callers\n    // commonly reuse the same TypedArray as a scratch buffer across step()\n    // calls; transferring would detach it. The copy cost is small relative\n    // to a training step's GPU work.\n    const r = await this.proxy.request<StepResultWire>(\n      { kind: 'step', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { loss: r.loss, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.loss\n  }\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n  async run(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<Float32Array | RunResult> {\n    // Inputs copied (see note in step()).\n    const r = await this.proxy.request<RunResultWire>(\n      { kind: 'run', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { output: r.output, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.output\n  }\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void> {\n    // Params copied (see note in step()) \u2014 caller's Float32Arrays stay valid.\n    return this.proxy.request<null>(\n      { kind: 'uploadParams', payload: { graphId: this.graphId, params, partial: !!opts?.partial } },\n    ).then(() => undefined)\n  }\n\n  async downloadParams(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParams', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  async downloadParamGrads(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParamGrads', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  async reset(): Promise<void> {\n    // Re-init main-thread, upload, then reset Adam state on worker. Two\n    // round-trips but reset() is rare. The init closures were captured at\n    // compile time and stashed on the proxy.\n    const initialParams = buildInitialParams(this.ir.plan, this.initFns)\n    await this.uploadParams(initialParams)\n    await this.resetOptimizerState()\n  }\n\n  resetOptimizerState(): Promise<void> {\n    return this.proxy.request<null>(\n      { kind: 'resetOptimizer', payload: { graphId: this.graphId } },\n    ).then(() => undefined)\n  }\n\n  async compileForward<I extends InputDecls>(\n    forward: ForwardFn<M, I>,\n    opts: CompileForwardMethodOptions<I> = {},\n  ): Promise<CompiledForwardModule> {\n    const { graph, materialized: _materialized } = traceModule(this.modelFactory, forward, opts.inputs ?? {})\n    const outputTensor = graph.tensors[graph.outputs[0]!]!\n    const plan = planBuffers(graph, /* paramGrads */ {})\n    const kernels = emitKernels(graph, plan)\n    const ir: CompiledIR = { graph, paramGrads: {}, loss: outputTensor, plan, kernels }\n\n    const childGraphId = this.nextGraphId.v++\n    const wireIR: WireIR = { graph, plan, kernels }\n\n    const meta = await this.proxy.request<CompileForwardResult>(\n      { kind: 'compileForward', payload: { graphId: childGraphId, parentGraphId: this.graphId, ir: wireIR } },\n    )\n\n    return new CompiledForwardModuleProxy(this.proxy, childGraphId, ir, meta, /* ownsWorker */ false)\n  }\n\n  destroy(): void {\n    // Fire-and-forget destroy; postMessage ordering ensures the worker\n    // processes any in-flight requests before we terminate it.\n    this.proxy.send({ kind: 'destroy', payload: { graphId: this.graphId } })\n    this.proxy.terminate()\n  }\n}\n\nclass CompiledForwardModuleProxy implements CompiledForwardModule {\n  constructor(\n    private readonly proxy: WorkerProxy,\n    private readonly graphId: number,\n    public readonly ir: CompiledIR,\n    private readonly meta: CompileForwardResult | CreateRuntimeResult,\n    private readonly ownsWorker: boolean,\n  ) {}\n\n  get kernelCount(): number { return this.meta.kernelCount }\n  get outputShape(): readonly number[] { return this.meta.outputShape }\n  get paramNames(): readonly string[] { return this.meta.paramNames }\n\n  run(inputs: Record<string, Int32Array | Float32Array>): Promise<Float32Array>\n  run(inputs: Record<string, Int32Array | Float32Array>, opts: { withCaptures: true }): Promise<RunResult>\n  async run(\n    inputs: Record<string, Int32Array | Float32Array>,\n    opts?: { withCaptures?: boolean },\n  ): Promise<Float32Array | RunResult> {\n    // Inputs copied; caller's TypedArrays stay valid.\n    const r = await this.proxy.request<RunResultWire>(\n      { kind: 'run', payload: { graphId: this.graphId, inputs, withCaptures: opts?.withCaptures === true } },\n    )\n    if (opts?.withCaptures) {\n      return { output: r.output, captures: makeCaptures(r.captures, this.meta.captureShapes) }\n    }\n    return r.output\n  }\n\n  uploadParams(params: Record<string, Float32Array>, opts?: UploadParamsOptions): Promise<void> {\n    return this.proxy.request<null>(\n      { kind: 'uploadParams', payload: { graphId: this.graphId, params, partial: !!opts?.partial } },\n    ).then(() => undefined)\n  }\n\n  async downloadParams(): Promise<Record<string, Float32Array>> {\n    const r = await this.proxy.request<DownloadParamsResult>(\n      { kind: 'downloadParams', payload: { graphId: this.graphId } },\n    )\n    return r.params\n  }\n\n  destroy(): void {\n    this.proxy.send({ kind: 'destroy', payload: { graphId: this.graphId } })\n    if (this.ownsWorker) this.proxy.terminate()\n  }\n}\n\n// ============================================================================\n// Internals\n// ============================================================================\n\ntype Graph = ReturnType<typeof trace>\ntype InitFn = (size: number, shape: readonly number[]) => Float32Array\n\n/** Trace the forward function with a fresh model + tensor inputs and capture\n *  the materialized params. Shared by both compile entry points; everything\n *  past this point (grad/adam/buffer plan/runtime) diverges. */\nfunction traceModule<M extends Module, I extends InputDecls>(\n  modelFactory: () => M,\n  forward: ForwardFn<M, I>,\n  inputDecls: InputDecls,\n): { graph: Graph; materialized: MaterializedParams } {\n  const model = modelFactory()\n  let materialized: MaterializedParams = { tensors: {}, initFns: {}, decayFlags: {} }\n  const graph = trace(() => {\n    materialized = materializeParams(model)\n    const inputTensors: Record<string, Tensor> = {}\n    for (const [name, decl] of Object.entries(inputDecls)) {\n      inputTensors[name] = tensorInput(name, decl.shape, decl.dtype ?? 'f32')\n    }\n    return forward(model, inputTensors as InputsTensors<I>)\n  })\n  return { graph, materialized }\n}\n\n/** Run each param's init function against its declared shape to produce the\n *  initial Float32Arrays. Runs main-thread before transfer to the worker. */\nfunction buildInitialParams(plan: BufferPlan, initFns: Record<string, InitFn>): Record<string, Float32Array> {\n  const out: Record<string, Float32Array> = {}\n  for (const [name, bufId] of plan.paramsByName) {\n    const shape = plan.buffers[bufId]!.shape\n    const size = shape.reduce((a, b) => a * b, 1)\n    const initFn = initFns[name]\n    if (!initFn) throw new Error(`compile: no init for param '${name}'`)\n    out[name] = initFn(size, shape)\n  }\n  return out\n}\n\n/** Subset of AdamResolvedConfig that crosses the wire (drops decayFilter,\n *  which is only used at compile time). */\nfunction wireAdamConfig(r: AdamResult): WireAdamConfig {\n  const c: AdamResolvedConfig = r.config\n  return {\n    lr: c.lr,\n    b1: c.b1,\n    b2: c.b2,\n    eps: c.eps,\n    weightDecay: c.weightDecay,\n    lrIsScheduled: c.lrIsScheduled,\n    lrtInputName: r.lrtInputName,\n    decayShrinkInputName: r.decayShrinkInputName,\n  }\n}\n\n/** Wrap a worker-returned `Record<name, Float32Array>` in a Captures instance\n *  using the static capture shapes captured at compile time. */\nfunction makeCaptures(\n  captures: Record<string, Float32Array> | null,\n  captureShapes: Record<string, number[]>,\n): Captures {\n  const data = new Map<string, Float32Array>()\n  if (captures) {\n    for (const [name, arr] of Object.entries(captures)) data.set(name, arr)\n  }\n  return new Captures(captureShapes, data)\n}\n\n", "// Standard \"batteries-included\" Module subclasses for the most common layers.\n//\n// Each class declares its params and a `.fwd(x)` method that runs the forward\n// computation. Forward methods are pure tensorgrad ops \u2014 autograd traces\n// through them just like any other call.\n//\n//   import { nn } from 'tensorgrad'\n//   class Block extends Module {\n//     ln  = new nn.LayerNorm(D)\n//     ffn = new nn.Linear(D, 4 * D)\n//   }\n//   const y = p.ffn.fwd(p.ln.fwd(x))\n\nimport { Module } from './module.js'\nimport type { Tensor } from './ir.js'\nimport { add, matmul, sub, mul, div, sqrt, meanLast, sumLast, reshape, swapAxes, oneHot, logSoftmaxLast } from './ops.js'\nimport { ShapeError } from './shape.js'\nimport { captureSite } from './ir.js'\nimport type { Captures } from './runtime.js'\n\n// ----------------------------------------------------------------------------\n// Linear: y = x @ W (+ b)\n// ----------------------------------------------------------------------------\n\nexport interface LinearOptions {\n  /** Include a bias term (default true). */\n  bias?: boolean\n}\n\nexport class Linear extends Module {\n  W: Tensor\n  b: Tensor | null\n  constructor(public readonly inDim: number, public readonly outDim: number, opts: LinearOptions = {}) {\n    super()\n    this.W = this.param([inDim, outDim])                      // randn, scale 0.02\n    this.b = opts.bias === false ? null : this.param([outDim], { init: 'zeros' })\n  }\n  fwd(x: Tensor): Tensor {\n    const out = matmul(x, this.W)\n    return this.b ? add(out, this.b) : out\n  }\n}\n\n// ----------------------------------------------------------------------------\n// LayerNorm \u2014 normalizes over the last axis. eps defaults to 1e-5.\n// ----------------------------------------------------------------------------\n\nexport class LayerNorm extends Module {\n  g: Tensor\n  b: Tensor\n  constructor(public readonly d: number, public readonly eps: number = 1e-5) {\n    super()\n    this.g = this.param([d], { init: 'ones' })\n    this.b = this.param([d], { init: 'zeros' })\n  }\n  fwd(x: Tensor): Tensor {\n    const m = meanLast(x)\n    const c = sub(x, m)\n    const v = meanLast(mul(c, c))\n    const stdev = sqrt(add(v, this.eps))\n    return add(mul(div(c, stdev), this.g), this.b)\n  }\n}\n\n// ----------------------------------------------------------------------------\n// Multi-head attention shape helpers \u2014 split the last (model) axis into\n// [nHeads, headDim] and bring heads ahead of the sequence axis.\n// ----------------------------------------------------------------------------\n\n/** [..., T, D] \u2192 [..., H, T, D/H]. Folds the standard\n *  `transpose(reshape(x, [..., T, H, d]), [..., H, T, d])` pattern into one\n *  call. Last dim of `x` must divide evenly by `nHeads`. */\nexport function splitHeads(x: Tensor, nHeads: number): Tensor {\n  const site = captureSite('splitHeads')\n  const r = x.shape.length\n  if (r < 2) throw new ShapeError(`splitHeads: requires rank >= 2, got ${r}`, site)\n  const T = x.shape[r - 2]!\n  const D = x.shape[r - 1]!\n  if (D % nHeads !== 0) {\n    throw new ShapeError(`splitHeads: last dim ${D} not divisible by nHeads ${nHeads}`, site)\n  }\n  const lead = x.shape.slice(0, r - 2)\n  const reshaped = reshape(x, [...lead, T, nHeads, D / nHeads])\n  // Swap T (axis lead.length) with H (axis lead.length + 1).\n  return swapAxes(reshaped, lead.length, lead.length + 1)\n}\n\n/** Inverse of `splitHeads`: [..., H, T, d] \u2192 [..., T, H*d]. */\nexport function mergeHeads(x: Tensor): Tensor {\n  const site = captureSite('mergeHeads')\n  const r = x.shape.length\n  if (r < 3) throw new ShapeError(`mergeHeads: requires rank >= 3, got ${r}`, site)\n  const H = x.shape[r - 3]!\n  const T = x.shape[r - 2]!\n  const d = x.shape[r - 1]!\n  const lead = x.shape.slice(0, r - 3)\n  // Swap H (axis r-3) and T (axis r-2): [..., H, T, d] \u2192 [..., T, H, d]\n  const swapped = swapAxes(x, r - 3, r - 2)\n  return reshape(swapped, [...lead, T, H * d])\n}\n\n/** Slice a captured tensor named `name` into one Float32Array per head, using\n *  the static shape registered at compile time. The leading axis is treated as\n *  heads (matching `splitHeads` layout at B=1); a leading singleton batch is\n *  stripped if present so callers can pass capture names directly. Throws if\n *  the capture isn't registered or wasn't read back this call. */\nexport function unsplitHeads(captures: Captures, name: string): Float32Array[] {\n  const flat = captures.get(name)\n  const shape = captures.shapeOf(name)\n  if (shape.length < 2) {\n    throw new Error(`unsplitHeads: '${name}' shape needs >= 2 dims, got [${shape.join(', ')}]`)\n  }\n  // For inference graphs at B=1, captures have shape [1, H, ..., ...]. Strip\n  // the leading 1 if present so the next axis is heads.\n  const s = shape[0] === 1 ? shape.slice(1) : shape\n  const H = s[0]!\n  let stride = 1\n  for (let i = 1; i < s.length; i++) stride *= s[i]!\n  const expected = H * stride\n  if (flat.length !== expected) {\n    throw new Error(`unsplitHeads: '${name}' length ${flat.length} doesn't match shape product ${expected}`)\n  }\n  return Array.from({ length: H }, (_, h) => flat.slice(h * stride, (h + 1) * stride))\n}\n\n// ----------------------------------------------------------------------------\n// Loss helpers\n// ----------------------------------------------------------------------------\n\n/** Per-position cross-entropy along the last (vocab) axis: returns\n *  `-log p(target)` at each position. `logits` is `[..., V]`; `targets` is\n *  `[...]` of i32; result is `[...]` (one rank less than logits). The user\n *  applies their own masking + reduction downstream \u2014 useful when only some\n *  positions contribute (e.g. result-digit masking) or for label smoothing. */\nexport function crossEntropyLast(logits: Tensor, targets: Tensor): Tensor {\n  const site = captureSite('crossEntropyLast')\n  if (targets.dtype !== 'i32') {\n    throw new ShapeError(`crossEntropyLast: targets must be i32, got ${targets.dtype}`, site)\n  }\n  const vocab = logits.shape[logits.shape.length - 1]!\n  const lp = logSoftmaxLast(logits)                                   // [..., V]\n  const targetLp = sumLast(mul(lp, oneHot(targets, vocab, 'f32')))    // [...]\n  return mul(targetLp, -1)\n}\n"],
+  "mappings": ";;;;;;;AAiKO,SAAS,YAAmB;AACjC,SAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC,GAAG,UAAU,oBAAI,IAAI,EAAE;AAClE;AAGO,SAAS,UAAU,GAAU,OAAc,OAAc,QAAuB,MAA+B;AACpH,QAAM,KAAK,EAAE,QAAQ;AACrB,QAAM,IAAY,EAAE,IAAI,OAAO,OAAO,QAAQ,KAAK;AACnD,IAAE,QAAQ,KAAK,CAAC;AAChB,SAAO;AACT;AAMO,SAAS,MACd,GACA,MACA,OACA,OACA,MACA,QACQ;AACR,QAAM,UAAU,EAAE,IAAI;AACtB,QAAM,MAAM,UAAU,GAAG,OAAO,OAAO,SAAS,IAAI;AACpD,QAAM,OAAO,EAAE,MAAM,KAAK,IAAI,IAAI,GAAG,OAAO;AAC5C,IAAE,IAAI,KAAK,IAAI;AACf,SAAO;AACT;AAIO,SAAS,YAAY,QAA0B;AAEpD,QAAM,QAAS,IAAI,MAAM,EAAG,SAAS;AACrC,SAAO,EAAE,QAAQ,MAAM;AACzB;AAIO,SAAS,WAAW,MAAwB;AACjD,QAAM,QAAQ,KAAK,MAAM,MAAM,IAAI;AAGnC,QAAM,aAAuB,CAAC;AAC9B,aAAW,QAAQ,MAAM,MAAM,CAAC,GAAG;AACjC,QAAI,KAAK,SAAS,kBAAkB,KAAK,KAAK,SAAS,qBAAqB,EAAG;AAC/E,eAAW,KAAK,KAAK,KAAK,CAAC;AAC3B,QAAI,WAAW,UAAU,EAAG;AAAA,EAC9B;AACA,MAAI,WAAW,WAAW,EAAG,QAAO,IAAI,KAAK,MAAM;AACnD,SAAO,IAAI,KAAK,MAAM;AAAA,IAAQ,WAAW,KAAK,MAAM,CAAC;AACvD;;;AC7LO,IAAM,aAAN,cAAyB,MAAM;AAAA,EACpC,YAAY,SAAiB,MAAuB;AAClD,UAAM,YAAY,OAAO,GAAG,OAAO;AAAA,OAAU,WAAW,IAAI,CAAC,KAAK;AAClE,UAAM,SAAS;AACf,SAAK,OAAO;AAAA,EACd;AACF;AAEA,SAAS,KAAK,SAAiB,MAA8B;AAC3D,QAAM,IAAI,WAAW,SAAS,IAAI;AACpC;AAMO,SAAS,YAAY,GAAU,GAAmB;AACvD,MAAI,EAAE,WAAW,EAAE,OAAQ,QAAO;AAClC,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,KAAI,EAAE,CAAC,MAAM,EAAE,CAAC,EAAG,QAAO;AAC7D,SAAO;AACT;AAEO,SAAS,UAAU,OAAsB;AAC9C,MAAI,IAAI;AACR,aAAW,KAAK,MAAO,MAAK;AAC5B,SAAO;AACT;AAEO,SAAS,UAAU,OAAsB;AAC9C,SAAO,IAAI,MAAM,KAAK,IAAI,CAAC;AAC7B;AAKO,SAAS,kBAAkB,GAAU,GAAwB;AAClE,QAAM,OAAO,KAAK,IAAI,EAAE,QAAQ,EAAE,MAAM;AACxC,QAAM,MAAgB,IAAI,MAAM,IAAI;AACpC,WAAS,IAAI,GAAG,IAAI,MAAM,KAAK;AAC7B,UAAM,KAAK,KAAK,OAAO,EAAE;AACzB,UAAM,KAAK,KAAK,OAAO,EAAE;AACzB,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE,EAAE;AAC5B,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE,EAAE;AAC5B,QAAI,OAAO,GAAI,KAAI,CAAC,IAAI;AAAA,aACf,OAAO,EAAG,KAAI,CAAC,IAAI;AAAA,aACnB,OAAO,EAAG,KAAI,CAAC,IAAI;AAAA,QACvB,QAAO;AAAA,EACd;AACA,SAAO;AACT;AASO,SAAS,sBACd,QAAgB,QAAe,QAAe,MACvC;AACP,QAAM,SAAS,kBAAkB,QAAQ,MAAM;AAC/C,MAAI,CAAC,QAAQ;AACX;AAAA,MACE,GAAG,MAAM,yBAAyB,UAAU,MAAM,CAAC,QAAQ,UAAU,MAAM,CAAC;AAAA,MAG5E;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,WAAW,SAAiB,QAAe,OAA+B;AACxF,SAAO;AACT;AAEO,SAAS,cAAc,QAAgB,QAAe,MAA8B;AACzF,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,gCAAgC,IAAI;AAE3E,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,CAAC;AACnC;AAEO,SAAS,aAAa,QAAgB,QAAe,MAA8B;AACxF,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,gCAAgC,IAAI;AAE3E,SAAO,OAAO,MAAM,GAAG,EAAE;AAC3B;AAEO,SAAS,aAAa,QAAgB,QAAe,UAAiB,MAA8B;AAEzG,MAAI,WAAW;AACf,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,IAAI,SAAS,CAAC;AACpB,QAAI,MAAM,IAAI;AACZ,UAAI,aAAa,GAAI,MAAK,GAAG,MAAM,4CAA4C,UAAU,QAAQ,CAAC,IAAI,IAAI;AAC1G,iBAAW;AAAA,IACb,WAAW,KAAK,GAAG;AACjB,WAAK,GAAG,MAAM,iBAAiB,CAAC,gBAAgB,UAAU,QAAQ,CAAC,IAAI,IAAI;AAAA,IAC7E,OAAO;AACL,mBAAa;AAAA,IACf;AAAA,EACF;AACA,QAAM,UAAU,UAAU,MAAM;AAChC,QAAM,MAAM,CAAC,GAAG,QAAQ;AACxB,MAAI,aAAa,IAAI;AACnB,QAAI,UAAU,cAAc,GAAG;AAC7B,WAAK,GAAG,MAAM,oBAAoB,UAAU,MAAM,CAAC,UAAU,OAAO,QAAQ,UAAU,QAAQ,CAAC,kCAA6B,SAAS,IAAI,IAAI;AAAA,IAC/I;AACA,QAAI,QAAQ,IAAI,UAAU;AAAA,EAC5B,WAAW,cAAc,SAAS;AAChC,SAAK,GAAG,MAAM,gCAA2B,UAAU,MAAM,CAAC,QAAQ,OAAO,0BAA0B,UAAU,QAAQ,CAAC,QAAQ,SAAS,IAAI,IAAI;AAAA,EACjJ;AACA,SAAO;AACT;AAEO,SAAS,eAAe,QAAgB,QAAe,MAAyB,MAA8B;AACnH,MAAI,KAAK,WAAW,OAAO,QAAQ;AACjC,SAAK,GAAG,MAAM,iBAAiB,KAAK,MAAM,0BAA0B,OAAO,MAAM,IAAI,IAAI;AAAA,EAC3F;AACA,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,KAAK,MAAM;AACpB,QAAI,IAAI,KAAK,KAAK,OAAO,OAAQ,MAAK,GAAG,MAAM,gBAAgB,CAAC,0BAA0B,OAAO,MAAM,IAAI,IAAI;AAC/G,QAAI,KAAK,IAAI,CAAC,EAAG,MAAK,GAAG,MAAM,8BAA8B,CAAC,IAAI,IAAI;AACtE,SAAK,IAAI,CAAC;AAAA,EACZ;AACA,SAAO,KAAK,IAAI,OAAK,OAAO,CAAC,CAAE;AACjC;AAGO,SAAS,YAAY,QAAgB,QAAe,QAAe,MAA8B;AACtG,MAAI,OAAO,SAAS,EAAG,MAAK,GAAG,MAAM,kCAAkC,UAAU,MAAM,CAAC,IAAI,IAAI;AAChG,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,+BAA+B,UAAU,MAAM,CAAC,6CAAwC,IAAI;AACnI,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,KAAK,OAAO,CAAC;AACnB,QAAM,IAAI,OAAO,CAAC;AAClB,MAAI,OAAO,GAAI,MAAK,GAAG,MAAM,mCAA8B,UAAU,MAAM,CAAC,SAAM,UAAU,MAAM,CAAC,wBAAwB,EAAE,yBAAyB,EAAE,KAAK,IAAI;AACjK,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,GAAG,CAAC;AACtC;AAGO,SAAS,mBAAmB,QAAgB,QAAe,QAAe,MAA8B;AAC7G,MAAI,OAAO,SAAS,KAAK,OAAO,SAAS,GAAG;AAC1C,SAAK,GAAG,MAAM,0CAA0C,UAAU,MAAM,CAAC,QAAQ,UAAU,MAAM,CAAC,IAAI,IAAI;AAAA,EAC5G;AACA,MAAI,OAAO,WAAW,OAAO,QAAQ;AACnC,SAAK,GAAG,MAAM,2BAA2B,OAAO,MAAM,OAAO,OAAO,MAAM,gDAAgD,IAAI;AAAA,EAChI;AACA,QAAM,SAAS,OAAO,MAAM,GAAG,EAAE;AACjC,QAAM,SAAS,OAAO,MAAM,GAAG,EAAE;AACjC,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG;AAC3B,WAAK,GAAG,MAAM,kCAA6B,UAAU,MAAM,CAAC,OAAO,UAAU,MAAM,CAAC,IAAI,IAAI;AAAA,IAC9F;AAAA,EACF;AACA,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,KAAK,OAAO,OAAO,SAAS,CAAC;AACnC,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,MAAI,OAAO,GAAI,MAAK,GAAG,MAAM,sDAAiD,EAAE,6BAA6B,EAAE,IAAI,IAAI;AACvH,SAAO,CAAC,GAAG,QAAQ,GAAG,CAAC;AACzB;AAEO,SAAS,YAAY,QAAgB,cAAqB,OAAe,MAA8B;AAC5G,MAAI,SAAS,EAAG,MAAK,GAAG,MAAM,iCAAiC,KAAK,IAAI,IAAI;AAC5E,SAAO,CAAC,GAAG,cAAc,KAAK;AAChC;AAGO,SAAS,iBAAiB,QAAgB,QAAe,MAA8B;AAC5F,MAAI,OAAO,SAAS,EAAG,MAAK,GAAG,MAAM,6BAA6B,UAAU,MAAM,CAAC,IAAI,IAAI;AAC3F,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,QAAM,IAAI,OAAO,OAAO,SAAS,CAAC;AAClC,MAAI,MAAM,EAAG,MAAK,GAAG,MAAM,oDAAoD,UAAU,MAAM,CAAC,IAAI,IAAI;AACxG,SAAO;AACT;AAEO,SAAS,oBAAoB,QAAgB,QAAe,OAAe,KAAa,MAA8B;AAC3H,MAAI,OAAO,WAAW,EAAG,MAAK,GAAG,MAAM,6BAA6B,IAAI;AACxE,QAAM,OAAO,OAAO,OAAO,SAAS,CAAC;AACrC,MAAI,QAAQ,KAAK,MAAM,QAAQ,SAAS,KAAK;AAC3C,SAAK,GAAG,MAAM,oBAAoB,KAAK,KAAK,GAAG,2BAA2B,IAAI,IAAI,IAAI;AAAA,EACxF;AACA,SAAO,CAAC,GAAG,OAAO,MAAM,GAAG,EAAE,GAAG,MAAM,KAAK;AAC7C;AAIO,SAAS,iBAAiB,QAAgB,QAAe,aAAoB,MAA8B;AAChH,MAAI,OAAO,SAAS,YAAY,QAAQ;AACtC,SAAK,GAAG,MAAM,iBAAiB,OAAO,MAAM,kBAAkB,YAAY,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,SAAS,YAAY,SAAS,OAAO;AAC3C,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,UAAM,KAAK,OAAO,CAAC;AACnB,UAAM,KAAK,YAAY,SAAS,CAAC;AACjC,QAAI,OAAO,MAAM,OAAO,GAAG;AACzB,WAAK,GAAG,MAAM,sBAAsB,UAAU,MAAM,CAAC,OAAO,UAAU,WAAW,CAAC,gBAAW,CAAC,UAAU,EAAE,+BAA+B,SAAS,CAAC,UAAU,EAAE,iBAAiB,IAAI;AAAA,IACtL;AAAA,EACF;AACA,SAAO;AACT;AAIO,SAAS,gBAAgB,QAAgB,QAAe,aAAoB,MAA8B;AAC/G,MAAI,YAAY,SAAS,OAAO,QAAQ;AACtC,SAAK,GAAG,MAAM,iBAAiB,YAAY,MAAM,kBAAkB,OAAO,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,SAAS,OAAO,SAAS,YAAY;AAC3C,WAAS,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;AAC3C,UAAM,KAAK,OAAO,SAAS,CAAC;AAC5B,UAAM,KAAK,YAAY,CAAC;AACxB,QAAI,OAAO,MAAM,OAAO,GAAG;AACzB,WAAK,GAAG,MAAM,uBAAuB,UAAU,MAAM,CAAC,OAAO,UAAU,WAAW,CAAC,uBAAkB,CAAC,UAAU,EAAE,+BAA+B,IAAI;AAAA,IACvJ;AAAA,EACF;AACA,SAAO;AACT;AAIO,SAAS,WAAW,QAAgB,WAAkB,QAAe,QAAe,MAA8B;AACvH,QAAM,KAAK,kBAAkB,QAAQ,MAAM;AAC3C,MAAI,CAAC,GAAI,MAAK,GAAG,MAAM,uBAAuB,UAAU,MAAM,CAAC,OAAO,UAAU,MAAM,CAAC,IAAI,IAAI;AAC/F,QAAM,SAAS,kBAAkB,WAAW,EAAE;AAC9C,MAAI,CAAC,OAAQ,MAAK,GAAG,MAAM,UAAU,UAAU,SAAS,CAAC,sCAAsC,UAAU,EAAE,CAAC,IAAI,IAAI;AACpH,SAAO;AACT;AAEO,SAAS,cAAc,QAAgB,QAAe,SAAgB,MAA8B;AACzG,MAAI,CAAC,YAAY,QAAQ,OAAO,GAAG;AACjC,SAAK,GAAG,MAAM,6CAA6C,UAAU,MAAM,CAAC,QAAQ,UAAU,OAAO,CAAC,IAAI,IAAI;AAAA,EAChH;AACA,SAAO;AACT;;;AClPA,IAAI,WAAyB;AAI7B,IAAI,kBAAkB;AAEf,SAAS,eAAsB;AACpC,MAAI,CAAC,UAAU;AACb,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,mBAA4B;AAC1C,SAAO;AACT;AAIO,SAAS,MAAM,IAAoC;AACxD,MAAI,UAAU;AACZ,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,QAAM,IAAI,UAAU;AACpB,aAAW;AACX,oBAAkB;AAClB,MAAI;AACF,UAAM,SAAS,GAAG;AAClB,UAAM,UAAU,MAAM,QAAQ,MAAM,IAAI,SAAS,CAAC,MAAM;AACxD,eAAW,KAAK,SAAS;AACvB;AAAC,MAAC,EAAE,QAAqB,KAAK,EAAE,EAAE;AAAA,IACpC;AAAA,EACF,UAAE;AACA,eAAW;AACX,sBAAkB;AAAA,EACpB;AACA,SAAO;AACT;AAQO,SAAS,UAAa,GAAU,IAAgB;AACrD,MAAI,UAAU;AACZ,UAAM,IAAI,MAAM,8DAA8D;AAAA,EAChF;AACA,aAAW;AAEX,MAAI;AACF,WAAO,GAAG;AAAA,EACZ,UAAE;AACA,eAAW;AAAA,EACb;AACF;AAUA,SAAS,iBAAiB,GAAU,MAAc,OAAyB,OAAqB;AAC9F,MAAI,EAAE,IAAI,KAAK,QAAM,MAAM,SAAS,GAAG,IAAsB,KAAM,GAAyB,SAAS,IAAI,GAAG;AAC1G,UAAM,IAAI,MAAM,eAAe,KAAK,UAAU,IAAI,8BAA8B;AAAA,EAClF;AACF;AAEO,SAAS,WAAW,MAAc,OAAc,QAAe,OAAe;AACnF,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,eAAe,cAAc,GAAG,OAAO;AAClE,QAAM,OAAO,YAAY,YAAY;AACrC,SAAO,MAAM,GAAG,eAAe,OAAO,OAAO,MAAM,EAAE,KAAK,CAAQ;AACpE;AAEO,SAAS,YAAY,MAAc,OAAc,QAAe,OAAe;AACpF,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,eAAe,cAAc,GAAG,OAAO;AAClE,QAAM,OAAO,YAAY,aAAa;AACtC,SAAO,MAAM,GAAG,gBAAgB,OAAO,OAAO,MAAM,EAAE,KAAK,CAAQ;AACrE;AAIO,SAAS,WAAW,MAAc,OAAc,QAAe,OAAO,YAAY,GAAW;AAClG,QAAM,IAAI,aAAa;AACvB,mBAAiB,GAAG,MAAM,CAAC,aAAa,GAAG,OAAO;AAClD,QAAM,OAAO,YAAY,YAAY;AACrC,SAAO,MAAM,GAAG,eAAe,OAAO,OAAO,MAAM,EAAE,MAAM,UAAU,CAAQ;AAC/E;;;AC5FO,SAAS,QAA0B,MAAc,GAAS;AAC/D,MAAI,CAAC,iBAAiB,EAAG,QAAO;AAChC,QAAM,IAAI,aAAa;AACvB,MAAI,EAAE,SAAS,IAAI,IAAI,GAAG;AACxB,UAAM,IAAI;AAAA,MACR,kBAAkB,IAAI;AAAA,IAExB;AAAA,EACF;AACA,IAAE,SAAS,IAAI,MAAM,EAAE,EAAE;AACzB,SAAO;AACT;;;ACJA,SAAS,QACP,MACA,MACA,GAAW,GACX,WAAkB,EAAE,OACZ;AACR,QAAM,OAAO,YAAY,IAAI;AAC7B,MAAI,EAAE,UAAU,EAAE,MAAO,OAAM,IAAI,WAAW,GAAG,IAAI,qBAAqB,EAAE,KAAK,OAAO,EAAE,KAAK,KAAK,IAAI;AACxG,QAAM,WAAW,sBAAsB,MAAM,EAAE,OAAO,EAAE,OAAO,IAAI;AACnE,SAAO,MAAM,aAAa,GAAG,MAAM,UAAU,UAAU,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACnF;AAKO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC7E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC9E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,SAAO,OAAO,MAAM,WAAW,UAAU,GAAG,CAAC,IAAI,QAAQ,OAAO,OAAO,GAAG,CAAC;AAC7E;AACO,SAAS,IAAI,GAAW,GAA4B;AACzD,MAAI,OAAO,MAAM,UAAU;AACzB,QAAI,MAAM,EAAG,OAAM,IAAI,WAAW,sCAAsC,YAAY,KAAK,CAAC;AAC1F,WAAO,UAAU,GAAG,IAAI,CAAC;AAAA,EAC3B;AACA,SAAO,QAAQ,OAAO,OAAO,GAAG,CAAC;AACnC;AAQO,SAAS,UAAU,GAAW,QAAwB;AAC3D,QAAM,OAAO,YAAY,WAAW;AACpC,SAAO,MAAM,aAAa,GAAG,cAAc,EAAE,OAAO,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,CAAC;AACxF;AAEO,SAAS,UAAU,GAAW,QAAwB;AAC3D,QAAM,OAAO,YAAY,WAAW;AACpC,SAAO,MAAM,aAAa,GAAG,cAAc,EAAE,OAAO,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,CAAC;AACxF;AAMA,SAAS,MAAM,MAAiD,GAAmB;AACjF,QAAM,OAAO,YAAY,IAAI;AAC7B,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,GAAG,IAAI,uBAAuB,EAAE,KAAK,IAAI,IAAI;AACzF,SAAO,MAAM,aAAa,GAAG,MAAM,WAAW,MAAM,EAAE,OAAO,IAAI,GAAG,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAC9F;AAEO,IAAM,OAAQ,CAAC,MAAsB,MAAM,QAAS,CAAC;AACrD,IAAM,QAAQ,CAAC,MAAsB,MAAM,SAAS,CAAC;AACrD,IAAM,MAAQ,CAAC,MAAsB,MAAM,OAAS,CAAC;AACrD,IAAM,MAAQ,CAAC,MAAsB,MAAM,OAAS,CAAC;AACrD,IAAM,OAAQ,CAAC,MAAsB,MAAM,QAAS,CAAC;AAOrD,SAAS,SAAS,GAAmB;AAC1C,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,+BAA+B,EAAE,KAAK,IAAI,IAAI;AAC1F,QAAM,WAAW,cAAc,YAAY,EAAE,OAAO,IAAI;AACxD,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAChF;AAEO,SAAS,QAAQ,GAAmB;AACzC,QAAM,OAAO,YAAY,SAAS;AAClC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,8BAA8B,EAAE,KAAK,IAAI,IAAI;AACzF,QAAM,WAAW,aAAa,WAAW,EAAE,OAAO,IAAI;AACtD,SAAO,MAAM,aAAa,GAAG,YAAY,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AAC/E;AAGO,SAAS,OAAO,GAAmB;AACxC,SAAO,QAAQ,QAAQ,GAAG,CAAC,EAAE,CAAC,CAAC;AACjC;AAMO,SAAS,QAAQ,GAAW,UAAyB;AAC1D,QAAM,OAAO,YAAY,SAAS;AAClC,QAAM,WAAW,aAAa,WAAW,EAAE,OAAO,UAAU,IAAI;AAChE,SAAO,MAAM,aAAa,GAAG,WAAW,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,UAAU,SAAS,CAAC;AAClG;AAEO,SAAS,UAAU,GAAW,MAAiC;AACpE,QAAM,OAAO,YAAY,WAAW;AACpC,QAAM,WAAW,eAAe,aAAa,EAAE,OAAO,MAAM,IAAI;AAChE,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,KAAK,CAAC;AACtF;AAMO,SAAS,SAAS,GAAW,OAAe,OAAuB;AACxE,QAAM,IAAI,EAAE,MAAM;AAClB,QAAM,OAAO,CAAC,SAAyB,OAAO,IAAI,IAAI,OAAO;AAC7D,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,KAAK,KAAK,MAAM,KAAK,KAAK,KAAK,MAAM,GAAG;AAC1C,UAAM,IAAI,WAAW,2CAAsC,KAAK,KAAK,KAAK,cAAc,CAAC,WAAW,IAAI;AAAA,EAC1G;AACA,MAAI,OAAO,GAAI,QAAO;AACtB,QAAM,OAAO,MAAM,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC;AAClD,OAAK,EAAE,IAAI;AACX,OAAK,EAAE,IAAI;AACX,SAAO,UAAU,GAAG,IAAI;AAC1B;AAMO,SAAS,OAAO,GAAW,GAAmB;AACnD,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,OAAO;AAC1C,UAAM,IAAI,WAAW,6BAA6B,EAAE,KAAK,QAAQ,EAAE,KAAK,IAAI,IAAI;AAAA,EAClF;AACA,QAAM,WAAW,YAAY,UAAU,EAAE,OAAO,EAAE,OAAO,IAAI;AAC7D,SAAO,MAAM,aAAa,GAAG,UAAU,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACpF;AAEO,SAAS,cAAc,GAAW,GAAmB;AAC1D,QAAM,OAAO,YAAY,eAAe;AACxC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,OAAO;AAC1C,UAAM,IAAI,WAAW,oCAAoC,EAAE,KAAK,QAAQ,EAAE,KAAK,IAAI,IAAI;AAAA,EACzF;AACA,QAAM,WAAW,mBAAmB,iBAAiB,EAAE,OAAO,EAAE,OAAO,IAAI;AAC3E,SAAO,MAAM,aAAa,GAAG,kBAAkB,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AAC5F;AAMO,SAAS,OAAO,SAAiB,OAAe,QAAe,OAAe;AACnF,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,oCAAoC,QAAQ,KAAK,IAAI,IAAI;AAAA,EAChF;AACA,QAAM,WAAW,YAAY,UAAU,QAAQ,OAAO,OAAO,IAAI;AACjE,SAAO,MAAM,aAAa,GAAG,WAAW,UAAU,OAAO,MAAM,EAAE,SAAS,QAAQ,IAAI,OAAO,MAAM,CAAC;AACtG;AAOO,SAAS,UAAU,OAAe,SAAyB;AAChE,QAAM,OAAO,YAAY,WAAW;AACpC,MAAI,MAAM,MAAM,WAAW,GAAG;AAC5B,UAAM,IAAI,WAAW,kDAAkD,UAAU,MAAM,KAAK,CAAC,IAAI,IAAI;AAAA,EACvG;AACA,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,uCAAuC,QAAQ,KAAK,IAAI,IAAI;AAAA,EACnF;AACA,SAAO,OAAO,OAAO,SAAS,MAAM,MAAM,CAAC,GAAI,KAAK,GAAG,KAAK;AAC9D;AAGO,SAAS,OAAO,GAAW,QAAe,OAAe;AAC9D,QAAM,OAAO,YAAY,QAAQ;AACjC,MAAI,KAAK,KAAK,CAAC,OAAO,UAAU,CAAC,GAAG;AAClC,UAAM,IAAI,WAAW,6CAA6C,CAAC,IAAI,IAAI;AAAA,EAC7E;AACA,SAAO,MAAM,aAAa,GAAG,UAAU,CAAC,CAAC,GAAG,OAAO,MAAM,EAAE,GAAG,MAAM,CAAC;AACvE;AASO,SAAS,kBAAkB,GAAmB;AACnD,QAAM,OAAO,YAAY,mBAAmB;AAC5C,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,wCAAwC,EAAE,KAAK,IAAI,IAAI;AACnG,mBAAiB,qBAAqB,EAAE,OAAO,IAAI;AACnD,SAAO,MAAM,aAAa,GAAG,uBAAuB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AACvF;AAGO,SAAS,eAAe,GAAmB;AAChD,QAAM,OAAO,YAAY,gBAAgB;AACzC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,qCAAqC,EAAE,KAAK,IAAI,IAAI;AAChG,SAAO,MAAM,aAAa,GAAG,oBAAoB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC;AACpF;AAMO,SAAS,YAAY,GAAW,WAA2B;AAChE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,kCAAkC,EAAE,KAAK,IAAI,IAAI;AAC7F,mBAAiB,eAAe,EAAE,OAAO,IAAI;AAC7C,SAAO,MAAM,aAAa,GAAG,gBAAgB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,UAAU,CAAC;AAC3F;AAQO,SAAS,eAAe,GAAW,OAAe,KAAqB;AAC5E,QAAM,OAAO,YAAY,gBAAgB;AACzC,QAAM,WAAW,oBAAoB,kBAAkB,EAAE,OAAO,OAAO,KAAK,IAAI;AAChF,SAAO,MAAM,aAAa,GAAG,oBAAoB,UAAU,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,IAAI,CAAC;AACnG;AAOO,SAAS,YAAY,GAAW,aAA4B;AACjE,QAAM,OAAO,YAAY,aAAa;AACtC,mBAAiB,eAAe,EAAE,OAAO,aAAa,IAAI;AAC1D,SAAO,MAAM,aAAa,GAAG,gBAAgB,aAAa,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,YAAY,CAAC;AACnG;AAEO,SAAS,WAAW,GAAW,aAA4B;AAChE,QAAM,OAAO,YAAY,YAAY;AACrC,kBAAgB,cAAc,EAAE,OAAO,aAAa,IAAI;AACxD,SAAO,MAAM,aAAa,GAAG,gBAAgB,aAAa,EAAE,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,YAAY,CAAC;AACnG;AAOO,SAAS,YAAY,OAAe,QAAe,OAAe;AACvE,QAAM,OAAO,YAAY,aAAa;AACtC,SAAO,MAAM,aAAa,GAAG,gBAAgB,CAAC,GAAG,OAAO,MAAM,EAAE,OAAO,MAAM,CAAC;AAChF;AAWO,IAAM,OAAU,CAAC,GAAW,MAAsB,QAAQ,QAAW,QAAW,GAAG,GAAG,MAAM;AAC5F,IAAM,UAAU,CAAC,GAAW,MAAsB,QAAQ,WAAW,WAAW,GAAG,GAAG,MAAM;AAG5F,SAAS,MAAM,MAAc,GAAW,GAAmB;AAChE,QAAM,OAAO,YAAY,OAAO;AAChC,MAAI,KAAK,UAAU,OAAQ,OAAM,IAAI,WAAW,iCAAiC,KAAK,KAAK,IAAI,IAAI;AACnG,MAAI,EAAE,UAAU,EAAE,MAAO,OAAM,IAAI,WAAW,8BAA8B,EAAE,KAAK,OAAO,EAAE,KAAK,KAAK,IAAI;AAC1G,QAAM,WAAW,WAAW,SAAS,KAAK,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI;AACvE,SAAO,MAAM,aAAa,GAAG,SAAS,UAAU,EAAE,OAAO,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,EAAE,IAAI,GAAG,EAAE,GAAG,CAAC;AACpG;AAIO,SAAS,SAAS,GAAW,IAAoB;AACtD,QAAM,OAAO,YAAY,UAAU;AACnC,MAAI,EAAE,UAAU,SAAS,GAAG,UAAU,OAAO;AAC3C,UAAM,IAAI,WAAW,+BAA+B,EAAE,KAAK,QAAQ,GAAG,KAAK,IAAI,IAAI;AAAA,EACrF;AACA,QAAM,WAAW,cAAc,YAAY,EAAE,OAAO,GAAG,OAAO,IAAI;AAClE,SAAO,MAAM,aAAa,GAAG,aAAa,UAAU,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,IAAI,GAAG,GAAG,CAAC;AACzF;AAMO,SAAS,YAAY,GAAW,GAAW,IAAoB;AACpE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAClG,MAAI,EAAE,MAAM,WAAW,EAAE,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,EAAE,MAAM,CAAC,CAAC,GAAG;AACjF,UAAM,IAAI,WAAW,+BAA+B,IAAI;AAAA,EAC1D;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,IAAI,GAAG,CAAC;AAC9F;AAEO,SAAS,YAAY,GAAW,GAAW,IAAoB;AACpE,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,SAAS,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAClG,MAAI,EAAE,MAAM,WAAW,EAAE,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,EAAE,MAAM,CAAC,CAAC,GAAG;AACjF,UAAM,IAAI,WAAW,+BAA+B,IAAI;AAAA,EAC1D;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM,EAAE,GAAG,EAAE,IAAI,GAAG,EAAE,IAAI,GAAG,CAAC;AAC9F;AAEO,SAAS,YACd,GACA,MACA,MACA,KACA,KACA,cAA+B,GACvB;AACR,QAAM,OAAO,YAAY,aAAa;AACtC,MAAI,EAAE,UAAU,MAAO,OAAM,IAAI,WAAW,6BAA6B,IAAI;AAC7E,MAAI,IAAI,UAAU,SAAS,IAAI,MAAM,WAAW,GAAG;AACjD,UAAM,IAAI,WAAW,6CAA6C,IAAI;AAAA,EACxE;AACA,MAAI,EAAE,MAAM,WAAW,KAAK,MAAM,UAAU,EAAE,MAAM,KAAK,CAAC,GAAG,MAAM,MAAM,KAAK,MAAM,CAAC,CAAC,GAAG;AACvF,UAAM,IAAI,WAAW,sCAAsC,IAAI;AAAA,EACjE;AAIA,QAAM,WAAW,OAAO,gBAAgB;AACxC,MAAI,UAAU;AACZ,QAAI,YAAY,UAAU,SAAS,YAAY,MAAM,WAAW,GAAG;AACjE,YAAM,IAAI,WAAW,4DAA4D,IAAI;AAAA,IACvF;AAAA,EACF;AACA,SAAO,MAAM,aAAa,GAAG,iBAAiB,EAAE,OAAO,OAAO,MAAM;AAAA,IAClE,GAAG,EAAE;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,KAAK,IAAI;AAAA,IACT;AAAA,IACA,aAAa,WAAW,IAAI;AAAA,IAC5B,mBAAmB,WAAW,YAAY,KAAK;AAAA,EACjD,CAAC;AACH;;;ACrUO,SAAS,WAAW,OAA0B;AACnD,MAAI,MAAM,QAAQ,WAAW,GAAG;AAC9B,UAAM,IAAI,MAAM,kEAAkE,MAAM,QAAQ,MAAM,EAAE;AAAA,EAC1G;AACA,QAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,QAAM,aAAa,MAAM,QAAQ,MAAM;AACvC,MAAI,WAAW,MAAM,WAAW,GAAG;AACjC,UAAM,IAAI;AAAA,MACR,sDAAsD,WAAW,MAAM,KAAK,IAAI,CAAC;AAAA,IAEnF;AAAA,EACF;AAIA,QAAM,iBAAiB,MAAM,IAAI;AACjC,QAAM,aAAa,MAAM,IAAI,MAAM,GAAG,cAAc;AAGpD,QAAM,aAAa,oBAAI,IAAoB;AAE3C,SAAO,UAAU,OAAO,MAAM;AAE5B,eAAW,IAAI,QAAQ,YAAY,GAAK,KAAK,CAAC;AAG9C,aAAS,IAAI,iBAAiB,GAAG,KAAK,GAAG,KAAK;AAC5C,YAAM,KAAK,WAAW,CAAC;AACvB,YAAM,WAAW,WAAW,IAAI,GAAG,GAAG;AACtC,UAAI,CAAC,SAAU;AACf,uBAAiB,IAAI,UAAU,OAAO,UAAU;AAAA,IAClD;AAGA,UAAM,aAAqC,CAAC;AAC5C,eAAW,MAAM,YAAY;AAC3B,UAAI,GAAG,SAAS,cAAe;AAE/B,YAAM,QAAQ,WAAW,IAAI,GAAG,GAAG;AACnC,UAAI,CAAC,OAAO;AAGV,cAAM,IAAI,MAAM,QAAQ,GAAG,GAAG;AAC9B,mBAAW,GAAG,IAAI,IAAI,YAAY,YAAY,GAAK,EAAE,KAAK,GAAG,EAAE,KAAK;AAAA,MACtE,OAAO;AACL,mBAAW,GAAG,IAAI,IAAI;AAAA,MACxB;AAAA,IACF;AAEA,WAAO,EAAE,OAAO,YAAY,MAAM,WAAW;AAAA,EAC/C,CAAC;AACH;AAQA,SAAS,WAAW,YAAiC,SAAiB,cAA4B;AAChG,QAAM,WAAW,WAAW,IAAI,OAAO;AACvC,MAAI,UAAU;AACZ,eAAW,IAAI,SAAS,IAAI,UAAU,YAAY,CAAC;AAAA,EACrD,OAAO;AACL,eAAW,IAAI,SAAS,YAAY;AAAA,EACtC;AACF;AAIA,SAAS,YAAY,OAAe,SAAwB;AAC1D,MAAI,YAAY,MAAM,OAAO,OAAO,EAAG,QAAO;AAC9C,SAAO,WAAW,OAAO,OAAO;AAClC;AAYA,SAAS,iBACP,IACA,UACA,OACA,YACM;AACN,QAAM,WAAW,CAAC,OAAe,MAAM,QAAQ,EAAE;AAEjD,UAAQ,GAAG,MAAM;AAAA;AAAA,IAEf,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH;AAAA;AAAA;AAAA,IAIF,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,UAAU,EAAE,GAAG,EAAE,KAAK,CAAC;AAC1E;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AACV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAK3C,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AACnE,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AACnE;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,YAAY,IAAI,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC;AAEnE,YAAM,QAAQ,IAAI,UAAU,CAAC;AAC7B,YAAM,MAAM,IAAI,GAAG,CAAC;AACpB,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,IAAI,OAAO,GAAG,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC;AACjF;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,cAAc;AAEjB,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,GAAG,MAAM,CAAC;AAC3D;AAAA,IACF;AAAA,IACA,KAAK,cAAc;AAEjB,iBAAW,YAAY,GAAG,GAAG,QAAQ;AACrC;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,QAAQ;AAEX,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,iBAAW,YAAY,GAAG,GAAG,UAAU,IAAI,UAAU,CAAC,GAAG,GAAG,CAAC;AAC7D;AAAA,IACF;AAAA,IACA,KAAK,SAAS;AAEZ,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,KAAK,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC;AAC3B,iBAAW,YAAY,GAAG,GAAG,UAAU,IAAI,UAAU,EAAE,GAAG,IAAI,CAAC;AAC/D;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC;AAC7C;AAAA,IACF;AAAA,IACA,KAAK,OAAO;AAEV,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC;AAC7C;AAAA,IACF;AAAA,IACA,KAAK,QAAQ;AAEX,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,SAAS,GAAG,QAAQ,CAAC;AAClD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,aAAa;AAGhB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,WAAW,YAAY,UAAU,EAAE,KAAK;AAC9C,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,IAAI,CAAC,CAAC;AACvD;AAAA,IACF;AAAA,IACA,KAAK,YAAY;AAIf,YAAM,IAAI,SAAS,GAAG,CAAC;AAEvB,YAAM,WAAW,QAAQ,UAAU,CAAC,GAAG,SAAS,OAAO,CAAC,CAAC;AACzD,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,WAAW;AAEd,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,QAAQ,UAAU,EAAE,KAAK,CAAC;AACvD;AAAA,IACF;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,MAAM,WAAW,GAAG,IAAI;AAC9B,iBAAW,YAAY,GAAG,GAAG,UAAU,UAAU,GAAG,CAAC;AACrD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,UAAU;AASb,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAE3C,iBAAW,YAAY,GAAG,GAAG,OAAO,UAAU,SAAS,GAAG,IAAI,EAAE,CAAC,CAAC;AAGlE,YAAM,KAAK,SAAS,GAAG,IAAI,EAAE;AAG7B,UAAI;AACJ,UAAI,EAAE,MAAM,SAAS,GAAG;AACtB,qBAAa,cAAc,IAAI,QAAQ;AAAA,MACzC,OAAO;AACL,qBAAa,OAAO,IAAI,QAAQ;AAAA,MAClC;AAEA,iBAAW,YAAY,GAAG,GAAG,WAAW,YAAY,EAAE,KAAK,CAAC;AAC5D;AAAA,IACF;AAAA,IACA,KAAK,kBAAkB;AAIrB,YAAM,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,SAAS,GAAG,CAAC;AAC3C,iBAAW,YAAY,GAAG,GAAG,cAAc,UAAU,SAAS,GAAG,IAAI,EAAE,CAAC,CAAC;AACzE,iBAAW,YAAY,GAAG,GAAG,cAAc,SAAS,GAAG,IAAI,EAAE,GAAG,QAAQ,CAAC;AACzE;AAAA,IACF;AAAA;AAAA,IAGA,KAAK;AAGH;AAAA;AAAA,IAGF,KAAK,oBAAoB;AAQvB,YAAM,IAAI,SAAS,GAAG,CAAC;AAIvB,YAAM,IAAI;AAAA,QACR,2OAIW,EAAE,KAAK,OAAO,SAAS,GAAG,GAAG,EAAE,KAAK;AAAA,MACjD;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,gBAAgB;AAEnB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,WAAW,UAAU,EAAE,KAAK,CAAC;AAC1D;AAAA,IACF;AAAA,IACA,KAAK,gBAAgB;AAEnB,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,iBAAW,YAAY,GAAG,GAAG,YAAY,UAAU,EAAE,KAAK,CAAC;AAC3D;AAAA,IACF;AAAA;AAAA,IAGA,KAAK,oBAAoB;AAGvB,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,KAAK,IAAI,CAAC;AAGhB,YAAM,QAAQ,QAAQ,QAAQ;AAC9B,YAAM,YAAY,QAAQ,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,CAAC;AACpD,YAAM,OAAO,IAAI,IAAI,YAAY,WAAW,EAAE,KAAK,CAAC;AACpD,iBAAW,YAAY,GAAG,GAAG,IAAI,UAAU,IAAI,CAAC;AAChD;AAAA,IACF;AAAA,IACA,KAAK,uBAAuB;AAK1B,YAAM,IAAI,SAAS,GAAG,GAAG;AACzB,YAAM,OAAO,IAAI,UAAU,CAAC;AAC5B,YAAM,IAAI,QAAQ,IAAI;AACtB,YAAM,QAAQ,QAAQ,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;AACxC,YAAM,QAAQ,IAAI,UAAU,YAAY,OAAO,EAAE,KAAK,CAAC;AACvD,iBAAW,YAAY,GAAG,GAAG,IAAI,OAAO,CAAC,CAAC;AAC1C;AAAA,IACF;AAAA;AAAA,IAEA,KAAK;AAAA,IACL,KAAK;AAEH;AAAA,IAEF,KAAK,SAAS;AAIZ,YAAM,OAAO,SAAS,GAAG,IAAI;AAC7B,YAAM,IAAI,SAAS,GAAG,CAAC;AACvB,YAAM,IAAI,SAAS,GAAG,CAAC;AAEvB,YAAM,QAAQ,YAAY,YAAY,GAAG,EAAE,KAAK,GAAG,SAAS,KAAK;AACjE,YAAM,QAAQ,YAAY,YAAY,GAAG,EAAE,KAAK,GAAG,SAAS,KAAK;AACjE,iBAAW,YAAY,GAAG,GAAG,YAAY,MAAM,MAAM,UAAU,KAAK,GAAG,EAAE,KAAK,CAAC;AAC/E,iBAAW,YAAY,GAAG,GAAG,YAAY,MAAM,MAAM,OAAO,QAAQ,GAAG,EAAE,KAAK,CAAC;AAC/E;AAAA,IACF;AAAA,IAEA,KAAK,gBAAgB;AAInB,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH,YAAM,IAAI,MAAM,0CAA0C,GAAG,IAAI,EAAE;AAAA;AAAA,IAGrE,KAAK,aAAa;AAGhB,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA,IAEA,SAAS;AAEP,YAAM,cAAqB;AAC3B,WAAK;AACL,YAAM,IAAI,MAAM,+BAAgC,GAAc,IAAI,EAAE;AAAA,IACtE;AAAA,EACF;AACF;AAMA,SAAS,WAAW,MAAmC;AACrD,QAAM,MAAgB,IAAI,MAAM,KAAK,MAAM;AAC3C,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,IAAK,KAAI,KAAK,CAAC,CAAE,IAAI;AACtD,SAAO;AACT;;;ACrYO,IAAM,KAAK;AAAA,EAChB,UAAU,CAAC,WAA+B,EAAE,MAAM,YAAY,MAAM;AAAA;AAAA;AAAA,EAGpE,aAAa,CAAC,UACX,EAAE,MAAM,eAAe,GAAG,KAAK;AAAA;AAAA;AAAA,EAGlC,aAAa,CAAC,UACX,EAAE,MAAM,eAAe,GAAG,KAAK;AAAA;AAAA;AAAA,EAGlC,QAAQ,CAAC,UACN,EAAE,MAAM,UAAU,GAAG,KAAK;AAC/B;AAGO,SAAS,UAAU,UAAsB,MAAsB;AACpE,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,UAAQ,SAAS,MAAM;AAAA,IACrB,KAAK;AAAY,aAAO,SAAS;AAAA,IACjC,KAAK,eAAe;AAClB,YAAM,IAAI,KAAK,IAAI,OAAO,SAAS,OAAO,CAAC;AAC3C,aAAO,SAAS,QAAQ,SAAS,QAAQ,SAAS,QAAQ;AAAA,IAC5D;AAAA,IACA,KAAK,eAAe;AAClB,YAAM,IAAI,KAAK,IAAI,OAAO,SAAS,OAAO,CAAC;AAC3C,aAAO,SAAS,QAAQ,OAAO,SAAS,OAAO,SAAS,UAAU,IAAI,KAAK,IAAI,KAAK,KAAK,CAAC;AAAA,IAC5F;AAAA,IACA,KAAK,UAAU;AACb,UAAI,QAAQ,SAAS,YAAa,QAAO,SAAS,UAAU,OAAO,SAAS;AAC5E,aAAO,UAAU,SAAS,OAAO,OAAO,SAAS,WAAW;AAAA,IAC9D;AAAA,EACF;AACF;AAKO,SAAS,YAAY,UAA+B;AACzD,MAAI,OAAO,aAAa,SAAU,QAAO;AACzC,SAAO,SAAS,SAAS;AAC3B;AA2DO,SAAS,WACd,OACA,YACA,cACA,QAKA,YACY;AACZ,QAAM,gBAAgB,YAAY,OAAO,EAAE;AAC3C,QAAM,YAAY,UAAU,OAAO,IAAI,CAAC;AACxC,QAAM,aAAiC;AAAA,IACrC,IAAI,OAAO;AAAA,IACX,IAAI,OAAO,MAAM;AAAA,IACjB,IAAI,OAAO,MAAM;AAAA,IACjB,KAAK,OAAO,OAAO;AAAA,IACnB,aAAa,OAAO,eAAe;AAAA,IACnC,aAAa,OAAO,gBAAgB,MAAM;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAA8B,CAAC;AACrC,QAAM,eAAe;AAGrB,MAAI,uBAAsC;AAE1C,SAAO,UAAU,OAAO,MAAM;AAC5B,UAAM,MAAM,YAAY,cAAc,CAAC,GAAG,KAAK;AAM/C,UAAM,eAAe,IAAI;AAAA,MACvB,WAAW,cAAc,IACrB,OAAO,KAAK,UAAU,EAAE,OAAO,UAC5B,cAAc,QAAQ,aAAc,WAAW,IAAI,IAAK,WAAW,YAAY,IAAI,CAAC,IACvF,CAAC;AAAA,IACP;AAKA,QAAI,oBAAmC;AACvC,QAAI,iBAAiB,aAAa,OAAO,GAAG;AAC1C,6BAAuB;AACvB,0BAAoB,YAAY,sBAAsB,CAAC,GAAG,KAAK;AAAA,IACjE;AAEA,eAAW,QAAQ,OAAO,KAAK,UAAU,GAAG;AAC1C,YAAM,IAAI,aAAa,IAAI;AAC3B,YAAM,IAAI,WAAW,IAAI;AACzB,UAAI,CAAC,EAAG,OAAM,IAAI,MAAM,yCAAyC,IAAI,GAAG;AACxE,UAAI,CAAC,EAAG,OAAM,IAAI,MAAM,qCAAqC,IAAI,GAAG;AAEpE,YAAM,SAAS,WAAW,UAAU,IAAI,IAAI,EAAE,OAAO,OAAO,CAAC;AAC7D,YAAM,SAAS,WAAW,UAAU,IAAI,IAAI,EAAE,OAAO,OAAO,CAAC;AAM7D,YAAM,cACJ,CAAC,aAAa,IAAI,IAAI,IAAI,IACxB,sBAAsB,OAAO,oBAC7B,IAAI,YAAY,WAAW;AAG/B,YAAM,OAAO,YAAY,QAAQ,GAAG,WAAW,EAAE;AACjD,YAAM,OAAO,YAAY,QAAQ,GAAG,WAAW,EAAE;AACjD,YAAM,OAAO,YAAY,GAAG,MAAM,MAAM,KAAK,WAAW,KAAK,WAAW;AAExE,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,UAAU,IAAI,IAAI,UAAU,QAAQ,CAAC;AAC/E,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,UAAU,IAAI,IAAI,UAAU,QAAQ,CAAC;AAC/E,iBAAW,KAAK,EAAE,QAAQ,MAAM,UAAU,MAAkB,UAAU,QAAQ,CAAC;AAAA,IACjF;AACA,WAAO,EAAE,YAAY,cAAc,sBAAsB,QAAQ,WAAW;AAAA,EAC9E,CAAC;AACH;;;AC7KA,IAAM,aAAoC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,EAAE;AAqB7D,SAAS,YACd,OACA,YACA,iBAAkC,CAAC,GACvB;AACZ,QAAM,UAAwB,CAAC;AAC/B,QAAM,iBAAiB,oBAAI,IAAoB;AAC/C,QAAM,eAAe,oBAAI,IAAoB;AAC7C,QAAM,eAAe,oBAAI,IAAoB;AAC7C,QAAM,mBAAmB,oBAAI,IAAoB;AACjD,QAAM,eAAe,oBAAI,IAAoB;AAG7C,QAAM,qBAAqB,oBAAI,IAAoB;AACnD,aAAW,CAAC,MAAM,MAAM,KAAK,OAAO,QAAQ,UAAU,GAAG;AACvD,uBAAmB,IAAI,OAAO,IAAI,IAAI;AAAA,EACxC;AAEA,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,MAAM,MAAM,IAAK,WAAU,IAAI,GAAG,KAAK,EAAE;AAEpD,QAAM,YAAY,IAAI,IAAI,MAAM,OAAO;AAGvC,aAAW,KAAK,MAAM,SAAS;AAC7B,UAAM,KAAK,UAAU,IAAI,EAAE,EAAE;AAC7B,QAAI,OAA2B;AAC/B,QAAI,OAAsB;AAC1B,QAAI;AAEJ,QAAI,IAAI,SAAS,eAAe;AAC9B,aAAO;AACP,aAAO,GAAG;AAAA,IACZ,WAAW,IAAI,SAAS,gBAAgB;AACtC,aAAO;AACP,aAAO,GAAG;AAAA,IACZ,WAAW,IAAI,SAAS,eAAe;AACrC,aAAO;AACP,aAAO,GAAG;AACV,kBAAY,GAAG;AAAA,IACjB,WAAW,mBAAmB,IAAI,EAAE,EAAE,GAAG;AACvC,aAAO;AACP,aAAO,mBAAmB,IAAI,EAAE,EAAE;AAAA,IACpC,WAAW,UAAU,IAAI,EAAE,EAAE,GAAG;AAC9B,aAAO;AAAA,IACT;AAEA,UAAM,OAAmB;AAAA,MACvB,IAAI,EAAE;AAAA,MACN,UAAU,KAAK,IAAI,GAAG,UAAU,EAAE,KAAK,IAAI,WAAW,EAAE,KAAK,CAAC;AAAA,MAC9D,OAAO,EAAE;AAAA,MACT,OAAO,EAAE;AAAA,MACT;AAAA,MACA;AAAA,MACA,GAAI,cAAc,SAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjD;AACA,YAAQ,KAAK,IAAI;AACjB,mBAAe,IAAI,EAAE,IAAI,EAAE,EAAE;AAE7B,QAAI,SAAS,QAAS,cAAa,IAAI,MAAO,EAAE,EAAE;AAClD,QAAI,SAAS,eAAgB,cAAa,IAAI,MAAO,EAAE,EAAE;AACzD,QAAI,SAAS,aAAc,kBAAiB,IAAI,MAAO,EAAE,EAAE;AAC3D,QAAI,SAAS,QAAS,cAAa,IAAI,MAAO,EAAE,EAAE;AAAA,EACpD;AAEA,QAAM,kBAAkB,MAAM,QAAQ,IAAI,QAAM,eAAe,IAAI,EAAE,CAAE;AAGvE,QAAM,aAA0B,eAAe,IAAI,UAAQ;AACzD,UAAM,cAAc,eAAe,IAAI,KAAK,OAAO,EAAE;AACrD,QAAI,gBAAgB,QAAW;AAC7B,YAAM,IAAI,MAAM,yCAAyC,KAAK,OAAO,EAAE,eAAe;AAAA,IACxF;AACA,UAAM,YAAY,KAAK,aAAa,UAChC,aAAa,IAAI,KAAK,QAAQ,IAC9B,aAAa,IAAI,KAAK,QAAQ;AAClC,QAAI,cAAc,QAAW;AAC3B,YAAM,IAAI,MAAM,+BAA+B,KAAK,QAAQ,KAAK,KAAK,QAAQ,aAAa;AAAA,IAC7F;AACA,UAAM,aAAa,QAAQ,WAAW;AACtC,UAAM,WAAW,QAAQ,SAAS;AAClC,QAAI,WAAW,aAAa,SAAS,UAAU;AAC7C,YAAM,IAAI;AAAA,QACR,4CAA4C,KAAK,QAAQ,KAAK,KAAK,QAAQ,aAChE,WAAW,QAAQ,kBAAkB,SAAS,QAAQ;AAAA,MACnE;AAAA,IACF;AACA,WAAO,EAAE,QAAQ,aAAa,MAAM,WAAW,OAAO,WAAW,SAAS;AAAA,EAC5E,CAAC;AAKD,QAAM,iBAAiB,oBAAI,IAAoB;AAC/C,aAAW,CAAC,MAAM,QAAQ,KAAK,MAAM,UAAU;AAC7C,UAAM,QAAQ,eAAe,IAAI,QAAQ;AACzC,QAAI,UAAU,QAAW;AACvB,YAAM,IAAI,MAAM,yBAAyB,IAAI,gCAAgC,QAAQ,EAAE;AAAA,IACzF;AACA,mBAAe,IAAI,MAAM,KAAK;AAAA,EAChC;AAEA,SAAO,EAAE,SAAS,gBAAgB,cAAc,cAAc,kBAAkB,cAAc,gBAAgB,iBAAiB,WAAW;AAC5I;;;ACjKA,IAAM,UAAU;AAQhB,IAAM,WAAW;AA0BV,SAAS,YAAY,OAAc,MAAgC;AACxE,QAAM,MAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,MAAM,IAAI,QAAQ,KAAK;AACzC,UAAM,KAAK,MAAM,IAAI,CAAC;AACtB,UAAM,OAAO,WAAW,IAAI,OAAO,MAAM,CAAC;AAC1C,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,WAAW,IAAY,OAAc,MAAkB,SAA6B;AAC3F,QAAM,MAAM,CAAC,OAAe,MAAM,QAAQ,EAAE;AAC5C,QAAM,MAAM,CAAC,aAAqB,KAAK,eAAe,IAAI,QAAQ;AAClE,QAAM,QAAQ,OAAmB,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,IAAI,UAAU,CAAC,GAAG,SAAS,GAAG,eAAe,QAAQ;AAExH,UAAQ,GAAG,MAAM;AAAA;AAAA,IAEf,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AACH,aAAO,MAAM;AAAA;AAAA,IAGf,KAAK,UAAU;AACb,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,OAAO;AAAA,6DAC0C,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,GAAG,CAAC;AAAA,aACJ,YAAY,UAAU,IAAI,KAAK,CAAC;AAAA,GAC1C,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,GAAG,eAAe,QAAQ;AAAA,IAC1G;AAAA,IACA,KAAK,gBAAgB;AACnB,YAAM,OAAO;AAAA,6DAC0C,UAAU,GAAG,KAAK,CAAC;AAAA;AAAA;AAAA,aAGnE,YAAY,GAAG,OAAO,GAAG,KAAK,CAAC;AAAA,GACzC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,eAAe,EAAE;AAAA,IACjG;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK,OAAO;AACV,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,EAAE,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,IAAI,EAAE,GAAG,IAAI;AAChE,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,qBACjC,KAAK;AAAA,GACvB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,cAAc;AACjB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,GAAG,SAAS,eAAe,MAAM;AAC/C,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,MAAM,YAAY,GAAG,QAAQ,IAAI,KAAK;AAC5C,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,kBACA,KAAK,IAAI,GAAG;AAAA,GAC3B,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK,QAAQ;AACX,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OACJ,GAAG,SAAS,SAAU,YACtB,GAAG,SAAS,UAAU,kBACtB,GAAG,SAAS,QAAU,WACtB,GAAG,SAAS,QAAU;AAAA;AAAA,QACA;AAAA;AACxB,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,aAEL,IAAI;AAAA,GACd,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,GAAG,SAAS,SAAS,MAAM;AACzC,YAAM,QAAQ,UAAU,IAAI,KAAK;AAEjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA;AAAA,2BAE5C,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,oCAClB,KAAK;AAAA,GACtC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,SAAS;AACZ,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,OAAO,IAAI,GAAG,IAAI;AACxB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA;AAAA,qDAEkC,UAAU,EAAE,KAAK,CAAC;AAAA,qDAClB,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,KAAK,OAAO,MAAM,CAAC;AAAA,EACvD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA,EACpD,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,MAAM,CAAC;AAAA;AAAA,GAEnD,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC/I;AAAA,IAEA,KAAK,aAAa;AAChB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,GAEf,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAClI;AAAA;AAAA,IAGA,KAAK;AAAA,IACL,KAAK,YAAY;AACf,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,UAAU,GAAG,SAAS,cAAc,OAAO,CAAC,OAAO;AACzD,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA;AAAA,+BAEW,CAAC;AAAA;AAAA;AAAA,iBAGf,OAAO;AAAA,GACrB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA;AAAA;AAAA;AAAA;AAAA,IAMA,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,GAEf,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA,IAEA,KAAK,aAAa;AAChB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AAIjC,YAAM,WAAW,eAAe,EAAE,KAAK;AACvC,YAAM,cAAc,wBAAwB,KAAK,IAAI,OAAO,MAAM;AAClE,YAAM,UAAoB,CAAC;AAC3B,eAAS,IAAI,GAAG,IAAI,EAAE,MAAM,QAAQ,KAAK;AACvC,cAAM,UAAU,GAAG,KAAK,QAAQ,CAAC;AACjC,gBAAQ,KAAK,QAAQ,OAAO,MAAM,SAAS,CAAC,CAAC,GAAG;AAAA,MAClD;AACA,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,WAAW;AAAA,iBACI,QAAQ,KAAK,KAAK,CAAC;AAAA;AAAA,GAEjC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA;AAAA,IAIA,KAAK,UAAU;AACb,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,CAAC;AACnB,YAAM,QAAQ,UAAU,EAAE,KAAK,KAAK,IAAI;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,iBACD,IAAI,CAAC;AAAA,iBACL,IAAI,CAAC;AAAA,iBACL,CAAC;AAAA,iBACD,CAAC;AAAA,qBACG,IAAI,CAAC,WAAW,CAAC;AAAA;AAAA,+BAEP,CAAC;AAAA,mCACG,CAAC;AAAA;AAAA;AAAA,GAGjC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IAEA,KAAK,kBAAkB;AACrB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,QAAQ,UAAU,EAAE,KAAK,KAAK,IAAI;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,iBACD,IAAI,CAAC;AAAA,iBACL,IAAI,CAAC;AAAA,iBACL,CAAC;AAAA,iBACD,CAAC;AAAA,qBACG,IAAI,CAAC,WAAW,CAAC;AAAA,qBACjB,IAAI,CAAC;AAAA;AAAA,+BAEK,CAAC;AAAA,2CACW,CAAC;AAAA;AAAA;AAAA,GAGzC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA;AAAA,IAGA,KAAK,WAAW;AACd,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,UAAU,IAAI,GAAG,OAAO;AAC9B,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,QAAQ,GAAG;AACjB,YAAM,UAAU,YAAY,GAAG,IAAI,KAAK;AACxC,YAAM,SAAS,YAAY,GAAG,IAAI,KAAK;AACvC,YAAM,OAAO;AAAA;AAAA,6DAE0C,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,uBACK,KAAK;AAAA,uBACL,KAAK;AAAA;AAAA,oBAER,OAAO,KAAK,MAAM;AAAA,GACnC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,OAAO,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC5H;AAAA;AAAA,IAGA,KAAK,oBAAoB;AACvB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA;AAAA,+BAEW,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA,+BAKD,CAAC;AAAA;AAAA;AAAA;AAAA,+BAID,CAAC;AAAA;AAAA;AAAA,GAG7B,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA,IAEA,KAAK,uBAAuB;AAC1B,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEpC,YAAM,YAAY,UAAU,EAAE,KAAK,IAAI;AACvC,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA;AAAA,IAG9B,QAAQ;AAAA,aACC,SAAS;AAAA,mBACH,CAAC;AAAA,mBACD,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sCAekB,CAAC;AAAA;AAAA;AAAA,GAGpC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,WAAW,eAAe,QAAQ;AAAA,IAC1H;AAAA,IAEA,KAAK,gBAAgB;AACnB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,IAAI,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACpC,YAAM,QAAQ,UAAU,EAAE,KAAK;AAC/B,YAAM,UAAU,YAAY,GAAG,WAAW,KAAK;AAC/C,YAAM,OAAO;AAAA;AAAA;AAAA,2BAGQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,mBACC,CAAC;AAAA,oBACA,CAAC,QAAQ,CAAC;AAAA;AAAA,eAEf,OAAO;AAAA;AAAA;AAAA;AAAA,GAInB,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,oBAAoB;AACvB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACvC,YAAM,QAAQ,GAAG,MAAM,GAAG;AAC1B,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,oBACE,KAAK;AAAA,oBACL,KAAK;AAAA,uBACF,IAAI,OAAO,GAAG,KAAK;AAAA,GACvC,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,gBAAgB;AACnB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,OAAO;AAAA,qDACkC,UAAU,EAAE,KAAK,CAAC;AAAA,6DACV,UAAU,IAAI,KAAK,CAAC;AAAA,2BACtD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,oBAAoB,KAAK,IAAI,OAAO,EAAE,OAAO,QAAQ,CAAC;AAAA;AAAA,GAErD,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA;AAAA,IAGA,KAAK,iBAAiB;AAEpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,KAAK,GAAG;AACd,YAAM,aAAa,IAAI;AACvB,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,aACL,YAAY,IAAI,KAAK,CAAC,aAAa,YAAY,YAAY,KAAK,CAAC;AAAA,GAC3E,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,iBAAiB;AAEpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,KAAK,GAAG;AACd,YAAM,aAAa,IAAI;AACvB,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA,2BAIQ,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA;AAAA,aAEL,YAAY,IAAI,KAAK,CAAC,aAAa,YAAY,YAAY,KAAK,CAAC;AAAA,GAC3E,KAAK;AACF,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACjI;AAAA,IACA,KAAK,iBAAiB;AAOpB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,YAAM,gBAAgB,GAAG,sBAAsB;AAC/C,YAAM,aAAa,gBAAgB,mBAAmB,YAAY,GAAG,aAAa,KAAK;AACvF,YAAM,gBAAgB,gBAClB;AAAA,oEAEA;AACJ,YAAM,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAKjB,aAAa;AAAA,2BACY,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,aACL,UAAU,iDAAiD,YAAY,GAAG,KAAK,KAAK,CAAC;AAAA,GAC/F,KAAK;AACF,YAAM,WAAW,gBACb,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,IAAI,GAAG,iBAAkB,GAAG,IAAI,GAAG,GAAG,CAAC,IAC5F,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;AACpE,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,SAAS,OAAO,eAAe,QAAQ;AAAA,IAC5F;AAAA,IAEA,KAAK,gBAAgB;AAGnB,YAAM,MAAM,IAAI,GAAG,GAAG;AACtB,YAAM,IAAI,IAAI,GAAG,CAAC;AAClB,YAAM,OAAO,eAAe,EAAE,OAAO,IAAI,OAAO,EAAE,KAAK;AACvD,YAAM,QAAQ,UAAU,IAAI,KAAK;AACjC,aAAO,EAAE,SAAS,QAAQ,GAAG,MAAM,MAAM,UAAU,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,GAAG,SAAS,OAAO,eAAe,QAAQ;AAAA,IACtH;AAAA,EACF;AACF;AAMA,SAAS,UAAU,GAAmC;AAKpD,MAAI,MAAM,OAAQ,QAAO;AACzB,SAAO;AACT;AAEA,SAAS,YAAY,OAAe,OAAuC;AACzE,MAAI,UAAU,OAAO;AACnB,QAAI,OAAO,SAAS,KAAK,GAAG;AAE1B,aAAO,MAAM,SAAS,EAAE,SAAS,GAAG,KAAK,MAAM,SAAS,EAAE,SAAS,GAAG,IAClE,GAAG,KAAK,MACR,GAAG,KAAK;AAAA,IACd;AACA,WAAO,QAAQ,IAAI,YAAY;AAAA,EACjC;AACA,MAAI,UAAU,MAAO,QAAO,GAAG,KAAK,MAAM,KAAK,CAAC;AAChD,SAAO,QAAQ,OAAO;AACxB;AAEA,SAAS,YAAY,MAAc,OAAuC;AACxE,MAAI,UAAU,MAAO,QAAO,OAAO,IAAI;AACvC,MAAI,UAAU,MAAO,QAAO,OAAO,IAAI;AACvC,SAAO,OAAO,IAAI;AACpB;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,UAAoB,IAAI,MAAM,MAAM,MAAM,EAAE,KAAK,CAAC;AACxD,WAAS,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,KAAK;AAC1C,YAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC,IAAK,MAAM,IAAI,CAAC;AAAA,EAC5C;AACA,SAAO;AACT;AAMA,SAAS,wBAAwB,SAAiB,OAAc,QAAwB;AACtF,MAAI,MAAM,WAAW,EAAG,QAAO,SAAS,MAAM;AAC9C,QAAM,UAAU,eAAe,KAAK;AACpC,QAAM,QAAkB,CAAC;AACzB,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,QAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,YAAM,KAAK,SAAS,MAAM,IAAI,CAAC,MAAM,SAAS,GAAG;AAAA,IACnD,OAAO;AACL,YAAM,KAAK,SAAS,MAAM,IAAI,CAAC,MAAM,SAAS,MAAM,QAAQ,CAAC,CAAC,IAAI;AAClE,YAAM,SAAS,GAAG,MAAM,OAAO,CAAC;AAChC,YAAM,KAAK,SAAS,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC,CAAC,IAAI;AAC7D,kBAAY;AAAA,IACd;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcA,SAAS,oBAAoB,SAAiB,UAAiB,UAAiB,QAAwB;AAGtG,QAAM,SAAS,GAAG,MAAM;AACxB,QAAM,YAAY,wBAAwB,SAAS,UAAU,MAAM;AACnE,QAAM,SAAS,SAAS,SAAS,SAAS;AAC1C,MAAI,SAAS,WAAW,GAAG;AACzB,WAAO,GAAG,SAAS;AAAA,QAAW,MAAM;AAAA,EACtC;AACA,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,QAAkB,CAAC;AACzB,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,UAAU,IAAI;AACpB,UAAM,SAAS,SAAS,CAAC;AACzB,UAAM,OAAO,WAAW,IAAI,OAAO,GAAG,MAAM,IAAI,OAAO,MAAM,WAAW,CAAC,CAAC;AAC1E,UAAM,KAAK,IAAI;AAAA,EACjB;AACA,SAAO,GAAG,SAAS;AAAA,QAAW,MAAM,MAAM,MAAM,KAAK,KAAK,CAAC;AAC7D;AAYA,SAAS,eAAe,UAAiB,UAAiB,OAAuC;AAC/F,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,aAAa,eAAe,QAAQ;AAC1C,QAAM,SAAS,SAAS,SAAS,SAAS;AAG1C,QAAM,YAAY,wBAAwB,KAAK,UAAU,KAAK;AAM9D,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,IAAI,QAAQ;AAAE,kBAAY,KAAK,CAAC;AAAG;AAAA,IAAS;AAChD,UAAM,OAAO,SAAS,IAAI,MAAM;AAChC,UAAM,OAAO,SAAS,CAAC;AACvB,QAAI,SAAS,KAAK,OAAO,EAAG,aAAY,KAAK,CAAC;AAAA,EAChD;AAGA,QAAM,YAAsB,CAAC;AAC7B,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,YAAY,SAAS,CAAC,EAAG;AAC7B,UAAM,QAAQ,IAAI;AAClB,cAAU,KAAK,OAAO,KAAK,MAAM,WAAW,CAAC,CAAC,GAAG;AAAA,EACnD;AACA,QAAM,WAAW,UAAU,SAAS,IAAI,UAAU,KAAK,KAAK,IAAI;AAGhE,QAAM,SAAS,CAAC,UAAkB,KAAK,OAAO,QAAQ,CAAC;AACvD,QAAM,QAAkB,CAAC;AACzB,WAAS,QAAQ,GAAG,QAAQ,YAAY,QAAQ,SAAS;AACvD,UAAM,IAAI,YAAY,KAAK;AAC3B,UAAM,MAAM,SAAS,CAAC;AACtB,UAAM,KAAK,GAAG,OAAO,KAAK,CAAC,aAAa,CAAC,iBAAiB,CAAC,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU;AAAA,EAChG;AAEA,QAAM,eAAe,YAAY,IAAI,OAAK,IAAI,CAAC,MAAM,WAAW,CAAC,CAAC,GAAG;AACrE,QAAM,WAAW,aAAa,SAAS,IACnC,GAAG,QAAQ,MAAM,aAAa,KAAK,KAAK,CAAC,KACzC;AACJ,QAAM,KAAK,GAAG,OAAO,YAAY,MAAM,CAAC,aAAa,QAAQ,IAAI;AACjE,WAAS,QAAQ,YAAY,SAAS,GAAG,SAAS,GAAG,SAAS;AAC5D,UAAM,KAAK,GAAG,OAAO,KAAK,CAAC,GAAG;AAAA,EAChC;AAEA,QAAM,QAAQ,SAAS,WAAW,IAAI,IAAK,WAAW,CAAC,IAAK,SAAS,CAAC;AACtE,QAAM,WAAW,YAAY,WAAW,IACpC,eAAe,QAAQ,OACvB,MAAM,KAAK,IAAI;AAEnB,SAAO;AAAA,qDAC4C,UAAU,KAAK,CAAC;AAAA,6DACR,UAAU,KAAK,CAAC;AAAA,2BAClD,OAAO;AAAA;AAAA,IAE9B,QAAQ;AAAA,aACC,KAAK;AAAA,EAChB,SAAS;AAAA,YACC,UAAU,KAAK,CAAC,MAAM,UAAU,QAAQ,SAAU,UAAU,QAAQ,OAAO,IAAK;AAAA,EAC1F,QAAQ;AAAA;AAAA,GAEP,KAAK;AACR;;;ACzuBO,IAAM,WAAN,MAAe;AAAA,EACpB,YACmB,QACA,MACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAEnB,IAAI,MAA4B;AAC9B,UAAM,IAAI,KAAK,KAAK,IAAI,IAAI;AAC5B,QAAI,CAAC,GAAG;AACN,YAAM,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,CAAC,EAAE,KAAK,EAAE,KAAK,IAAI;AACpD,YAAM,SAAS,QAAQ,oBAAoB,KAAK,KAAK;AACrD,YAAM,IAAI,MAAM,kBAAkB,IAAI,kBAAkB,MAAM,EAAE;AAAA,IAClE;AACA,WAAO;AAAA,EACT;AAAA,EACA,QAAQ,MAAiC;AACvC,UAAM,IAAI,KAAK,OAAO,IAAI;AAC1B,QAAI,CAAC,GAAG;AACN,YAAM,QAAQ,OAAO,KAAK,KAAK,MAAM,EAAE,KAAK,EAAE,KAAK,IAAI,KAAK;AAC5D,YAAM,IAAI,MAAM,sBAAsB,IAAI,4BAA4B,KAAK,EAAE;AAAA,IAC/E;AACA,WAAO;AAAA,EACT;AAAA,EACA,IAAI,MAAuB;AAAE,WAAO,KAAK,KAAK,IAAI,IAAI;AAAA,EAAE;AAAA,EACxD,QAAkB;AAAE,WAAO,CAAC,GAAG,KAAK,KAAK,KAAK,CAAC,EAAE,KAAK;AAAA,EAAE;AAC1D;AAkHA,IAAM,aAAa,MAAmB,IAAmB;AACzD,IAAM,WAAW,IAAmB;;;AC5G7B,IAAM,OAAO;AAAA,EAClB,OAAO,CAAC,OAA2B,CAAC,OAAiB,EAAE,MAAM,SAAS,OAAO,KAAK,SAAS,KAAK;AAAA,EAChG,SAAS,CAAC,OAA0B,CAAC,MACnC,KAAK,SAAS,SAAY,EAAE,MAAM,WAAW,MAAM,KAAK,KAAK,IAAI,EAAE,MAAM,UAAU;AAAA,EACrF,SAAS,CAAC,UAAkC,EAAE,MAAM,WAAW,KAAK;AACtE;AAgBA,SAAS,YAAoB;AAC3B,SAAO,KAAK,KAAK,KAAK,KAAK,IAAI,KAAK,IAAI,OAAO,KAAK,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK,KAAK,OAAO,CAAC;AACxG;AAEA,SAAS,QAAQ,OAAuB;AACtC,SAAO,CAAC,SAAS;AACf,UAAM,MAAM,IAAI,aAAa,IAAI;AACjC,aAAS,IAAI,GAAG,IAAI,MAAM,IAAK,KAAI,CAAC,IAAI,UAAU,IAAI;AACtD,WAAO;AAAA,EACT;AACF;AAKA,SAAS,YAAY,MAAoC;AACvD,MAAI,CAAC,QAAQ,SAAS,QAAS,QAAO,QAAQ,IAAI;AAClD,MAAI,SAAS,QAAS,QAAO,CAAC,SAAS,IAAI,aAAa,IAAI;AAC5D,MAAI,SAAS,OAAQ,QAAO,CAAC,SAAS;AAAE,UAAM,IAAI,IAAI,aAAa,IAAI;AAAG,MAAE,KAAK,CAAC;AAAG,WAAO;AAAA,EAAE;AAC9F,UAAQ,KAAK,MAAM;AAAA,IACjB,KAAK;AAAS,aAAO,QAAQ,KAAK,KAAK;AAAA,IACvC,KAAK,WAAW;AACd,YAAM,OAAO,KAAK,QAAQ,KAAK,KAAK,CAAC;AACrC,aAAO,CAAC,MAAM,UAAU;AACtB,cAAM,QAAQ,MAAM,CAAC,KAAK;AAC1B,cAAM,MAAM,OAAO,KAAK,KAAK,KAAK;AAClC,cAAM,MAAM,IAAI,aAAa,IAAI;AACjC,iBAAS,IAAI,GAAG,IAAI,MAAM,IAAK,KAAI,CAAC,IAAI,UAAU,IAAI;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,KAAK,WAAW;AACd,YAAM,OAAO,KAAK;AAClB,aAAO,CAAC,SAAS;AACf,YAAI,KAAK,WAAW,MAAM;AACxB,gBAAM,IAAI,MAAM,6BAA6B,KAAK,MAAM,6BAA6B,IAAI,EAAE;AAAA,QAC7F;AACA,eAAO,IAAI,aAAa,IAAI;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AACF;AAKA,SAAS,aAAa,MAAyC;AAC7D,MAAI,MAAM,UAAU,OAAW,QAAO,KAAK;AAC3C,QAAM,OAAO,MAAM,QAAQ;AAC3B,SAAO,SAAS,WAAW,SAAS;AACtC;AAWA,IAAM,gBAAN,MAAoB;AAAA,EAClB,YACkB,OACA,OACA,QACA,OAChB;AAJgB;AACA;AACA;AACA;AAAA,EACf;AAAA,EAJe;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAEpB;AAMO,IAAe,SAAf,MAAsB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUjB,MAAM,OAAc,MAA6B;AACzD,UAAM,QAAQ,MAAM,SAAS;AAE7B,WAAO,IAAI,cAAc,OAAO,OAAO,YAAY,MAAM,IAAI,GAAG,aAAa,IAAI,CAAC;AAAA,EACpF;AACF;AAwBO,SAAS,kBAAkB,MAAkC;AAClE,QAAM,UAAkC,CAAC;AACzC,QAAM,UAAkC,CAAC;AACzC,QAAM,aAAsC,CAAC;AAC7C,QAAM,MAAM,IAAI,CAAC,MAAM,KAAK,OAAO,QAAQ;AACzC,QAAI,eAAe,eAAe;AAChC,YAAM,IAAI,WAAW,MAAM,IAAI,OAAO,IAAI,KAAK;AAC9C,MAAC,MAAc,GAAG,IAAI;AACvB,cAAQ,IAAI,IAAI;AAChB,cAAQ,IAAI,IAAI,IAAI;AACpB,iBAAW,IAAI,IAAI,IAAI;AAAA,IACzB;AAAA,EACF,CAAC;AACD,SAAO,EAAE,SAAS,SAAS,WAAW;AACxC;AAaA,SAAS,MAAM,MAAe,MAAc,SAAwB;AAClE,MAAI,SAAS,QAAQ,SAAS,OAAW;AACzC,MAAI,OAAO,SAAS,SAAU;AAE9B,MAAI,gBAAgB,QAAQ;AAC1B,eAAW,OAAO,OAAO,KAAK,IAAc,GAAG;AAC7C,YAAM,QAAS,KAAa,GAAG;AAC/B,YAAM,YAAY,OAAO,GAAG,IAAI,IAAI,GAAG,KAAK;AAC5C,iBAAW,OAAO,WAAW,MAAM,KAAK,OAAO;AAAA,IACjD;AACA;AAAA,EACF;AACA,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,SAAK,QAAQ,CAAC,MAAM,MAAM;AACxB,YAAM,YAAY,OAAO,GAAG,IAAI,IAAI,CAAC,KAAK,OAAO,CAAC;AAClD,iBAAW,MAAM,WAAW,MAA2B,GAAG,OAAO;AAAA,IACnE,CAAC;AACD;AAAA,EACF;AAGF;AAEA,SAAS,WAAW,OAAgB,MAAc,OAAe,KAAsB,SAAwB;AAC7G,MAAI,iBAAiB,UAAU,MAAM,QAAQ,KAAK,GAAG;AACnD,UAAM,OAAO,MAAM,OAAO;AAAA,EAC5B,OAAO;AACL,YAAQ,MAAM,OAAO,OAAO,GAAG;AAAA,EACjC;AACF;;;AC3FO,SAAS,sBACd,KACe;AACf,QAAM,MAAqB,CAAC;AAC5B,aAAW,KAAK,OAAO,OAAO,GAAG,EAAG,KAAI,KAAK,EAAE,MAAqB;AACpE,SAAO;AACT;AAaO,SAAS,kBAAkB,GAAqB;AACrD,QAAM,MAAM,IAAI,MAAM,EAAE,OAAO;AAC/B,MAAI,OAAO,EAAE;AACb,MAAI,QAAQ,EAAE;AACd,SAAO;AACT;;;ACvKO,IAAM,cAAN,MAAkB;AAAA,EACf;AAAA,EACA,SAAS;AAAA,EACT,UAAU,oBAAI,IAA6B;AAAA,EAC3C,aAAa;AAAA,EAErB,YAAY,cAAsB;AAChC,UAAM,OAAO,IAAI,KAAK,CAAC,YAAY,GAAG,EAAE,MAAM,yBAAyB,CAAC;AACxE,UAAM,MAAM,IAAI,gBAAgB,IAAI;AACpC,SAAK,SAAS,IAAI,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAIhD,QAAI,gBAAgB,GAAG;AAEvB,SAAK,OAAO,YAAY,CAAC,OAA0B;AACjD,YAAM,QAAQ,GAAG;AACjB,YAAM,WAAW,KAAK,QAAQ,IAAI,MAAM,EAAE;AAC1C,UAAI,CAAC,SAAU;AACf,WAAK,QAAQ,OAAO,MAAM,EAAE;AAC5B,UAAI,MAAM,GAAI,UAAS,QAAQ,MAAM,MAAM;AAAA,UACtC,UAAS,OAAO,kBAAkB,MAAM,KAAK,CAAC;AAAA,IACrD;AAEA,SAAK,OAAO,UAAU,CAAC,OAAmB;AACxC,YAAM,MAAM,IAAI,MAAM,4BAA4B,GAAG,WAAW,SAAS,EAAE;AAC3E,YAAM,OAAkB,EAAE,MAAM,eAAe,SAAS,IAAI,SAAS,OAAO,IAAI,SAAS,GAAG;AAE5F,iBAAW,YAAY,KAAK,QAAQ,OAAO,EAAG,UAAS,OAAO,kBAAkB,IAAI,CAAC;AACrF,WAAK,QAAQ,MAAM;AAAA,IACrB;AAAA,EACF;AAAA;AAAA;AAAA,EAIA,QAAW,KAAsB,WAA0B,CAAC,GAAe;AACzE,QAAI,KAAK,WAAY,QAAO,QAAQ,OAAO,IAAI,MAAM,wCAAwC,CAAC;AAC9F,UAAM,KAAK,KAAK;AAChB,WAAO,IAAI,QAAW,CAAC,SAAS,WAAW;AACzC,WAAK,QAAQ,IAAI,IAAI,EAAE,SAA0C,OAAO,CAAC;AACzE,WAAK,OAAO,YAAY,EAAE,GAAG,KAAK,GAAG,GAAU,QAAQ;AAAA,IACzD,CAAC;AAAA,EACH;AAAA;AAAA;AAAA,EAIA,KAAK,KAAsB,WAA0B,CAAC,GAAS;AAC7D,QAAI,KAAK,WAAY;AACrB,UAAM,KAAK,KAAK;AAChB,SAAK,OAAO,YAAY,EAAE,GAAG,KAAK,GAAG,GAAU,QAAQ;AAAA,EACzD;AAAA,EAEA,YAAkB;AAChB,QAAI,KAAK,WAAY;AACrB,SAAK,aAAa;AAClB,SAAK,OAAO,UAAU;AACtB,UAAM,MAAM,IAAI,MAAM,+BAA+B;AACrD,eAAW,YAAY,KAAK,QAAQ,OAAO,EAAG,UAAS,OAAO,GAAG;AACjE,SAAK,QAAQ,MAAM;AAAA,EACrB;AACF;;;ACAO,SAAS,YAAY,SAAmC;AAC7D,QAAM,QAAQ,MAAM,OAAO;AAC3B,QAAM,EAAE,YAAY,KAAK,IAAI,WAAW,KAAK;AAC7C,QAAM,OAAO,YAAY,OAAO,UAAU;AAC1C,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,SAAO,EAAE,OAAO,YAAY,MAAM,MAAM,QAAQ;AAClD;AA6FA,eAAsB,cACpB,cACA,SACA,OAAgC,CAAC,GACL;AAE5B,QAAM,EAAE,OAAO,aAAa,IAAI,YAAY,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACpF,QAAM,EAAE,YAAY,KAAK,IAAI,WAAW,KAAK;AAC7C,QAAM,aAAa,KAAK,OACpB,WAAW,OAAO,YAAY,aAAa,SAAS,KAAK,MAAM,aAAa,UAAU,IACtF;AAEJ,QAAM,OAAO,YAAY,OAAO,YAAY,YAAY,cAAc,CAAC,CAAC;AACxE,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,QAAM,KAAiB,EAAE,OAAO,YAAY,MAAM,MAAM,QAAQ;AAIhE,QAAM,gBAAgB,mBAAmB,MAAM,aAAa,OAAO;AAGnE,QAAM,QAAQ,IAAI,YAAY,8pnBAAiB;AAC/C,QAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAC9C,QAAM,WAAW,aAAa,eAAe,UAAU,IAAI;AAC3D,QAAM,YAAY,sBAAsB,aAAa;AAErD,MAAI;AACJ,MAAI;AACF,WAAO,MAAM,MAAM;AAAA,MACjB,EAAE,MAAM,iBAAiB,SAAS,EAAE,SAAS,GAAG,IAAI,QAAQ,eAAe,MAAM,SAAS,EAAE;AAAA,MAC5F;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,UAAM,UAAU;AAChB,UAAM;AAAA,EACR;AAEA,SAAO,IAAI;AAAA,IACT;AAAA;AAAA,IAAqB;AAAA,IAAG;AAAA,IAAI;AAAA,IAAM;AAAA;AAAA,IACpB,aAAa;AAAA;AAAA,IACT,EAAE,GAAG,EAAE;AAAA,EAC3B;AACF;AAOA,eAAsB,eACpB,cACA,SACA,OAAiC,CAAC,GACF;AAChC,QAAM,EAAE,OAAO,aAAa,IAAI,YAAY,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACpF,QAAM,eAAe,MAAM,QAAQ,MAAM,QAAQ,CAAC,CAAE;AACpD,QAAM,OAAO;AAAA,IAAY;AAAA;AAAA,IAAwB,CAAC;AAAA,EAAC;AACnD,QAAM,UAAU,YAAY,OAAO,IAAI;AACvC,QAAM,KAAiB,EAAE,OAAO,YAAY,CAAC,GAAG,MAAM,cAAc,MAAM,QAAQ;AAElF,QAAM,gBAAgB,mBAAmB,MAAM,aAAa,OAAO;AACnE,QAAM,QAAQ,IAAI,YAAY,8pnBAAiB;AAC/C,QAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAC9C,QAAM,YAAY,sBAAsB,aAAa;AAErD,MAAI;AACJ,MAAI;AACF,WAAO,MAAM,MAAM;AAAA,MACjB,EAAE,MAAM,iBAAiB,SAAS,EAAE,SAAS,GAAG,IAAI,QAAQ,eAAe,MAAM,KAAK,EAAE;AAAA,MACxF;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,UAAM,UAAU;AAChB,UAAM;AAAA,EACR;AAEA,SAAO,IAAI;AAAA,IAA2B;AAAA;AAAA,IAAqB;AAAA,IAAG;AAAA,IAAI;AAAA;AAAA,IAAuB;AAAA,EAAI;AAC/F;AAMA,IAAM,sBAAN,MAAyE;AAAA,EACvE,YACmB,OACA,SACD,IACC,MACA,cAGA,SACA,aACjB;AATiB;AACA;AACD;AACC;AACA;AAGA;AACA;AAAA,EAChB;AAAA,EATgB;AAAA,EACA;AAAA,EACD;AAAA,EACC;AAAA,EACA;AAAA,EAGA;AAAA,EACA;AAAA,EAGnB,IAAI,cAAsB;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACzD,IAAI,cAAiC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACpE,IAAI,aAAgC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAW;AAAA,EAIlE,MAAM,KACJ,QACA,MAC8B;AAK9B,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACxG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,MAAM,EAAE,MAAM,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACrF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAIA,MAAM,IACJ,QACA,MACmC;AAEnC,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,OAAO,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACvG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,QAAQ,EAAE,QAAQ,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACzF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,aAAa,QAAsC,MAA2C;AAE5F,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,gBAAgB,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,SAAS,CAAC,CAAC,MAAM,QAAQ,EAAE;AAAA,IAC/F,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,iBAAwD;AAC5D,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,MAAM,qBAA4D;AAChE,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,sBAAsB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IACnE;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,MAAM,QAAuB;AAI3B,UAAM,gBAAgB,mBAAmB,KAAK,GAAG,MAAM,KAAK,OAAO;AACnE,UAAM,KAAK,aAAa,aAAa;AACrC,UAAM,KAAK,oBAAoB;AAAA,EACjC;AAAA,EAEA,sBAAqC;AACnC,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,eACJ,SACA,OAAuC,CAAC,GACR;AAChC,UAAM,EAAE,OAAO,cAAc,cAAc,IAAI,YAAY,KAAK,cAAc,SAAS,KAAK,UAAU,CAAC,CAAC;AACxG,UAAM,eAAe,MAAM,QAAQ,MAAM,QAAQ,CAAC,CAAE;AACpD,UAAM,OAAO;AAAA,MAAY;AAAA;AAAA,MAAwB,CAAC;AAAA,IAAC;AACnD,UAAM,UAAU,YAAY,OAAO,IAAI;AACvC,UAAM,KAAiB,EAAE,OAAO,YAAY,CAAC,GAAG,MAAM,cAAc,MAAM,QAAQ;AAElF,UAAM,eAAe,KAAK,YAAY;AACtC,UAAM,SAAiB,EAAE,OAAO,MAAM,QAAQ;AAE9C,UAAM,OAAO,MAAM,KAAK,MAAM;AAAA,MAC5B,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,cAAc,eAAe,KAAK,SAAS,IAAI,OAAO,EAAE;AAAA,IACxG;AAEA,WAAO,IAAI;AAAA,MAA2B,KAAK;AAAA,MAAO;AAAA,MAAc;AAAA,MAAI;AAAA;AAAA,MAAuB;AAAA,IAAK;AAAA,EAClG;AAAA,EAEA,UAAgB;AAGd,SAAK,MAAM,KAAK,EAAE,MAAM,WAAW,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE,CAAC;AACvE,SAAK,MAAM,UAAU;AAAA,EACvB;AACF;AAEA,IAAM,6BAAN,MAAkE;AAAA,EAChE,YACmB,OACA,SACD,IACC,MACA,YACjB;AALiB;AACA;AACD;AACC;AACA;AAAA,EAChB;AAAA,EALgB;AAAA,EACA;AAAA,EACD;AAAA,EACC;AAAA,EACA;AAAA,EAGnB,IAAI,cAAsB;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACzD,IAAI,cAAiC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAY;AAAA,EACpE,IAAI,aAAgC;AAAE,WAAO,KAAK,KAAK;AAAA,EAAW;AAAA,EAIlE,MAAM,IACJ,QACA,MACmC;AAEnC,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,OAAO,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,cAAc,MAAM,iBAAiB,KAAK,EAAE;AAAA,IACvG;AACA,QAAI,MAAM,cAAc;AACtB,aAAO,EAAE,QAAQ,EAAE,QAAQ,UAAU,aAAa,EAAE,UAAU,KAAK,KAAK,aAAa,EAAE;AAAA,IACzF;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,aAAa,QAAsC,MAA2C;AAC5F,WAAO,KAAK,MAAM;AAAA,MAChB,EAAE,MAAM,gBAAgB,SAAS,EAAE,SAAS,KAAK,SAAS,QAAQ,SAAS,CAAC,CAAC,MAAM,QAAQ,EAAE;AAAA,IAC/F,EAAE,KAAK,MAAM,MAAS;AAAA,EACxB;AAAA,EAEA,MAAM,iBAAwD;AAC5D,UAAM,IAAI,MAAM,KAAK,MAAM;AAAA,MACzB,EAAE,MAAM,kBAAkB,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE;AAAA,IAC/D;AACA,WAAO,EAAE;AAAA,EACX;AAAA,EAEA,UAAgB;AACd,SAAK,MAAM,KAAK,EAAE,MAAM,WAAW,SAAS,EAAE,SAAS,KAAK,QAAQ,EAAE,CAAC;AACvE,QAAI,KAAK,WAAY,MAAK,MAAM,UAAU;AAAA,EAC5C;AACF;AAYA,SAAS,YACP,cACA,SACA,YACoD;AACpD,QAAM,QAAQ,aAAa;AAC3B,MAAI,eAAmC,EAAE,SAAS,CAAC,GAAG,SAAS,CAAC,GAAG,YAAY,CAAC,EAAE;AAClF,QAAM,QAAQ,MAAM,MAAM;AACxB,mBAAe,kBAAkB,KAAK;AACtC,UAAM,eAAuC,CAAC;AAC9C,eAAW,CAAC,MAAM,IAAI,KAAK,OAAO,QAAQ,UAAU,GAAG;AACrD,mBAAa,IAAI,IAAI,YAAY,MAAM,KAAK,OAAO,KAAK,SAAS,KAAK;AAAA,IACxE;AACA,WAAO,QAAQ,OAAO,YAAgC;AAAA,EACxD,CAAC;AACD,SAAO,EAAE,OAAO,aAAa;AAC/B;AAIA,SAAS,mBAAmB,MAAkB,SAA+D;AAC3G,QAAM,MAAoC,CAAC;AAC3C,aAAW,CAAC,MAAM,KAAK,KAAK,KAAK,cAAc;AAC7C,UAAM,QAAQ,KAAK,QAAQ,KAAK,EAAG;AACnC,UAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC5C,UAAM,SAAS,QAAQ,IAAI;AAC3B,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,+BAA+B,IAAI,GAAG;AACnE,QAAI,IAAI,IAAI,OAAO,MAAM,KAAK;AAAA,EAChC;AACA,SAAO;AACT;AAIA,SAAS,eAAe,GAA+B;AACrD,QAAM,IAAwB,EAAE;AAChC,SAAO;AAAA,IACL,IAAI,EAAE;AAAA,IACN,IAAI,EAAE;AAAA,IACN,IAAI,EAAE;AAAA,IACN,KAAK,EAAE;AAAA,IACP,aAAa,EAAE;AAAA,IACf,eAAe,EAAE;AAAA,IACjB,cAAc,EAAE;AAAA,IAChB,sBAAsB,EAAE;AAAA,EAC1B;AACF;AAIA,SAAS,aACP,UACA,eACU;AACV,QAAM,OAAO,oBAAI,IAA0B;AAC3C,MAAI,UAAU;AACZ,eAAW,CAAC,MAAM,GAAG,KAAK,OAAO,QAAQ,QAAQ,EAAG,MAAK,IAAI,MAAM,GAAG;AAAA,EACxE;AACA,SAAO,IAAI,SAAS,eAAe,IAAI;AACzC;;;AC1eA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA6BO,IAAM,SAAN,cAAqB,OAAO;AAAA,EAGjC,YAA4B,OAA+B,QAAgB,OAAsB,CAAC,GAAG;AACnG,UAAM;AADoB;AAA+B;AAEzD,SAAK,IAAI,KAAK,MAAM,CAAC,OAAO,MAAM,CAAC;AACnC,SAAK,IAAI,KAAK,SAAS,QAAQ,OAAO,KAAK,MAAM,CAAC,MAAM,GAAG,EAAE,MAAM,QAAQ,CAAC;AAAA,EAC9E;AAAA,EAJ4B;AAAA,EAA+B;AAAA,EAF3D;AAAA,EACA;AAAA,EAMA,IAAI,GAAmB;AACrB,UAAM,MAAM,OAAO,GAAG,KAAK,CAAC;AAC5B,WAAO,KAAK,IAAI,IAAI,KAAK,KAAK,CAAC,IAAI;AAAA,EACrC;AACF;AAMO,IAAM,YAAN,cAAwB,OAAO;AAAA,EAGpC,YAA4B,GAA2B,MAAc,MAAM;AACzE,UAAM;AADoB;AAA2B;AAErD,SAAK,IAAI,KAAK,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,OAAO,CAAC;AACzC,SAAK,IAAI,KAAK,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,QAAQ,CAAC;AAAA,EAC5C;AAAA,EAJ4B;AAAA,EAA2B;AAAA,EAFvD;AAAA,EACA;AAAA,EAMA,IAAI,GAAmB;AACrB,UAAM,IAAI,SAAS,CAAC;AACpB,UAAM,IAAI,IAAI,GAAG,CAAC;AAClB,UAAM,IAAI,SAAS,IAAI,GAAG,CAAC,CAAC;AAC5B,UAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,GAAG,CAAC;AACnC,WAAO,IAAI,IAAI,IAAI,GAAG,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC;AAAA,EAC/C;AACF;AAUO,SAAS,WAAW,GAAW,QAAwB;AAC5D,QAAM,OAAO,YAAY,YAAY;AACrC,QAAM,IAAI,EAAE,MAAM;AAClB,MAAI,IAAI,EAAG,OAAM,IAAI,WAAW,uCAAuC,CAAC,IAAI,IAAI;AAChF,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,MAAI,IAAI,WAAW,GAAG;AACpB,UAAM,IAAI,WAAW,wBAAwB,CAAC,4BAA4B,MAAM,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,OAAO,EAAE,MAAM,MAAM,GAAG,IAAI,CAAC;AACnC,QAAM,WAAW,QAAQ,GAAG,CAAC,GAAG,MAAM,GAAG,QAAQ,IAAI,MAAM,CAAC;AAE5D,SAAO,SAAS,UAAU,KAAK,QAAQ,KAAK,SAAS,CAAC;AACxD;AAGO,SAAS,WAAW,GAAmB;AAC5C,QAAM,OAAO,YAAY,YAAY;AACrC,QAAM,IAAI,EAAE,MAAM;AAClB,MAAI,IAAI,EAAG,OAAM,IAAI,WAAW,uCAAuC,CAAC,IAAI,IAAI;AAChF,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,IAAI,EAAE,MAAM,IAAI,CAAC;AACvB,QAAM,OAAO,EAAE,MAAM,MAAM,GAAG,IAAI,CAAC;AAEnC,QAAM,UAAU,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;AACxC,SAAO,QAAQ,SAAS,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC;AAC7C;AAOO,SAAS,aAAa,UAAoB,MAA8B;AAC7E,QAAM,OAAO,SAAS,IAAI,IAAI;AAC9B,QAAM,QAAQ,SAAS,QAAQ,IAAI;AACnC,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,IAAI,MAAM,kBAAkB,IAAI,iCAAiC,MAAM,KAAK,IAAI,CAAC,GAAG;AAAA,EAC5F;AAGA,QAAM,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,MAAM,CAAC,IAAI;AAC5C,QAAM,IAAI,EAAE,CAAC;AACb,MAAI,SAAS;AACb,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,WAAU,EAAE,CAAC;AAChD,QAAM,WAAW,IAAI;AACrB,MAAI,KAAK,WAAW,UAAU;AAC5B,UAAM,IAAI,MAAM,kBAAkB,IAAI,YAAY,KAAK,MAAM,gCAAgC,QAAQ,EAAE;AAAA,EACzG;AACA,SAAO,MAAM,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,GAAG,MAAM,KAAK,MAAM,IAAI,SAAS,IAAI,KAAK,MAAM,CAAC;AACrF;AAWO,SAAS,iBAAiB,QAAgB,SAAyB;AACxE,QAAM,OAAO,YAAY,kBAAkB;AAC3C,MAAI,QAAQ,UAAU,OAAO;AAC3B,UAAM,IAAI,WAAW,8CAA8C,QAAQ,KAAK,IAAI,IAAI;AAAA,EAC1F;AACA,QAAM,QAAQ,OAAO,MAAM,OAAO,MAAM,SAAS,CAAC;AAClD,QAAM,KAAK,eAAe,MAAM;AAChC,QAAM,WAAW,QAAQ,IAAI,IAAI,OAAO,SAAS,OAAO,KAAK,CAAC,CAAC;AAC/D,SAAO,IAAI,UAAU,EAAE;AACzB;",
   "names": []
 }