npm - numbl - Versions diffs - 0.1.7 → 0.3.0 - Mend

numbl 0.1.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/binding.gyp +59 -3
package/dist-cli/cli.js +22538 -7936
package/dist-lib/lib.js +34682 -20852
package/dist-lib/numbl-core/executeCode.d.ts +13 -0
package/dist-lib/numbl-core/fileIOAdapter.d.ts +2 -0
package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +46 -0
package/dist-lib/numbl-core/jit/e1/hash.d.ts +10 -0
package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +66 -0
package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +34 -0
package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +25 -0
package/dist-lib/numbl-core/jit/e2/cache.d.ts +80 -0
package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +55 -0
package/dist-lib/numbl-core/jit/e2/classify.d.ts +119 -0
package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +16 -0
package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +79 -0
package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +71 -0
package/dist-lib/numbl-core/jit/e2/install.d.ts +11 -0
package/dist-lib/numbl-core/jit/e2/liveness.d.ts +29 -0
package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +49 -0
package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +75 -0
package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +24 -0
package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +72 -0
package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +29 -0
package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +69 -0
package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
package/dist-lib/numbl-core/jit/heavyOps.d.ts +15 -0
package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
package/dist-lib/numbl-core/jit/js/jsMultiReduction.d.ts +70 -0
package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
package/dist-lib/numbl-core/ops/index.d.ts +8 -0
package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
package/dist-lib/numbl-core/parser/types.d.ts +6 -0
package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
package/dist-lib/numbl-core/version.d.ts +1 -1
package/native/jit_runtime/jit_runtime.c +261 -0
package/native/jit_runtime/jit_runtime.h +204 -0
package/native/numbl_addon.cpp +62 -1
package/native/ops/bessel.c +572 -0
package/native/ops/comparison.c +150 -0
package/native/ops/complex_binary_elemwise.c +192 -0
package/native/ops/complex_unary_elemwise.c +152 -0
package/native/ops/numbl_ops.c +66 -0
package/native/ops/numbl_ops.h +262 -0
package/native/ops/real_binary_elemwise.c +85 -0
package/native/ops/real_unary_elemwise.c +104 -0
package/native/ops/reduce.c +162 -0
package/native/ops_napi.cpp +320 -0
package/package.json +8 -9
package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
/package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0

package/dist-lib/numbl-core/jit/c/context.d.ts ADDED Viewed

@@ -0,0 +1,152 @@
+/**
+ * Shared types, constants, and helpers used by both the emit/ modules
+ * (the per-function emitter) and assemble.ts (the orchestration that
+ * builds the final signature / prelude / epilogue).
+ *
+ * Contains no emit logic itself — pulling this out of the main file
+ * breaks the otherwise-circular dependency between assemble.ts and
+ * the emit helpers.
+ */
+import { BinaryOperation } from "../../parser/types.js";
+import type { ScalarOpTarget } from "../scalarEmit.js";
+import type { ClassificationResult } from "./classify.js";
+import type { CParamDesc, COutputDesc } from "./abi.js";
+/** Per-callee ABI info the outer emitter uses to marshal its call sites.
+ *  `emitUserCall` reads `paramDescs` to know what slots each param
+ *  contributes (data + len + optional d0/d1 for tensors) and
+ *  `outputDescs` to know whether the return uses the dynamic-output ABI. */
+export interface CalleeAbi {
+    paramDescs: CParamDesc[];
+    outputDescs: COutputDesc[];
+}
+/**
+ * Minimum NUMBL_JIT_RT_VERSION the emitter needs at link time.
+ *
+ * Bump this in lockstep with NUMBL_JIT_RT_VERSION in
+ * native/jit_runtime/jit_runtime.h whenever we add a helper the emitter
+ * calls. The emitted C asserts `NUMBL_JIT_RT_VERSION >= N` so a stale
+ * archive fails the per-JIT compile step with a clear message instead
+ * of a cryptic linker "undefined reference" error.
+ *
+ * Version log:
+ *   1 — initial: idx1r, mod, sign, reduce_flat, tic/toc/monotonic_time.
+ *   2 — set1r_h (scalar linear Index write with soft-bail on OOB).
+ *   3 — idx2r / idx3r / set2r_h / set3r_h (multi-index Index read/write).
+ *   4 — setRange1r_h / setCol2r_h / copyRange1r (range/col slice r/w).
+ *   5 — is_nan / is_inf / is_finite (predicates that survive -ffast-math
+ *       by inspecting IEEE-754 bit patterns).
+ */
+export declare const NUMBL_JIT_RT_REQUIRED_VERSION = 5;
+export declare function mangle(name: string): string;
+/** Imaginary part of a complex scalar local / param. Paired with
+ *  `mangle(name)` (the real part). */
+export declare function mangleIm(name: string): string;
+/** Spacing helper for joining a C type and an identifier: a space goes
+ *  between them unless the type already ends in `*` (pointer types get no
+ *  space between `*` and the name). Only the type is passed in. */
+export declare function spaceBeforeName(cType: string): string;
+export declare const C_SCALAR_TARGET: ScalarOpTarget;
+export declare const TENSOR_BIN_OP: Partial<Record<BinaryOperation, string>>;
+export declare const TENSOR_COMPLEX_BIN_OP: Partial<Record<BinaryOperation, string>>;
+export declare const TENSOR_CMP_OP: Partial<Record<BinaryOperation, string>>;
+/**
+ * Tensor-op dispatch helpers. Each reads `IBuiltin.jitCapabilities` for
+ * the named builtin and returns the corresponding libnumbl_ops opcode
+ * enum name / C function name — or `undefined` when the builtin has no
+ * C-JIT tensor-op routing. These helpers are the single source of truth
+ * for both the C feasibility check (feasibility.ts) and the C emitter
+ * (the emit/ modules); registering a new tensor op is a single edit on
+ * the IBuiltin, not three parallel table updates.
+ */
+/** libnumbl_ops unary opcode enum name for a tensor-unary builtin. */
+export declare function getTensorUnaryOp(name: string): string | undefined;
+/** C function name for a 2-arg element-wise tensor binary builtin. */
+export declare function getTensorBinaryFn(name: string): string | undefined;
+/** libnumbl_ops reduce opcode enum name for a tensor-reduction builtin. */
+export declare function getTensorReductionOp(name: string): string | undefined;
+export declare function formatNumberLiteral(v: number): string;
+export declare function tensorData(name: string): string;
+/** Imaginary data pointer for a complex tensor. Pairs with
+ *  `tensorData(name)` — both locals carry the same length and shape
+ *  info. The kernels in numbl_ops accept NULL for the imag pointer as
+ *  "all zero", so a nominally-complex tensor whose `.imag` is undefined
+ *  can still be passed efficiently without allocating a zero buffer.
+ *
+ *  Uses the `__im_` prefix so a user variable `x_data_im` can't collide
+ *  with the imag companion of `x`'s tensor data. */
+export declare function tensorDataIm(name: string): string;
+export declare function tensorLen(name: string): string;
+/** Row count (shape[0]) for a 2D/3D-indexed tensor param. Also reused
+ *  as the mutable row-count local for fresh-alloc tensors (TensorLiteral/
+ *  zeros/ones/VConcatGrow targets). */
+export declare function tensorD0(name: string): string;
+/** Column count. For 3D tensor params this is shape[1]; for fresh-alloc
+ *  tensors it's shape[1] (i.e., the column count of a matrix or 1 for
+ *  a column vector). */
+export declare function tensorD1(name: string): string;
+export interface EmitCtx {
+    /** Shared classification table: every tensor-name decision reads from
+     *  here (kind, maxIndexDim, hasFreshAlloc, needsUnshare,
+     *  isDynamicOutput, ...). See `classify.ts`. */
+    cls: ClassificationResult;
+    /** Counter for scratch buffer slots. Each tensor sub-expression that
+     *  doesn't have a top-level dest gets a scratch double* + int64_t pair. */
+    scratchCount: number;
+    /** Scratch indices whose tensor expression was complex — the prelude
+     *  declares a paired `__sN_data_im` for these, and epilogue frees both
+     *  buffers. Real scratches stay single-buffer. */
+    complexScratch: Set<number>;
+    /** Counter for for-loop step temps. */
+    tmp: {
+        n: number;
+    };
+    /** Set of scratch indices that were actually used. */
+    usedScratch: Set<number>;
+    /** When set, expression emission can prepend statements (e.g. for
+     *  reductions of complex tensor expressions that need scratch buffers). */
+    pendingStmts?: {
+        lines: string[];
+        indent: string;
+    };
+    /** Emit fused per-element loops for tensor chains (--fuse). */
+    fuse: boolean;
+    /** Set when tic or toc is used — triggers __tic_state parameter. */
+    needsTicState: boolean;
+    /** Set when any Index read is emitted — triggers __err_flag parameter
+     *  and the __numbl_idx1r helper. JS wrapper checks the flag after the
+     *  call and throws "Index exceeds array bounds" if set. */
+    needsErrorFlag: boolean;
+    /** Set when a `disp(...)` call is emitted — triggers the __disp_cb
+     *  trailer parameter. The JS wrapper registers a koffi callback that
+     *  routes back into `rt.output` with MATLAB-style formatting. */
+    needsDispCb: boolean;
+    /** Emit `#pragma omp parallel for` on fused non-reduction loops. */
+    openmp: boolean;
+    /** ABI of every reachable UserCall callee (keyed by `jitName`). Populated
+     *  by `generateC` as each callee is emitted, then handed to the outer
+     *  emitter so `emitUserCall` can walk the callee's paramDescs to marshal
+     *  tensor args (and decide scalar-vs-tensor return shape) at the call
+     *  site. Absent during tests that only emit a single function. */
+    calleeAbi?: Map<string, CalleeAbi>;
+    /** Names (params + locals + outputs) that hold `complex_or_number`
+     *  scalar values and thus have paired `v_name` / `v_name_im` locals. */
+    complexScalarVars: Set<string>;
+}
+export declare function isTensorVar(ctx: EmitCtx, name: string): boolean;
+export declare function isComplexTensorVar(ctx: EmitCtx, name: string): boolean;
+export declare function isComplexScalarVar(ctx: EmitCtx, name: string): boolean;
+export declare function hasFreshAlloc(ctx: EmitCtx, name: string): boolean;
+export declare function isDynamicOutput(ctx: EmitCtx, name: string): boolean;
+export declare function isLocalTensor(ctx: EmitCtx, name: string): boolean;
+export declare function isOutputTensor(ctx: EmitCtx, name: string): boolean;
+export declare function tensorMaxDim(ctx: EmitCtx, name: string): number;
+/** Allocate a scratch buffer pair (__s{n}_data, __s{n}_len). */
+export declare function allocScratch(ctx: EmitCtx): number;
+/** Allocate a complex scratch triple (__s{n}_data, __s{n}_data_im,
+ *  __s{n}_len) — the emitter tags the index so the prelude declares
+ *  and the epilogue frees the imag companion. */
+export declare function allocComplexScratch(ctx: EmitCtx): number;
+export declare function scratchData(n: number): string;
+/** Imaginary companion for a complex scratch buffer. Only declared in
+ *  the prelude when the scratch index appears in ctx.complexScratch. */
+export declare function scratchDataIm(n: number): string;
+export declare function scratchLen(n: number): string;

package/dist-lib/numbl-core/jit/c/emit/assign.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { type JitExpr } from "../../jitTypes.js";
+import { type EmitCtx } from "../context.js";
+/** Complex-tensor Assign: parallels emitTensorAssign but writes paired
+ *  re+im buffers. RHS sub-exprs route through emitComplexTensorExprToStmts
+ *  / emitComplexTensorBinaryStmts. For a real RHS (e.g. a Var pointing
+ *  to a real tensor), imag is widened via NULL pointer or a zero buffer.
+ *
+ *  Runs inside a pendingStmts frame so nested complex scalar sub-expressions
+ *  (`1i`, `3+4i`, `re(z) + 1i*im(z)`, ...) can materialize their pair
+ *  locals into the same `lines` stream ahead of the kernel call. */
+export declare function emitComplexTensorAssign(lines: string[], indent: string, destName: string, expr: JitExpr, ctx: EmitCtx): void;
+/** Emit a tensor-result Assign: handles Binary, Unary, Call on tensors. */
+export declare function emitTensorAssign(lines: string[], indent: string, destName: string, expr: JitExpr, ctx: EmitCtx): void;
+/** Emit an Assign where the RHS is a reduction on a tensor sub-
+ *  expression (not just a Var) — e.g. `y = sum(x .* z)`. The tensor
+ *  expression is materialised into a scratch buffer first, then the
+ *  scalar reduction reads that buffer. */
+export declare function emitReductionOfTensorExpr(lines: string[], indent: string, destName: string, callExpr: JitExpr & {
+    tag: "Call";
+}, ctx: EmitCtx): void;

package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import type { JitExpr } from "../../jitTypes.js";
+import { type EmitCtx } from "../context.js";
+import { type ComplexPair } from "./helpers.js";
+/** Materialize a ComplexPair into two fresh `__cN_re` / `__cN_im` locals
+ *  prepended to `ctx.pendingStmts.lines`. Used when the emitted formula
+ *  would otherwise evaluate either sub-expression more than once (e.g.
+ *  both Mul and Div reference each operand twice in the per-component
+ *  formula). Returns references to the new locals. */
+export declare function materializeComplexPair(pair: ComplexPair, ctx: EmitCtx): ComplexPair;
+/** Emit a complex-valued scalar expression, returning (re, im) C
+ *  expression strings. Call sites that need a pair (complex RHS, complex
+ *  operand of a complex op, arg of real/imag/conj on complex) route
+ *  through here. Real sub-expressions widen implicitly (im = 0). */
+export declare function emitComplex(expr: JitExpr, ctx: EmitCtx): ComplexPair;
+/** Emit a scalar sub-expression at a complex tensor op boundary. Returns
+ *  a (re, im) pair of C expressions. Real scalars become (expr, "0.0");
+ *  complex scalars go through emitComplex for their pair form. */
+export declare function emitComplexScalarPair(expr: JitExpr, ctx: EmitCtx): ComplexPair;

package/dist-lib/numbl-core/jit/c/emit/fused.d.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Fused per-element loop emission for the C-JIT.
+ *
+ * Given a FusibleChain (from fusion.ts), emits a single
+ *   `for (int64_t __i = 0; __i < N; __i++) { ... }`
+ * loop that evaluates all the chain's tensor assigns as inline scalar
+ * expressions per element — no libnumbl_ops calls, no intermediate
+ * buffers.
+ *
+ * Scalar expressions (number literals, scalar vars, scalar math calls)
+ * pass through unchanged. Tensor var references become either:
+ *   - `v_name_data[__i]`  for input params / pre-existing tensors
+ *   - `__f_name`           for chain-produced intermediates (scalar local)
+ *
+ * The optional trailing reduction is absorbed as an inline accumulator
+ * (`__f_acc += expr`) inside the same loop, eliminating the need to
+ * materialise the tensor result at all when it is only consumed by the
+ * reduction.
+ */
+import type { FusibleChain } from "../../fusion.js";
+/**
+ * Emit a fused per-element loop for the given chain.
+ *
+ * Appends C source lines to `lines`. All scalar math helpers the inner
+ * body may reference (mod, sign, ...) live in jit_runtime.a, so this
+ * function no longer reports back "helpers needed" — the emitter simply
+ * calls them as library symbols.
+ *
+ * Dispatches to the real or complex per-element emitter based on whether
+ * any assign in the chain produces a complex tensor. Both paths share
+ * the outer shell (lenVar, writeBack, parallel-for decision, loop open);
+ * only the buffer sizing, per-element emission, and write-back differ.
+ *
+ * `allTensorVars` is the full set of tensor-typed variable names.
+ * `paramTensors` is the subset that are input parameters.
+ * `outputTensorNames` is the subset that are function outputs.
+ * `localTensorNames` is the subset that are non-param, non-output locals.
+ * `complexTensorNames` is the subset whose tensor has a paired imag buffer.
+ * `complexScalarVars` is the set of scalar vars that hold complex values
+ * (pair-of-doubles `v_name` / `__im_v_name`).
+ */
+export declare function emitFusedChain(lines: string[], indent: string, chain: FusibleChain, allTensorVars: ReadonlySet<string>, paramTensors: ReadonlySet<string>, outputTensorNames: ReadonlySet<string>, localTensorNames: ReadonlySet<string>, dynamicOutputNames: ReadonlySet<string>, complexTensorNames: ReadonlySet<string>, complexScalarVars: ReadonlySet<string>, openmp?: boolean): void;

package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Small shared helpers used across the emit/ modules.
+ *
+ * These are one-liners and trivial utilities that don't belong in any
+ * one topic file — keeping them here avoids either duplicating them
+ * across topic files or inflating a topic file with unrelated detail.
+ */
+import type { JitExpr } from "../../jitTypes.js";
+import type { TensorMeta } from "../classify.js";
+import type { EmitCtx } from "../context.js";
+export declare function isTensorExpr(expr: JitExpr): boolean;
+export declare function isComplexExpr(expr: JitExpr): boolean;
+/** Pair of C expressions holding the real and imaginary parts of a
+ *  complex scalar value. Produced by `emitComplex`. */
+export interface ComplexPair {
+    re: string;
+    im: string;
+}
+/** Complex tensor expression result: data + dataIm + len in C. For a
+ *  Var whose JitType is a real tensor, `dataIm` is the literal string
+ *  `"NULL"` — the numbl_ops complex kernels treat that as "all zero",
+ *  so a real tensor flowing into a complex op doesn't need a zero
+ *  buffer. */
+export interface ComplexTensorResult {
+    data: string;
+    dataIm: string;
+    len: string;
+}
+/** Widen a real scalar C expression to a complex pair (im = 0). */
+export declare function widenRealToComplex(realCode: string): ComplexPair;
+/** Escape a JS string into a C string literal (double-quoted, with C
+ *  escapes for backslash, double-quote, and common control chars;
+ *  non-ASCII bytes encoded as `\xNN` octets of their UTF-8 encoding). */
+export declare function cStringLiteral(s: string): string;
+/** Resolve a tensor name's meta or throw. The tensor-creation emit
+ *  helpers write to the name's d0/d1 locals, and those locals only
+ *  exist at runtime when the name was classified with `hasFreshAlloc`.
+ *  Failing loudly here beats emitting C that references undeclared
+ *  identifiers. */
+export declare function requireFreshAllocMeta(ctx: EmitCtx, destName: string, site: string): TensorMeta;

package/dist-lib/numbl-core/jit/c/emit/index.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * Public entry point to the emit/ subpackage.
+ *
+ * Outside callers (the outer-function orchestrator in
+ * [../assemble.ts](../assemble.ts)) see only:
+ *   - `emitStmts` — walk a statement list and push C source lines.
+ *   - `shapeExprsFor` — derive (d0, d1) C expressions for a dynamic
+ *     tensor output (used inside this package too, but also by the
+ *     outer orchestrator for param-output shape plumbing).
+ *
+ * Everything else is private to the emit/ subpackage.
+ */
+export { emitStmts } from "./stmt.js";
+export { shapeExprsFor } from "./tensor.js";

package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import { type JitExpr } from "../../jitTypes.js";
+import { type EmitCtx } from "../context.js";
+/** Emit a value-expression. For scalars, returns a C `double` expression.
+ *  For tensors, returns the data-variable name (the caller knows to also
+ *  access the corresponding _len variable).
+ *
+ *  Complex-typed expressions are *not* valid here — they produce a pair
+ *  of doubles and must go through `emitComplex`. Reaching this function
+ *  with a complex expression indicates a missed routing at the caller. */
+export declare function emitExpr(expr: JitExpr, ctx: EmitCtx): string;
+export declare function emitBinary(expr: JitExpr & {
+    tag: "Binary";
+}, ctx: EmitCtx): string;
+export declare function emitUnary(expr: JitExpr & {
+    tag: "Unary";
+}, ctx: EmitCtx): string;
+export declare function emitIndex(expr: JitExpr & {
+    tag: "Index";
+}, ctx: EmitCtx): string;
+export declare function emitCall(expr: JitExpr & {
+    tag: "Call";
+}, ctx: EmitCtx): string;
+export declare function emitTruthiness(expr: JitExpr, ctx: EmitCtx): string;

package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Statement-level emission — the top-level dispatch for every JitStmt.
+ *
+ * Exports:
+ *   - `emitStmts(lines, stmts, indent, ctx)` — public entry point; the
+ *     outer function body is fed through this. Runs the fusion pass
+ *     when `ctx.fuse` is set, otherwise emits one statement at a time.
+ *   - `emitStmt` — per-statement dispatch, module-internal and not
+ *     exported (also used inside If / For / While bodies via `emitStmts`).
+ *   - `withPendingStmts` — scoped helper for nested expressions that
+ *     need to hoist declarations above the calling statement.
+ *
+ * This module is thin on purpose: it routes to the specialized emitters
+ * in [./scalar.ts](./scalar.ts), [./complexScalar.ts](./complexScalar.ts),
+ * [./tensor.ts](./tensor.ts), [./assign.ts](./assign.ts),
+ * [./userCall.ts](./userCall.ts), and [./fused.ts](./fused.ts).
+ */
+import { type JitStmt } from "../../jitTypes.js";
+import { type EmitCtx } from "../context.js";
+/** Evaluate `fn` with `ctx.pendingStmts` set to `{lines, indent}` so any
+ *  UserCall / RangeSliceRead nested inside can hoist its decl+call into
+ *  `lines` ahead of the calling statement. Restores the prior value on
+ *  exit (safe even if nested save/restore frames stack). */
+export declare function withPendingStmts<T>(ctx: EmitCtx, lines: string[], indent: string, fn: () => T): T;
+export declare function emitStmts(lines: string[], stmts: JitStmt[], indent: string, ctx: EmitCtx): void;

package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts ADDED Viewed

@@ -0,0 +1,127 @@
+import { type JitExpr } from "../../jitTypes.js";
+import { type EmitCtx } from "../context.js";
+import { type ComplexTensorResult } from "./helpers.js";
+/** Size a scratch tensor buffer to `lenExpr` doubles, freeing any stale
+ *  buffer from a prior loop iteration first. A plain `if (!sData) malloc`
+ *  would overflow the buffer if a later iteration demanded more bytes. */
+export declare function emitScratchBufAlloc(lines: string[], indent: string, sData: string, sLen: string, lenExpr: string): void;
+/** Complex scratch alloc: re + im buffers, both sized to `lenExpr`. The
+ *  complex kernels in numbl_ops write both buffers unconditionally, so
+ *  both must be valid pointers (len > 0).
+ *
+ *  Same length-match fast-path as emitScratchBufAlloc: both buffers are
+ *  kept in lockstep, so a single size check guards both. */
+export declare function emitScratchBufAllocComplex(lines: string[], indent: string, sData: string, sDataIm: string, sLen: string, lenExpr: string): void;
+/** Derive (d0, d1) C expressions for a dynamic tensor output whose
+ *  shape is inherited from an elemwise operand. Uses the operand's
+ *  static shape when known; else — when `ctx` is provided — falls
+ *  back to the nearest tensor Var's runtime `_d0`/`_d1` locals
+ *  (accurate even for partial-shape specializations). Final fallback
+ *  recovers the missing dim from lenExpr and the other dim (e.g. row
+ *  `[1, -1]` → `[1, len]`), or `[len, 1]` (column convention) when
+ *  nothing is known. */
+export declare function shapeExprsFor(shapeSrc: JitExpr | undefined, lenExpr: string, ctx?: EmitCtx): [string, string];
+/** Find a tensor-length expression from a tensor expr tree (for pre-
+ *  allocating scratch buffers). Returns a C expression for the length. */
+export declare function findTensorLenExpr(expr: JitExpr, ctx: EmitCtx): string;
+/** Ensure `destName`'s buffer is sized to exactly `lenExpr` doubles
+ *  before an elemwise op writes into it. For locals and dynamic
+ *  outputs, always free+malloc — loop iterations can produce different
+ *  sizes, so a first-time-only malloc would overflow the buffer in
+ *  later iterations. For fixed-size outputs (caller-aliased buffer),
+ *  just records the length; freeing would corrupt the caller.
+ *
+ *  When the destination is a dynamic output, also writes its _d0/_d1
+ *  shape locals. Pass the tensor operand whose shape the result
+ *  inherits (elemwise preserves operand shape); we use its static
+ *  jitType.shape when available, else fall back to `[lenExpr, 1]`. */
+export declare function emitEnsureTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
+/** Emit an elemwise tensor-assign with aliasing safety and a
+ *  steady-state fast path.
+ *
+ *  Problem: the RHS of `dst = <elemwise-expr>` may read from `dst` itself
+ *  (e.g. `r = r .* y + 3.0`). If we resized the dst buffer first and
+ *  then the kernel read the old dst data, it would dangle. Pre-c54add0
+ *  the kernel wrote inline into dst, which was only safe when no resize
+ *  was needed — resizing freed the operand buffer before the kernel
+ *  read it. c54add0 defused this via an unconditional scratch-transfer
+ *  (eval into scratch, then free+malloc dst, then memcpy) but that
+ *  added a full-tensor memcpy to every elemwise assign in hot loops.
+ *
+ *  This helper splits at runtime: if dst is already the right size,
+ *  emit the kernel directly into dst (no memcpy, no scratch). Else
+ *  evaluate into a scratch, then realloc dst and memcpy. Both paths
+ *  are aliasing-safe: the fast path doesn't free dst at all; the slow
+ *  path reads operands before dst is freed.
+ *
+ *  Fixed-size outputs (caller-owned buffer) skip the guard — their
+ *  buffer is never freed, so the inline path is always safe.
+ *
+ *  `emitKernel` emits the tensor kernel writing `targetLen` doubles
+ *  into `targetData`. It is invoked exactly once; both paths share
+ *  a single emission. Sub-expressions may use scratch buffers freely. */
+export declare function emitElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetLen: string) => void): void;
+/** Complex-tensor sibling of `emitElemwiseTensorAssign`. Same runtime
+ *  size-match scheme, but manages paired (re, im) buffers. The kernel
+ *  callback receives both `targetData` and `targetDataIm` so it can
+ *  write the paired output in one shot.
+ *
+ *  When the kernel's operand aliases the target (e.g. `z = -z`, `z =
+ *  conj(z)`), the callback MUST be element-wise safe — kernels like
+ *  `numbl_complex_scalar_binary_elemwise` already are, but a memcpy
+ *  from operand→target against a same-buffer alias is UB. Use an
+ *  element-wise loop there. */
+export declare function emitComplexElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetDataIm: string, targetLen: string) => void): void;
+/** Complex-tensor version of emitEnsureTensorBuf. Reallocates both re
+ *  and im buffers in lockstep, so kernel writes into the paired
+ *  destination don't mismatch in size. Fixed outputs keep their
+ *  caller-aliased re buffer and (for complex) the caller-aliased im
+ *  buffer — we only record the length. */
+export declare function emitEnsureComplexTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
+/** Emit a tensor expression as statements, returning the data/len vars. */
+export declare function emitTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): {
+    data: string;
+    len: string;
+};
+/** For tensor binary/unary, we need multi-statement emission. This helper
+ *  emits the operation as statements into `lines`, targeting the caller-
+ *  provided `destDataVar` / `destLenVar`; it returns nothing. Callers that
+ *  route the result into a named dst (vs. a scratch) must use the
+ *  scratch-transfer pattern — let this helper write the full expression
+ *  into a fresh scratch via `emitTensorExprToStmts`, then
+ *  `emitEnsureTensorBuf` + memcpy into the dst. That ordering avoids
+ *  clobbering an operand that aliases the dst (e.g. `r = r .* y + 3.0`). */
+export declare function emitTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
+    tag: "Binary";
+}, ctx: EmitCtx, destDataVar: string, destLenVar: string): void;
+/** Emit a tensor expression in complex (paired-buffer) form, producing
+ *  scratch locals for sub-expressions. Handles:
+ *   - Var on a complex tensor: returns the name's `_data`/`_data_im`/`_len`.
+ *   - Var on a real tensor: widens via `dataIm = NULL`.
+ *   - Binary on a complex-typed tensor expr (the result is complex).
+ *   - Unary Plus/Minus on a complex-typed tensor expr.
+ *   - conj / real / imag on a complex tensor — real/imag are actually
+ *     handled via the real tensor path (result is real), but conj stays
+ *     complex and is lowered here.
+ */
+export declare function emitComplexTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): ComplexTensorResult;
+/** Emit a complex tensor binary op into caller-provided dest buffers
+ *  (both re and im). Handles all combos of real-tensor/complex-tensor/
+ *  real-scalar/complex-scalar operands. Operand widening is kernel-side
+ *  (NULL imag pointer or imag=0.0). */
+export declare function emitComplexTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
+    tag: "Binary";
+}, ctx: EmitCtx, destDataVar: string, destDataImVar: string, destLenVar: string): void;
+/** Emit `dstData = src(a:b) copy` pattern into caller-provided dst data /
+ *  len vars. */
+export declare function emitRangeSliceReadToBuf(lines: string[], indent: string, expr: JitExpr & {
+    tag: "RangeSliceRead";
+}, ctx: EmitCtx, destData: string, destLen: string): void;
+/** Emit a `src(a:b)` RangeSliceRead into a fresh scratch buffer,
+ *  returning the {data, len} pair for the result. */
+export declare function emitRangeSliceReadToStmts(lines: string[], indent: string, expr: JitExpr & {
+    tag: "RangeSliceRead";
+}, ctx: EmitCtx): {
+    data: string;
+    len: string;
+};

package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts ADDED Viewed

@@ -0,0 +1,58 @@
+/**
+ * UserCall emission — scalar returns in value position, and tensor
+ * returns from an Assign RHS.
+ *
+ * A UserCall lowers to a static C function (`jit_<jitName>`) generated
+ * alongside the outer function by [../assemble.ts](../assemble.ts).
+ * This module marshals arg slots per the callee's ABI and emits the
+ * call itself.
+ *
+ * Exports:
+ *   - `emitUserCall` — scalar-return UserCall in value position (Assign
+ *     RHS / nested expr). Stashes the result into a fresh `__ucN_out`
+ *     local and returns the name as the expression text.
+ *   - `emitUserCallArgSlots` — helper used by `emitUserCall` *and* the
+ *     tensor-return variant in [./assign.ts](./assign.ts) to convert
+ *     one arg into its ABI-slot expression list.
+ *   - `emitUserCallTensorAssign` — tensor-return UserCall, only allowed
+ *     as the top RHS of an Assign. Uses the dynamic-output ABI: the
+ *     callee mallocs + transfers ownership via `double **` out-params.
+ */
+import type { JitExpr } from "../../jitTypes.js";
+import type { TensorMeta } from "../classify.js";
+import { type EmitCtx } from "../context.js";
+/** Emit the C expressions for one arg's ABI slots, consulting the
+ *  callee's paramDesc so the slot order matches the callee's signature.
+ *  Scalars contribute one slot; tensors contribute data + len + optional
+ *  d0 / d1. For the shape slots the caller falls back to
+ *  `(int64_t)tensorLen(arg)` / `1` when its own arg-var wasn't classified
+ *  with matching shape plumbing.
+ *
+ *  @param a         The argument expression being passed to the callee.
+ *  @param paramDesc The callee's descriptor for this parameter: its
+ *                   `kind` plus the ordered list of ABI `slots`.
+ *  @param ctx       Emit context.
+ *  @returns The C expression strings for this arg's slots (presumably
+ *           one entry per slot, in `paramDesc.slots` order — confirm). */
+export declare function emitUserCallArgSlots(a: JitExpr, paramDesc: {
+    kind: "scalar" | "complexScalar" | "tensor";
+    slots: {
+        kind: string;
+    }[];
+}, ctx: EmitCtx): string[];
+/** Scalar-return UserCall. Tensor args are marshaled via the callee's
+ *  paramDescs (data + len + optional d0/d1 slots). The callee is emitted
+ *  as `static void jit_<jitName>(...)` in the same .c file by
+ *  `generateC`, with a trailing `__err_flag` pointer. We stash the
+ *  return value in a fresh local and return its name as the expression
+ *  text. Must be invoked from statement context so the decl + call can
+ *  be inserted before the surrounding expression.
+ *
+ *  Tensor-return UserCall is handled upstream by emitTensorAssign (only
+ *  allowed as an Assign RHS), not here. (This module also declares
+ *  `emitUserCallTensorAssign` for the tensor-return case.)
+ *
+ *  @param expr The `UserCall` expression to emit.
+ *  @param ctx  Emit context.
+ *  @returns The name of the fresh local holding the call's result,
+ *           usable as C expression text. */
+export declare function emitUserCall(expr: JitExpr & {
+    tag: "UserCall";
+}, ctx: EmitCtx): string;
+/** Emit `dest = foo(...)` where foo returns a tensor via the dynamic-
+ *  output ABI. Feasibility has already verified the callee's output[0]
+ *  is a fresh-alloc dynamic output, so the callee fills
+ *  `buf_out / out_len / d0_out / d1_out` and transfers ownership. The
+ *  caller frees the old dest buffer (if any), takes the new buffer, and
+ *  lets the epilogue free() it at end-of-scope alongside the other
+ *  local tensors.
+ *
+ *  @param lines    Statement list for the emitted code (presumably
+ *                  appended to in place — confirm).
+ *  @param indent   Indent string for the emitted lines.
+ *  @param destName Name of the destination tensor being assigned.
+ *  @param destMeta `TensorMeta` for the destination (from classify;
+ *                  exact fields are not visible here).
+ *  @param expr     The `UserCall` expression on the Assign RHS.
+ *  @param ctx      Emit context. */
+export declare function emitUserCallTensorAssign(lines: string[], indent: string, destName: string, destMeta: TensorMeta, expr: JitExpr & {
+    tag: "UserCall";
+}, ctx: EmitCtx): void;

package/dist-lib/numbl-core/jit/c/epilogue.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Function epilogue emission.
+ *
+ * The epilogue is the set of statements appended after the body of a
+ * generated function: output-slot writes (scalar out-pointers, tensor
+ * dynamic-output transfers), scratch buffer frees, and local / unshared
+ * tensor frees. Called once per function by `generateC` in
+ * [assemble.ts](./assemble.ts).
+ *
+ * The prelude is in [prelude.ts](./prelude.ts); both read from the same
+ * shared state (`ClassificationResult` + `EmitCtx`).
+ */
+import type { ClassificationResult } from "./classify.js";
+import type { COutputDesc } from "./abi.js";
+import { type EmitCtx } from "./context.js";
+export interface EpilogueInput {
+    cls: ClassificationResult; // classification state, shared with the prelude
+    ctx: EmitCtx; // emit context, shared with the prelude
+    outputDescs: COutputDesc[]; // output descriptors from the ABI module (presumably one per output slot — confirm)
+    /** Pure-input tensor params that were malloc'd in the prelude's
+     *  unshare path — must be freed here. */
+    unshareTensorParams: Set<string>;
+    /** Indent string to prepend to each emitted line. */
+    indent: string;
+}
+export declare function buildEpilogue(input: EpilogueInput): string[]; // returns the epilogue for one generated function (presumably one string per emitted line — confirm)

package/dist-lib/numbl-core/jit/c/feasibility.d.ts ADDED Viewed

@@ -0,0 +1,44 @@
+/**
+ * Feasibility prepass for the C-JIT path.
+ *
+ * Given the lowered JIT IR for a function and the argument types, decide
+ * whether the C codegen can handle it. On any construct that isn't in
+ * the whitelist, return `{ok: false, reason}` so the caller falls
+ * through to the JS-JIT path.
+ *
+ * The whitelist intentionally mirrors what [assemble.ts](./assemble.ts)
+ * can emit, which in turn mirrors what JS-JIT does. Widen all three
+ * together.
+ *
+ * Covered today: scalar arithmetic and control flow (numbers, booleans,
+ * complex-scalar pairs); real tensor params / locals / outputs with
+ * Index / AssignIndex (1-3D), RangeSliceRead, AssignIndexRange,
+ * AssignIndexCol, TensorLiteral, VConcatGrow, zeros/ones (1-2D); real
+ * tensor binary + unary + reductions via the jitCapabilities opcodes;
+ * complex tensor binary + unary (minus, conj, real, imag, abs) and
+ * flat reductions (sum, prod, any, all); UserCall (scalar args/return
+ * and real / complex tensor args/return when the callee passes its own
+ * feasibility check).
+ */
+import type { JitStmt, JitType } from "../jitTypes.js";
+import type { GeneratedFn } from "../jitLower.js";
+export type FeasibilityResult = {
+    ok: true; // the C codegen can handle the function
+} | {
+    ok: false; // not feasible: the caller falls through to the JS-JIT path
+    reason: string; // why the check failed (presumably names the construct outside the whitelist — confirm)
+    line?: number; // optional line number (presumably the source line of the offending construct — confirm)
+};
+/**
+ * Check if the lowered function can be handled by the C-JIT.
+ *
+ * `outputTypes` holds the types of every output variable in order;
+ * `outputType` is kept for backwards-compatibility and equals
+ * `outputTypes[0]` when present.
+ *
+ * Multi-output mirrors the JS-JIT's `return [out0, out1, ...]` shape:
+ * the generated C builds a `napi_value` array of length `nargout`,
+ * each entry boxed according to its type (scalar doubles, booleans, or
+ * tensors).
+ *
+ * @param body              Lowered JIT IR statements of the function.
+ * @param paramNames        The function's parameter names.
+ * @param argTypes          Argument types (presumably parallel to
+ *                          `paramNames` — confirm).
+ * @param outputType        Legacy single-output type; equals
+ *                          `outputTypes[0]` when present.
+ * @param outputTypes       Types of every output variable, in order.
+ * @param nargout           Length of the output array the generated C
+ *                          builds.
+ * @param generatedIRBodies Optional map of generated functions
+ *                          (presumably the callee IR consulted for
+ *                          UserCall feasibility — confirm).
+ * @returns `{ok: true}` when the C codegen can handle the function,
+ *          otherwise `{ok: false, reason, line?}`.
+ */
+export declare function checkCFeasibility(body: JitStmt[], paramNames: string[], argTypes: JitType[], outputType: JitType | null, outputTypes: JitType[], nargout: number, generatedIRBodies?: Map<string, GeneratedFn>): FeasibilityResult;

package/dist-lib/numbl-core/jit/c/prelude.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * Function prelude emission.
+ *
+ * The prelude is the set of C declarations at the top of a generated
+ * function, written before any statement from the body: shadowed param
+ * locals (for param-output seeding + unshare-at-entry), local tensor
+ * declarations, complex-scalar imag companions, and scratch buffer
+ * slots.
+ *
+ * Exported as `buildPrelude` — called once per function by `generateC`
+ * in [assemble.ts](./assemble.ts).
+ *
+ * The epilogue (tensor frees, out-pointer writes) is in
+ * [epilogue.ts](./epilogue.ts); both read from the same shared state
+ * (`ClassificationResult` + `EmitCtx`) populated upstream.
+ */
+import type { JitType } from "../jitTypes.js";
+import type { ClassificationResult } from "./classify.js";
+import { type EmitCtx } from "./context.js";
+export interface PreludeInput {
+    cls: ClassificationResult; // classification state, shared with the epilogue
+    ctx: EmitCtx; // emit context, shared with the epilogue
+    params: string[]; // presumably the function's parameter names — confirm
+    argTypes: JitType[]; // argument types (presumably parallel to `params` — confirm)
+    /** Names with `kind === "paramOutput"` (output name reuses a param name). */
+    paramOutputTensors: Set<string>;
+    /** Pure-input tensor params that need an unshare-at-entry malloc+memcpy
+     *  (the body writes to them, and we must not mutate the caller's buffer). */
+    unshareTensorParams: Set<string>;
+    /** Locals to declare — outer-scope `localVars` minus params, sorted. */
+    allLocals: string[];
+    /** Names carrying `complex_or_number` scalar values (paired re+im locals). */
+    complexScalarVars: Set<string>;
+    /** Indent string to prepend to each emitted line. */
+    indent: string;
+}
+export declare function buildPrelude(input: PreludeInput): string[]; // returns the prelude for one generated function (presumably one string per emitted line — confirm)