numbl 0.1.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/binding.gyp +59 -3
  2. package/dist-cli/cli.js +22538 -7936
  3. package/dist-lib/lib.js +34682 -20852
  4. package/dist-lib/numbl-core/executeCode.d.ts +13 -0
  5. package/dist-lib/numbl-core/fileIOAdapter.d.ts +2 -0
  6. package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
  7. package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
  8. package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
  9. package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
  10. package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
  11. package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
  12. package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
  13. package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
  14. package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
  15. package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
  16. package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
  17. package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
  18. package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
  19. package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
  20. package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
  21. package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
  22. package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
  23. package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
  24. package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
  25. package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
  26. package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
  27. package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
  28. package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
  29. package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
  30. package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
  31. package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +46 -0
  32. package/dist-lib/numbl-core/jit/e1/hash.d.ts +10 -0
  33. package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
  34. package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
  35. package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +66 -0
  36. package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
  37. package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
  38. package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +34 -0
  39. package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +25 -0
  40. package/dist-lib/numbl-core/jit/e2/cache.d.ts +80 -0
  41. package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +55 -0
  42. package/dist-lib/numbl-core/jit/e2/classify.d.ts +119 -0
  43. package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +16 -0
  44. package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +79 -0
  45. package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +71 -0
  46. package/dist-lib/numbl-core/jit/e2/install.d.ts +11 -0
  47. package/dist-lib/numbl-core/jit/e2/liveness.d.ts +29 -0
  48. package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +49 -0
  49. package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +75 -0
  50. package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +24 -0
  51. package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +72 -0
  52. package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +29 -0
  53. package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
  54. package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +69 -0
  55. package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
  56. package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
  57. package/dist-lib/numbl-core/jit/heavyOps.d.ts +15 -0
  58. package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
  59. package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
  60. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
  61. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
  62. package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
  63. package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
  64. package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
  65. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
  66. package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
  67. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
  68. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
  69. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
  70. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
  71. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
  72. package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
  73. package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
  74. package/dist-lib/numbl-core/jit/js/jsMultiReduction.d.ts +70 -0
  75. package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
  76. package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
  77. package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
  78. package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
  79. package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
  80. package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
  81. package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
  82. package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
  83. package/dist-lib/numbl-core/ops/index.d.ts +8 -0
  84. package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
  85. package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
  86. package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
  87. package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
  88. package/dist-lib/numbl-core/parser/types.d.ts +6 -0
  89. package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
  90. package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
  91. package/dist-lib/numbl-core/version.d.ts +1 -1
  92. package/native/jit_runtime/jit_runtime.c +261 -0
  93. package/native/jit_runtime/jit_runtime.h +204 -0
  94. package/native/numbl_addon.cpp +62 -1
  95. package/native/ops/bessel.c +572 -0
  96. package/native/ops/comparison.c +150 -0
  97. package/native/ops/complex_binary_elemwise.c +192 -0
  98. package/native/ops/complex_unary_elemwise.c +152 -0
  99. package/native/ops/numbl_ops.c +66 -0
  100. package/native/ops/numbl_ops.h +262 -0
  101. package/native/ops/real_binary_elemwise.c +85 -0
  102. package/native/ops/real_unary_elemwise.c +104 -0
  103. package/native/ops/reduce.c +162 -0
  104. package/native/ops_napi.cpp +320 -0
  105. package/package.json +8 -9
  106. package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
  107. package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
  108. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Shared types, constants, and helpers used by both emit/ (the per-
3
+ * function emitter) and assemble.ts (the orchestration that builds
4
+ * the final signature / prelude / epilogue).
5
+ *
6
+ * Contains no emit logic itself — pulling this out of the main file
7
+ * breaks the otherwise-circular dependency between assemble.ts and
8
+ * the emit helpers.
9
+ */
10
+ import { BinaryOperation } from "../../parser/types.js";
11
+ import type { ScalarOpTarget } from "../scalarEmit.js";
12
+ import type { ClassificationResult } from "./classify.js";
13
+ import type { CParamDesc, COutputDesc } from "./abi.js";
14
+ /** Per-callee ABI info the outer emitter uses to marshal its call sites.
15
+ * `emitUserCall` reads `paramDescs` to know what slots each param
16
+ * contributes (data + len + optional d0/d1 for tensors) and
17
+ * `outputDescs` to know whether the return uses the dynamic-output ABI. */
18
+ export interface CalleeAbi {
19
+ paramDescs: CParamDesc[];
20
+ outputDescs: COutputDesc[];
21
+ }
22
+ /**
23
+ * Minimum NUMBL_JIT_RT_VERSION the emitter needs at link time.
24
+ *
25
+ * Bump this in lockstep with NUMBL_JIT_RT_VERSION in
26
+ * native/jit_runtime/jit_runtime.h whenever we add a helper the emitter
27
+ * calls. The emitted C asserts `NUMBL_JIT_RT_VERSION >= N` so a stale
28
+ * archive fails the per-JIT compile step with a clear message instead
29
+ * of a cryptic linker "undefined reference" error.
30
+ *
31
+ * Version log:
32
+ * 1 — initial: idx1r, mod, sign, reduce_flat, tic/toc/monotonic_time.
33
+ * 2 — set1r_h (scalar linear Index write with soft-bail on OOB).
34
+ * 3 — idx2r / idx3r / set2r_h / set3r_h (multi-index Index read/write).
35
+ * 4 — setRange1r_h / setCol2r_h / copyRange1r (range/col slice r/w).
36
+ * 5 — is_nan / is_inf / is_finite (predicates that survive -ffast-math
37
+ * by inspecting IEEE-754 bit patterns).
38
+ */
39
+ export declare const NUMBL_JIT_RT_REQUIRED_VERSION = 5;
40
+ export declare function mangle(name: string): string;
41
+ /** Imaginary part of a complex scalar local / param. Paired with
42
+ * `mangle(name)` (the real part). */
43
+ export declare function mangleIm(name: string): string;
44
+ /** Join a C type and an identifier with a space unless the type already
45
+ * ends in `*` (pointer types get no space between `*` and the name). */
46
+ export declare function spaceBeforeName(cType: string): string;
47
+ export declare const C_SCALAR_TARGET: ScalarOpTarget;
48
+ export declare const TENSOR_BIN_OP: Partial<Record<BinaryOperation, string>>;
49
+ export declare const TENSOR_COMPLEX_BIN_OP: Partial<Record<BinaryOperation, string>>;
50
+ export declare const TENSOR_CMP_OP: Partial<Record<BinaryOperation, string>>;
51
+ /**
52
+ * Tensor-op dispatch helpers. Each reads `IBuiltin.jitCapabilities` for
53
+ * the named builtin and returns the corresponding libnumbl_ops opcode
54
+ * enum name / C function name — or `undefined` when the builtin has no
55
+ * C-JIT tensor-op routing. These helpers are the single source of truth
56
+ * for both the C feasibility check (feasibility.ts) and the C emitter
57
+ * (emit/); registering a new tensor op is a single edit on the
58
+ * IBuiltin, not three parallel table updates.
59
+ */
60
+ /** libnumbl_ops unary opcode enum name for a tensor-unary builtin. */
61
+ export declare function getTensorUnaryOp(name: string): string | undefined;
62
+ /** C function name for a 2-arg element-wise tensor binary builtin. */
63
+ export declare function getTensorBinaryFn(name: string): string | undefined;
64
+ /** libnumbl_ops reduce opcode enum name for a tensor-reduction builtin. */
65
+ export declare function getTensorReductionOp(name: string): string | undefined;
66
+ export declare function formatNumberLiteral(v: number): string;
67
+ export declare function tensorData(name: string): string;
68
+ /** Imaginary data pointer for a complex tensor. Pairs with
69
+ * `tensorData(name)` — both locals carry the same length and shape
70
+ * info. The kernels in numbl_ops accept NULL for the imag pointer as
71
+ * "all zero", so a nominally-complex tensor whose `.imag` is undefined
72
+ * can still be passed efficiently without allocating a zero buffer.
73
+ *
74
+ * Uses the `__im_` prefix so a user variable `x_data_im` can't collide
75
+ * with the imag companion of `x`'s tensor data. */
76
+ export declare function tensorDataIm(name: string): string;
77
+ export declare function tensorLen(name: string): string;
78
+ /** Row count (shape[0]) for a 2D/3D-indexed tensor param. Also reused
79
+ * as the mutable row-count local for fresh-alloc tensors (TensorLiteral/
80
+ * zeros/ones/VConcatGrow targets). */
81
+ export declare function tensorD0(name: string): string;
82
+ /** Column count. For 3D tensor params this is shape[1]; for fresh-alloc
83
+ * tensors it's shape[1] (i.e., the column count of a matrix or 1 for
84
+ * a column vector). */
85
+ export declare function tensorD1(name: string): string;
86
+ export interface EmitCtx {
87
+ /** Shared classification table: every tensor-name decision reads from
88
+ * here (kind, maxIndexDim, hasFreshAlloc, needsUnshare,
89
+ * isDynamicOutput, ...). See `classify.ts`. */
90
+ cls: ClassificationResult;
91
+ /** Counter for scratch buffer slots. Each tensor sub-expression that
92
+ * doesn't have a top-level dest gets a scratch double* + int64_t pair. */
93
+ scratchCount: number;
94
+ /** Scratch indices whose tensor expression was complex — the prelude
95
+ * declares a paired `__sN_data_im` for these, and the epilogue frees both
96
+ * buffers. Real scratches stay single-buffer. */
97
+ complexScratch: Set<number>;
98
+ /** Counter for for-loop step temps. */
99
+ tmp: {
100
+ n: number;
101
+ };
102
+ /** Set of scratch indices that were actually used. */
103
+ usedScratch: Set<number>;
104
+ /** When set, expression emission can prepend statements (e.g. for
105
+ * reductions of complex tensor expressions that need scratch buffers). */
106
+ pendingStmts?: {
107
+ lines: string[];
108
+ indent: string;
109
+ };
110
+ /** Emit fused per-element loops for tensor chains (--fuse). */
111
+ fuse: boolean;
112
+ /** Set when tic or toc is used — triggers __tic_state parameter. */
113
+ needsTicState: boolean;
114
+ /** Set when any Index read is emitted — triggers __err_flag parameter
115
+ * and the __numbl_idx1r helper. JS wrapper checks the flag after the
116
+ * call and throws "Index exceeds array bounds" if set. */
117
+ needsErrorFlag: boolean;
118
+ /** Set when a `disp(...)` call is emitted — triggers the __disp_cb
119
+ * trailer parameter. The JS wrapper registers a koffi callback that
120
+ * routes back into `rt.output` with MATLAB-style formatting. */
121
+ needsDispCb: boolean;
122
+ /** Emit `#pragma omp parallel for` on fused non-reduction loops. */
123
+ openmp: boolean;
124
+ /** ABI of every reachable UserCall callee (keyed by `jitName`). Populated
125
+ * by `generateC` as each callee is emitted, then handed to the outer
126
+ * emitter so `emitUserCall` can walk the callee's paramDescs to marshal
127
+ * tensor args (and decide scalar-vs-tensor return shape) at the call
128
+ * site. Absent during tests that only emit a single function. */
129
+ calleeAbi?: Map<string, CalleeAbi>;
130
+ /** Names (params + locals + outputs) that hold `complex_or_number`
131
+ * scalar values and thus have paired `v_name` / `v_name_im` locals. */
132
+ complexScalarVars: Set<string>;
133
+ }
134
+ export declare function isTensorVar(ctx: EmitCtx, name: string): boolean;
135
+ export declare function isComplexTensorVar(ctx: EmitCtx, name: string): boolean;
136
+ export declare function isComplexScalarVar(ctx: EmitCtx, name: string): boolean;
137
+ export declare function hasFreshAlloc(ctx: EmitCtx, name: string): boolean;
138
+ export declare function isDynamicOutput(ctx: EmitCtx, name: string): boolean;
139
+ export declare function isLocalTensor(ctx: EmitCtx, name: string): boolean;
140
+ export declare function isOutputTensor(ctx: EmitCtx, name: string): boolean;
141
+ export declare function tensorMaxDim(ctx: EmitCtx, name: string): number;
142
+ /** Allocate a scratch buffer pair (__s{n}_data, __s{n}_len). */
143
+ export declare function allocScratch(ctx: EmitCtx): number;
144
+ /** Allocate a complex scratch triple (__s{n}_data, __s{n}_data_im,
145
+ * __s{n}_len) — the emitter tags the index so the prelude declares
146
+ * and the epilogue frees the imag companion. */
147
+ export declare function allocComplexScratch(ctx: EmitCtx): number;
148
+ export declare function scratchData(n: number): string;
149
+ /** Imaginary companion for a complex scratch buffer. Only declared in
150
+ * the prelude when the scratch index appears in ctx.complexScratch. */
151
+ export declare function scratchDataIm(n: number): string;
152
+ export declare function scratchLen(n: number): string;
@@ -0,0 +1,20 @@
1
+ import { type JitExpr } from "../../jitTypes.js";
2
+ import { type EmitCtx } from "../context.js";
3
+ /** Complex-tensor Assign: parallels emitTensorAssign but writes paired
4
+ * re+im buffers. RHS sub-exprs route through emitComplexTensorExprToStmts
5
+ * / emitComplexTensorBinaryStmts. For a real RHS (e.g. a Var pointing
6
+ * to a real tensor), imag is widened via NULL pointer or a zero buffer.
7
+ *
8
+ * Runs inside a pendingStmts frame so nested complex scalar sub-expressions
9
+ * (`1i`, `3+4i`, `re(z) + 1i*im(z)`, ...) can materialize their pair
10
+ * locals into the same `lines` stream ahead of the kernel call. */
11
+ export declare function emitComplexTensorAssign(lines: string[], indent: string, destName: string, expr: JitExpr, ctx: EmitCtx): void;
12
+ /** Emit a tensor-result Assign: handles Binary, Unary, Call on tensors. */
13
+ export declare function emitTensorAssign(lines: string[], indent: string, destName: string, expr: JitExpr, ctx: EmitCtx): void;
14
+ /** Emit an Assign where the RHS is a reduction on a tensor sub-
15
+ * expression (not just a Var) — e.g. `y = sum(x .* z)`. The tensor
16
+ * expression is materialised into a scratch buffer first, then the
17
+ * scalar reduction reads that buffer. */
18
+ export declare function emitReductionOfTensorExpr(lines: string[], indent: string, destName: string, callExpr: JitExpr & {
19
+ tag: "Call";
20
+ }, ctx: EmitCtx): void;
@@ -0,0 +1,18 @@
1
+ import type { JitExpr } from "../../jitTypes.js";
2
+ import { type EmitCtx } from "../context.js";
3
+ import { type ComplexPair } from "./helpers.js";
4
+ /** Materialize a ComplexPair into two fresh `__cN_re` / `__cN_im` locals
5
+ * prepended to `ctx.pendingStmts.lines`. Used when the emitted formula
6
+ * would otherwise evaluate either sub-expression more than once (e.g.
7
+ * both Mul and Div reference each operand twice in the per-component
8
+ * formula). Returns references to the new locals. */
9
+ export declare function materializeComplexPair(pair: ComplexPair, ctx: EmitCtx): ComplexPair;
10
+ /** Emit a complex-valued scalar expression, returning (re, im) C
11
+ * expression strings. Call sites that need a pair (complex RHS, complex
12
+ * operand of a complex op, arg of real/imag/conj on complex) route
13
+ * through here. Real sub-expressions widen implicitly (im = 0). */
14
+ export declare function emitComplex(expr: JitExpr, ctx: EmitCtx): ComplexPair;
15
+ /** Emit a scalar sub-expression at a complex tensor op boundary. Returns
16
+ * a (re, im) pair of C expressions. Real scalars become (expr, "0.0");
17
+ * complex scalars go through emitComplex for their pair form. */
18
+ export declare function emitComplexScalarPair(expr: JitExpr, ctx: EmitCtx): ComplexPair;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Fused per-element loop emission for the C-JIT.
3
+ *
4
+ * Given a FusibleChain (from fusion.ts), emits a single
5
+ * `for (int64_t __i = 0; __i < N; __i++) { ... }`
6
+ * loop that evaluates all the chain's tensor assigns as inline scalar
7
+ * expressions per element — no libnumbl_ops calls, no intermediate
8
+ * buffers.
9
+ *
10
+ * Scalar expressions (number literals, scalar vars, scalar math calls)
11
+ * pass through unchanged. Tensor var references become either:
12
+ * - `v_name_data[__i]` for input params / pre-existing tensors
13
+ * - `__f_name` for chain-produced intermediates (scalar local)
14
+ *
15
+ * The optional trailing reduction is absorbed as an inline accumulator
16
+ * (`__f_acc += expr`) inside the same loop, eliminating the need to
17
+ * materialise the tensor result at all when it is only consumed by the
18
+ * reduction.
19
+ */
20
+ import type { FusibleChain } from "../../fusion.js";
21
+ /**
22
+ * Emit a fused per-element loop for the given chain.
23
+ *
24
+ * Appends C source lines to `lines`. All scalar math helpers the inner
25
+ * body may reference (mod, sign, ...) live in jit_runtime.a, so this
26
+ * function no longer reports back "helpers needed" — the emitter simply
27
+ * calls them as library symbols.
28
+ *
29
+ * Dispatches to the real or complex per-element emitter based on whether
30
+ * any assign in the chain produces a complex tensor. Both paths share
31
+ * the outer shell (lenVar, writeBack, parallel-for decision, loop open);
32
+ * only the buffer sizing, per-element emission, and write-back differ.
33
+ *
34
+ * `allTensorVars` is the full set of tensor-typed variable names.
35
+ * `paramTensors` is the subset that are input parameters.
36
+ * `outputTensorNames` is the subset that are function outputs.
37
+ * `localTensorNames` is the subset that are non-param, non-output locals. `dynamicOutputNames` is the subset treated as dynamic outputs.
38
+ * `complexTensorNames` is the subset whose tensor has a paired imag buffer.
39
+ * `complexScalarVars` is the set of scalar vars that hold complex values
40
+ * (pair-of-doubles `v_name` / `__im_v_name`).
41
+ */
42
+ export declare function emitFusedChain(lines: string[], indent: string, chain: FusibleChain, allTensorVars: ReadonlySet<string>, paramTensors: ReadonlySet<string>, outputTensorNames: ReadonlySet<string>, localTensorNames: ReadonlySet<string>, dynamicOutputNames: ReadonlySet<string>, complexTensorNames: ReadonlySet<string>, complexScalarVars: ReadonlySet<string>, openmp?: boolean): void;
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Small shared helpers used across the emit/ modules.
3
+ *
4
+ * These are one-liners and trivial utilities that don't belong in any
5
+ * one topic file — keeping them here avoids either duplicating them
6
+ * across topic files or inflating a topic file with unrelated detail.
7
+ */
8
+ import type { JitExpr } from "../../jitTypes.js";
9
+ import type { TensorMeta } from "../classify.js";
10
+ import type { EmitCtx } from "../context.js";
11
+ export declare function isTensorExpr(expr: JitExpr): boolean;
12
+ export declare function isComplexExpr(expr: JitExpr): boolean;
13
+ /** Pair of C expressions holding the real and imaginary parts of a
14
+ * complex scalar value. Produced by `emitComplex`. */
15
+ export interface ComplexPair {
16
+ re: string;
17
+ im: string;
18
+ }
19
+ /** Complex tensor expression result: data + dataIm + len in C. For a
20
+ * Var whose JitType is a real tensor, `dataIm` is the literal string
21
+ * `"NULL"` — the numbl_ops complex kernels treat that as "all zero",
22
+ * so a real tensor flowing into a complex op doesn't need a zero
23
+ * buffer. */
24
+ export interface ComplexTensorResult {
25
+ data: string;
26
+ dataIm: string;
27
+ len: string;
28
+ }
29
+ /** Widen a real scalar C expression to a complex pair (im = 0). */
30
+ export declare function widenRealToComplex(realCode: string): ComplexPair;
31
+ /** Escape a JS string into a C string literal (double-quoted, with C
32
+ * escapes for backslash, double-quote, and common control chars;
33
+ * non-ASCII bytes encoded as `\xNN` octets of their UTF-8 encoding). */
34
+ export declare function cStringLiteral(s: string): string;
35
+ /** Resolve a tensor name's meta or throw — the tensor-creation emit
36
+ * helpers depend on the name being classified (with `hasFreshAlloc`)
37
+ * for the d0/d1 locals they write to actually exist at runtime.
38
+ * Failing loudly here beats emitting C that references undeclared
39
+ * identifiers. */
40
+ export declare function requireFreshAllocMeta(ctx: EmitCtx, destName: string, site: string): TensorMeta;
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Public entry point to the emit/ subpackage.
3
+ *
4
+ * Outside callers (the outer-function orchestrator in
5
+ * [../assemble.ts](../assemble.ts)) see only:
6
+ * - `emitStmts` — walk a statement list and push C source lines.
7
+ * - `shapeExprsFor` — derive (d0, d1) C expressions for a dynamic
8
+ * tensor output (used inside this package too, but also by the
9
+ * outer orchestrator for param-output shape plumbing).
10
+ *
11
+ * Everything else is private to the emit/ subpackage.
12
+ */
13
+ export { emitStmts } from "./stmt.js";
14
+ export { shapeExprsFor } from "./tensor.js";
@@ -0,0 +1,23 @@
1
+ import { type JitExpr } from "../../jitTypes.js";
2
+ import { type EmitCtx } from "../context.js";
3
+ /** Emit a value-expression. For scalars, returns a C `double` expression.
4
+ * For tensors, returns the data-variable name (the caller knows to also
5
+ * access the corresponding _len variable).
6
+ *
7
+ * Complex-typed expressions are *not* valid here — they produce a pair
8
+ * of doubles and must go through `emitComplex`. Reaching this function
9
+ * with a complex expression indicates a missed routing at the caller. */
10
+ export declare function emitExpr(expr: JitExpr, ctx: EmitCtx): string;
11
+ export declare function emitBinary(expr: JitExpr & {
12
+ tag: "Binary";
13
+ }, ctx: EmitCtx): string;
14
+ export declare function emitUnary(expr: JitExpr & {
15
+ tag: "Unary";
16
+ }, ctx: EmitCtx): string;
17
+ export declare function emitIndex(expr: JitExpr & {
18
+ tag: "Index";
19
+ }, ctx: EmitCtx): string;
20
+ export declare function emitCall(expr: JitExpr & {
21
+ tag: "Call";
22
+ }, ctx: EmitCtx): string;
23
+ export declare function emitTruthiness(expr: JitExpr, ctx: EmitCtx): string;
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Statement-level emission — the top-level dispatch for every JitStmt.
3
+ *
4
+ * Exports:
5
+ * - `emitStmts(lines, stmts, indent, ctx)` — public entry point; the
6
+ * outer function body is fed through this. Runs the fusion pass
7
+ * when `ctx.fuse` is set, otherwise emits one statement at a time.
8
+ * - `emitStmt` — per-statement dispatch used internally (also used
9
+ * inside If / For / While bodies via `emitStmts`).
10
+ * - `withPendingStmts` — scoped helper for nested expressions that
11
+ * need to hoist declarations above the calling statement.
12
+ *
13
+ * This module is thin on purpose: it routes to the specialized emitters
14
+ * in [./scalar.ts](./scalar.ts), [./complexScalar.ts](./complexScalar.ts),
15
+ * [./tensor.ts](./tensor.ts), [./assign.ts](./assign.ts),
16
+ * [./userCall.ts](./userCall.ts), and [./fused.ts](./fused.ts).
17
+ */
18
+ import { type JitStmt } from "../../jitTypes.js";
19
+ import { type EmitCtx } from "../context.js";
20
+ /** Evaluate `fn` with `ctx.pendingStmts` set to `{lines, indent}` so any
21
+ * UserCall / RangeSliceRead nested inside can hoist its decl+call into
22
+ * `lines` ahead of the calling statement. Restores the prior value on
23
+ * exit (safe even if nested save/restore frames stack). */
24
+ export declare function withPendingStmts<T>(ctx: EmitCtx, lines: string[], indent: string, fn: () => T): T;
25
+ export declare function emitStmts(lines: string[], stmts: JitStmt[], indent: string, ctx: EmitCtx): void;
@@ -0,0 +1,127 @@
1
+ import { type JitExpr } from "../../jitTypes.js";
2
+ import { type EmitCtx } from "../context.js";
3
+ import { type ComplexTensorResult } from "./helpers.js";
4
+ /** Size a scratch tensor buffer to `lenExpr` doubles, freeing any stale
5
+ * buffer from a prior loop iteration first. A plain `if (!sData) malloc`
6
+ * would overflow the buffer if a later iteration demanded more bytes. */
7
+ export declare function emitScratchBufAlloc(lines: string[], indent: string, sData: string, sLen: string, lenExpr: string): void;
8
+ /** Complex scratch alloc: re + im buffers, both sized to `lenExpr`. The
9
+ * complex kernels in numbl_ops write both buffers unconditionally, so
10
+ * both must be valid pointers (len > 0).
11
+ *
12
+ * Same length-match fast-path as emitScratchBufAlloc: both buffers are
13
+ * kept in lockstep, so a single size check guards both. */
14
+ export declare function emitScratchBufAllocComplex(lines: string[], indent: string, sData: string, sDataIm: string, sLen: string, lenExpr: string): void;
15
+ /** Derive (d0, d1) C expressions for a dynamic tensor output whose
16
+ * shape is inherited from an elemwise operand. Uses the operand's
17
+ * static shape when known; else — when `ctx` is provided — falls
18
+ * back to the nearest tensor Var's runtime `_d0`/`_d1` locals
19
+ * (accurate even for partial-shape specializations). Final fallback
20
+ * recovers the missing dim from lenExpr and the other dim (e.g. row
21
+ * `[1, -1]` → `[1, len]`), or `[len, 1]` (column convention) when
22
+ * nothing is known. */
23
+ export declare function shapeExprsFor(shapeSrc: JitExpr | undefined, lenExpr: string, ctx?: EmitCtx): [string, string];
24
+ /** Find a tensor-length expression from a tensor expr tree (for pre-
25
+ * allocating scratch buffers). Returns a C expression for the length. */
26
+ export declare function findTensorLenExpr(expr: JitExpr, ctx: EmitCtx): string;
27
+ /** Ensure `destName`'s buffer is sized to exactly `lenExpr` doubles
28
+ * before an elemwise op writes into it. For locals and dynamic
29
+ * outputs, always free+malloc — loop iterations can produce different
30
+ * sizes, so a first-time-only malloc would overflow the buffer in
31
+ * later iterations. For fixed-size outputs (caller-aliased buffer),
32
+ * just records the length; freeing would corrupt the caller.
33
+ *
34
+ * When the destination is a dynamic output, also writes its _d0/_d1
35
+ * shape locals. Pass the tensor operand whose shape the result
36
+ * inherits (elemwise preserves operand shape); we use its static
37
+ * jitType.shape when available, else fall back to `[lenExpr, 1]`. */
38
+ export declare function emitEnsureTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
39
+ /** Emit an elemwise tensor-assign with aliasing safety and a
40
+ * steady-state fast path.
41
+ *
42
+ * Problem: the RHS of `dst = <elemwise-expr>` may read from `dst` itself
43
+ * (e.g. `r = r .* y + 3.0`). If we resized the dst buffer first and
44
+ * then the kernel read the old dst data, it would dangle. Pre-c54add0
45
+ * the kernel wrote inline into dst, which was only safe when no resize
46
+ * was needed — resizing freed the operand buffer before the kernel
47
+ * read it. c54add0 defused this via an unconditional scratch-transfer
48
+ * (eval into scratch, then free+malloc dst, then memcpy) but that
49
+ * added a full-tensor memcpy to every elemwise assign in hot loops.
50
+ *
51
+ * This helper splits at runtime: if dst is already the right size,
52
+ * emit the kernel directly into dst (no memcpy, no scratch). Else
53
+ * evaluate into a scratch, then realloc dst and memcpy. Both paths
54
+ * are aliasing-safe: the fast path doesn't free dst at all; the slow
55
+ * path reads operands before dst is freed.
56
+ *
57
+ * Fixed-size outputs (caller-owned buffer) skip the guard — their
58
+ * buffer is never freed, so the inline path is always safe.
59
+ *
60
+ * `emitKernel` emits the tensor kernel writing `targetLen` doubles
61
+ * into `targetData`. It is invoked exactly once; both paths share
62
+ * a single emission. Sub-expressions may use scratch buffers freely. */
63
+ export declare function emitElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetLen: string) => void): void;
64
+ /** Complex-tensor sibling of `emitElemwiseTensorAssign`. Same runtime
65
+ * size-match scheme, but manages paired (re, im) buffers. The kernel
66
+ * callback receives both `targetData` and `targetDataIm` so it can
67
+ * write the paired output in one shot.
68
+ *
69
+ * When the kernel's operand aliases the target (e.g. `z = -z`, `z =
70
+ * conj(z)`), the callback MUST be element-wise safe — kernels like
71
+ * `numbl_complex_scalar_binary_elemwise` already are, but a memcpy
72
+ * from operand→target against a same-buffer alias is UB. Use an
73
+ * element-wise loop there. */
74
+ export declare function emitComplexElemwiseTensorAssign(lines: string[], indent: string, destName: string, lenExpr: string, shapeSrc: JitExpr | undefined, ctx: EmitCtx, emitKernel: (lines: string[], indent: string, targetData: string, targetDataIm: string, targetLen: string) => void): void;
75
+ /** Complex-tensor version of emitEnsureTensorBuf. Reallocates both re
76
+ * and im buffers in lockstep, so kernel writes into the paired
77
+ * destination don't mismatch in size. Fixed outputs keep their
78
+ * caller-aliased re buffer and (for complex) the caller-aliased im
79
+ * buffer — we only record the length. */
80
+ export declare function emitEnsureComplexTensorBuf(lines: string[], indent: string, destName: string, lenExpr: string, ctx: EmitCtx, shapeSrc?: JitExpr): void;
81
+ /** Emit a tensor expression as statements, returning the data/len vars. */
82
+ export declare function emitTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): {
83
+ data: string;
84
+ len: string;
85
+ };
86
+ /** For tensor binary/unary, we need multi-statement emission. This helper
87
+ * emits the operation as statements into `lines` and returns the result
88
+ * data pointer variable name. Callers that route the result into a
89
+ * named dst (vs. a scratch) must use the scratch-transfer pattern —
90
+ * let this helper write the full expression into a fresh scratch via
91
+ * `emitTensorExprToStmts`, then `emitEnsureTensorBuf` + memcpy into
92
+ * the dst. That ordering avoids clobbering an operand that aliases
93
+ * the dst (e.g. `r = r .* y + 3.0`). */
94
+ export declare function emitTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
95
+ tag: "Binary";
96
+ }, ctx: EmitCtx, destDataVar: string, destLenVar: string): void;
97
+ /** Emit a tensor expression in complex (paired-buffer) form, producing
98
+ * scratch locals for sub-expressions. Handles:
99
+ * - Var on a complex tensor: returns the name's `_data`/`_data_im`/`_len`.
100
+ * - Var on a real tensor: widens via `dataIm = NULL`.
101
+ * - Binary on a complex-typed tensor expr (the result is complex).
102
+ * - Unary Plus/Minus on a complex-typed tensor expr.
103
+ * - conj / real / imag on a complex tensor — real/imag are actually
104
+ * handled via the real tensor path (result is real), but conj stays
105
+ * complex and is lowered here.
106
+ */
107
+ export declare function emitComplexTensorExprToStmts(lines: string[], indent: string, expr: JitExpr, ctx: EmitCtx): ComplexTensorResult;
108
+ /** Emit a complex tensor binary op into caller-provided dest buffers
109
+ * (both re and im). Handles all combos of real-tensor/complex-tensor/
110
+ * real-scalar/complex-scalar operands. Operand widening is kernel-side
111
+ * (NULL imag pointer or imag=0.0). */
112
+ export declare function emitComplexTensorBinaryStmts(lines: string[], indent: string, expr: JitExpr & {
113
+ tag: "Binary";
114
+ }, ctx: EmitCtx, destDataVar: string, destDataImVar: string, destLenVar: string): void;
115
+ /** Emit `dstData = src(a:b) copy` pattern into caller-provided dst data /
116
+ * len vars. */
117
+ export declare function emitRangeSliceReadToBuf(lines: string[], indent: string, expr: JitExpr & {
118
+ tag: "RangeSliceRead";
119
+ }, ctx: EmitCtx, destData: string, destLen: string): void;
120
+ /** Emit a `src(a:b)` RangeSliceRead into a fresh scratch buffer,
121
+ * returning the {data, len} pair for the result. */
122
+ export declare function emitRangeSliceReadToStmts(lines: string[], indent: string, expr: JitExpr & {
123
+ tag: "RangeSliceRead";
124
+ }, ctx: EmitCtx): {
125
+ data: string;
126
+ len: string;
127
+ };
@@ -0,0 +1,58 @@
1
+ /**
2
+ * UserCall emission — scalar returns in value position, and tensor
3
+ * returns from an Assign RHS.
4
+ *
5
+ * A UserCall lowers to a static C function (`jit_<jitName>`) generated
6
+ * alongside the outer function by [../assemble.ts](../assemble.ts).
7
+ * This module marshals arg slots per the callee's ABI and emits the
8
+ * call itself.
9
+ *
10
+ * Exports:
11
+ * - `emitUserCall` — scalar-return UserCall in value position (Assign
12
+ * RHS / nested expr). Stashes the result into a fresh `__ucN_out`
13
+ * local and returns the name as the expression text.
14
+ * - `emitUserCallArgSlots` — helper used by `emitUserCall` *and* the
15
+ * tensor-return variant in [./assign.ts](./assign.ts) to convert
16
+ * one arg into its ABI-slot expression list.
17
+ * - `emitUserCallTensorAssign` — tensor-return UserCall, only allowed
18
+ * as the top RHS of an Assign. Uses the dynamic-output ABI: the
19
+ * callee mallocs + transfers ownership via `double **` out-params.
20
+ */
21
+ import type { JitExpr } from "../../jitTypes.js";
22
+ import type { TensorMeta } from "../classify.js";
23
+ import { type EmitCtx } from "../context.js";
24
+ /** Emit the C expressions for one arg's ABI slots, consulting the
25
+ * callee's paramDesc so the slot order matches the callee's signature.
26
+ * Scalars contribute one slot; tensors contribute data + len + optional
27
+ * d0 / d1. For the shape slots the caller falls back to
28
+ * `(int64_t)tensorLen(arg)` / `1` when its own arg-var wasn't classified
29
+ * with matching shape plumbing. */
30
+ export declare function emitUserCallArgSlots(a: JitExpr, paramDesc: {
31
+ kind: "scalar" | "complexScalar" | "tensor";
32
+ slots: {
33
+ kind: string;
34
+ }[];
35
+ }, ctx: EmitCtx): string[];
36
+ /** Scalar-return UserCall. Tensor args are marshaled via the callee's
37
+ * paramDescs (data + len + optional d0/d1 slots). The callee is emitted
38
+ * as `static void jit_<jitName>(...)` in the same .c file by
39
+ * `generateC`, with a trailing `__err_flag` pointer. We stash the
40
+ * return value in a fresh local and return its name as the expression
41
+ * text. Must be invoked from statement context so the decl + call can
42
+ * be inserted before the surrounding expression.
43
+ *
44
+ * Tensor-return UserCall is handled upstream by emitTensorAssign (only
45
+ * allowed as an Assign RHS), not here. */
46
+ export declare function emitUserCall(expr: JitExpr & {
47
+ tag: "UserCall";
48
+ }, ctx: EmitCtx): string;
49
+ /** Emit `dest = foo(...)` where foo returns a tensor via the dynamic-
50
+ * output ABI. Feasibility has already verified the callee's output[0]
51
+ * is a fresh-alloc dynamic output, so the callee fills
52
+ * `buf_out / out_len / d0_out / d1_out` and transfers ownership. The
53
+ * caller frees the old dest buffer (if any), takes the new buffer, and
54
+ * lets the epilogue free() it at end-of-scope alongside the other
55
+ * local tensors. */
56
+ export declare function emitUserCallTensorAssign(lines: string[], indent: string, destName: string, destMeta: TensorMeta, expr: JitExpr & {
57
+ tag: "UserCall";
58
+ }, ctx: EmitCtx): void;
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Function epilogue emission.
3
+ *
4
+ * The epilogue is the set of statements appended after the body of a
5
+ * generated function: output-slot writes (scalar out-pointers, tensor
6
+ * dynamic-output transfers), scratch buffer frees, and local / unshared
7
+ * tensor frees. Called once per function by `generateC` in
8
+ * [assemble.ts](./assemble.ts).
9
+ *
10
+ * The prelude is in [prelude.ts](./prelude.ts); both read from the same
11
+ * shared state (`ClassificationResult` + `EmitCtx`).
12
+ */
13
+ import type { ClassificationResult } from "./classify.js";
14
+ import type { COutputDesc } from "./abi.js";
15
+ import { type EmitCtx } from "./context.js";
16
+ export interface EpilogueInput {
17
+ cls: ClassificationResult;
18
+ ctx: EmitCtx;
19
+ outputDescs: COutputDesc[];
20
+ /** Pure-input tensor params that were malloc'd in the prelude's
21
+ * unshare path — must be freed here. */
22
+ unshareTensorParams: Set<string>;
23
+ /** Indent string to prepend to each emitted line. */
24
+ indent: string;
25
+ }
26
+ export declare function buildEpilogue(input: EpilogueInput): string[];
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Feasibility prepass for the C-JIT path.
3
+ *
4
+ * Given the lowered JIT IR for a function and the argument types, decide
5
+ * whether the C codegen can handle it. On any construct that isn't in
6
+ * the whitelist, return `{ok: false, reason}` so the caller falls
7
+ * through to the JS-JIT path.
8
+ *
9
+ * The whitelist intentionally mirrors what [assemble.ts](./assemble.ts)
10
+ * can emit, which in turn mirrors what JS-JIT does. Widen all three
11
+ * together.
12
+ *
13
+ * Covered today: scalar arithmetic and control flow (numbers, booleans,
14
+ * complex-scalar pairs); real tensor params / locals / outputs with
15
+ * Index / AssignIndex (1-3D), RangeSliceRead, AssignIndexRange,
16
+ * AssignIndexCol, TensorLiteral, VConcatGrow, zeros/ones (1-2D); real
17
+ * tensor binary + unary + reductions via the jitCapabilities opcodes;
18
+ * complex tensor binary + unary (minus, conj, real, imag, abs) and
19
+ * flat reductions (sum, prod, any, all); UserCall (scalar args/return
20
+ * and real / complex tensor args/return when the callee passes its own
21
+ * feasibility check).
22
+ */
23
+ import type { JitStmt, JitType } from "../jitTypes.js";
24
+ import type { GeneratedFn } from "../jitLower.js";
25
+ export type FeasibilityResult = {
26
+ ok: true;
27
+ } | {
28
+ ok: false;
29
+ reason: string;
30
+ line?: number;
31
+ };
32
+ /**
33
+ * Check if the lowered function can be handled by the C-JIT.
34
+ *
35
+ * `outputTypes` holds the types of every output variable in order;
36
+ * `outputType` is kept for backward compatibility and equals
37
+ * `outputTypes[0]` when present.
38
+ *
39
+ * Multi-output mirrors the JS-JIT's `return [out0, out1, ...]` shape:
40
+ * the generated C builds a `napi_value` array of length `nargout`,
41
+ * each entry boxed according to its type (scalar doubles, booleans, or
42
+ * tensors).
43
+ */
44
+ export declare function checkCFeasibility(body: JitStmt[], paramNames: string[], argTypes: JitType[], outputType: JitType | null, outputTypes: JitType[], nargout: number, generatedIRBodies?: Map<string, GeneratedFn>): FeasibilityResult;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Function prelude emission.
3
+ *
4
+ * The prelude is the set of C declarations at the top of a generated
5
+ * function, written before any statement from the body: shadowed param
6
+ * locals (for param-output seeding + unshare-at-entry), local tensor
7
+ * declarations, complex-scalar imag companions, and scratch buffer
8
+ * slots.
9
+ *
10
+ * Exported as `buildPrelude` — called once per function by `generateC`
11
+ * in [assemble.ts](./assemble.ts).
12
+ *
13
+ * The epilogue (tensor frees, out-pointer writes) is in
14
+ * [epilogue.ts](./epilogue.ts); both read from the same shared state
15
+ * (`ClassificationResult` + `EmitCtx`) populated upstream.
16
+ */
17
+ import type { JitType } from "../jitTypes.js";
18
+ import type { ClassificationResult } from "./classify.js";
19
+ import { type EmitCtx } from "./context.js";
20
+ export interface PreludeInput {
21
+ cls: ClassificationResult;
22
+ ctx: EmitCtx;
23
+ params: string[];
24
+ argTypes: JitType[];
25
+ /** Names with `kind === "paramOutput"` (output name reuses a param name). */
26
+ paramOutputTensors: Set<string>;
27
+ /** Pure-input tensor params that need an unshare-at-entry malloc+memcpy
28
+ * (the body writes to them, and we must not mutate the caller's buffer). */
29
+ unshareTensorParams: Set<string>;
30
+ /** Locals to declare — outer-scope `localVars` minus params, sorted. */
31
+ allLocals: string[];
32
+ /** Names carrying `complex_or_number` scalar values (paired re+im locals). */
33
+ complexScalarVars: Set<string>;
34
+ /** Indent string to prepend to each emitted line. */
35
+ indent: string;
36
+ }
37
+ export declare function buildPrelude(input: PreludeInput): string[];