numbl 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/binding.gyp +53 -2
  2. package/dist-cli/cli.js +38743 -24679
  3. package/dist-lib/lib.js +43424 -30466
  4. package/dist-lib/numbl-core/executeCode.d.ts +22 -0
  5. package/dist-lib/numbl-core/helpers/bessel.d.ts +9 -0
  6. package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
  7. package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
  8. package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
  9. package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
  10. package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +100 -5
  11. package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
  12. package/dist-lib/numbl-core/interpreter/interpreterSpecialBuiltins.d.ts +2 -0
  13. package/dist-lib/numbl-core/interpreter/types.d.ts +16 -7
  14. package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
  15. package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
  16. package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
  17. package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
  18. package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
  19. package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
  20. package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
  21. package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
  22. package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
  23. package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
  24. package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
  25. package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
  26. package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
  27. package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
  28. package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
  29. package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
  30. package/dist-lib/numbl-core/jit/c/hybrid.d.ts +42 -0
  31. package/dist-lib/numbl-core/jit/c/install.d.ts +15 -0
  32. package/dist-lib/numbl-core/jit/c/parityError.d.ts +26 -0
  33. package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
  34. package/dist-lib/numbl-core/jit/c/registry.d.ts +51 -0
  35. package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
  36. package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
  37. package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
  38. package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
  39. package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
  40. package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
  41. package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +61 -0
  42. package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
  43. package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
  44. package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
  45. package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
  46. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
  47. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +13 -1
  48. package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
  49. package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
  50. package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
  51. package/dist-lib/numbl-core/jit/jitLowerTypes.d.ts +29 -0
  52. package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
  53. package/dist-lib/numbl-core/jit/jitTypes.d.ts +394 -0
  54. package/dist-lib/numbl-core/jit/js/jitCodegen.d.ts +7 -0
  55. package/dist-lib/numbl-core/jit/js/jitCodegenHoist.d.ts +70 -0
  56. package/dist-lib/numbl-core/jit/js/jitHelpers.d.ts +34 -0
  57. package/dist-lib/numbl-core/jit/js/jitHelpersComplex.d.ts +21 -0
  58. package/dist-lib/numbl-core/jit/js/jitHelpersIndex.d.ts +33 -0
  59. package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
  60. package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
  61. package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
  62. package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
  63. package/dist-lib/numbl-core/native/lapack-bridge.d.ts +46 -1
  64. package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
  65. package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
  66. package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
  67. package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
  68. package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
  69. package/dist-lib/numbl-core/ops/index.d.ts +8 -0
  70. package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
  71. package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
  72. package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
  73. package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
  74. package/dist-lib/numbl-core/parser/types.d.ts +6 -0
  75. package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
  76. package/dist-lib/numbl-core/runtime/constructors.d.ts +2 -1
  77. package/dist-lib/numbl-core/runtime/error.d.ts +3 -0
  78. package/dist-lib/numbl-core/runtime/index.d.ts +1 -1
  79. package/dist-lib/numbl-core/runtime/runtime.d.ts +15 -2
  80. package/dist-lib/numbl-core/runtime/runtimePlot.d.ts +11 -0
  81. package/dist-lib/numbl-core/runtime/types.d.ts +16 -1
  82. package/dist-lib/numbl-core/runtime/utils.d.ts +3 -1
  83. package/dist-lib/numbl-core/version.d.ts +1 -1
  84. package/dist-plot-viewer/assets/{index-vtrJ8bml.js → index-GiUNnMQg.js} +1 -1
  85. package/dist-plot-viewer/index.html +1 -1
  86. package/native/elemwise.cpp +134 -0
  87. package/native/jit_runtime/jit_runtime.c +261 -0
  88. package/native/jit_runtime/jit_runtime.h +204 -0
  89. package/native/numbl_addon.cpp +55 -1
  90. package/native/numbl_addon_common.h +1 -0
  91. package/native/ops/bessel.c +572 -0
  92. package/native/ops/comparison.c +150 -0
  93. package/native/ops/complex_binary_elemwise.c +192 -0
  94. package/native/ops/complex_unary_elemwise.c +152 -0
  95. package/native/ops/numbl_ops.c +66 -0
  96. package/native/ops/numbl_ops.h +262 -0
  97. package/native/ops/real_binary_elemwise.c +85 -0
  98. package/native/ops/real_unary_elemwise.c +104 -0
  99. package/native/ops/reduce.c +162 -0
  100. package/native/ops_napi.cpp +320 -0
  101. package/package.json +11 -10
  102. package/dist-lib/numbl-core/interpreter/jit/jitCodegen.d.ts +0 -5
  103. package/dist-lib/numbl-core/interpreter/jit/jitHelpers.d.ts +0 -14
  104. package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -20
  105. package/dist-lib/numbl-core/interpreter/jit/jitTypes.d.ts +0 -168
@@ -22,6 +22,8 @@ export interface ExecOptions {
22
22
  profile?: boolean;
23
23
  /** Called each time a JIT function is compiled, with a description and the generated JS. */
24
24
  onJitCompile?: (description: string, jsCode: string) => void;
25
+ /** Called each time the C-JIT compiles a function, with a description and the generated C. */
26
+ onCJitCompile?: (description: string, cSource: string) => void;
25
27
  /** Initial hold state for plotting (persisted across REPL executions). */
26
28
  initialHoldState?: boolean;
27
29
  /** Override or add builtins for this execution only. */
@@ -34,6 +36,23 @@ export interface ExecOptions {
34
36
  onInput?: (prompt: string) => string;
35
37
  /** Optimization level for interpreter (0 = none, >=1 = JIT scalar functions). */
36
38
  optimization?: number;
39
+ /**
40
+ * Experimental opt variant selector — e.g. `"e1"` for the prototype
41
+ * that keeps JS-JIT as the outer and emits on-demand C kernels for
42
+ * fusible tensor chains. Orthogonal to `optimization`; when set,
43
+ * `optimization` is still the base level (typically 1).
44
+ */
45
+ experimental?: string;
46
+ /** Emit fused per-element loops in C-JIT (requires --opt 2). */
47
+ fuse?: boolean;
48
+ /** Parallelize fused loops with OpenMP threads (--par flag). */
49
+ par?: boolean;
50
+ /**
51
+ * Diagnostic mode (`--check-c-jit-parity`, only meaningful with `--opt 2`):
52
+ * throw on any C-JIT miss where JS-JIT would have compiled. Lets us
53
+ * enumerate parity gaps as hard errors rather than silent fallbacks.
54
+ */
55
+ checkCJitParity?: boolean;
37
56
  /**
38
57
  * Initial implicit cwd path for the MATLAB-style "cwd is the first search path" feature.
39
58
  * - undefined → auto-detect from `system.cwd()` and scan its files.
@@ -41,6 +60,8 @@ export interface ExecOptions {
41
60
  * - null → opt out of the implicit-cwd behavior entirely.
42
61
  */
43
62
  implicitCwdPath?: string | null;
63
+ /** SharedArrayBuffer for cooperative cancellation. Int32[0] != 0 means cancelled. */
64
+ cancelSAB?: SharedArrayBuffer;
44
65
  }
45
66
  export interface BuiltinProfileEntry {
46
67
  totalTimeMs: number;
@@ -72,6 +93,7 @@ export interface ProfileData {
72
93
  export interface ExecResult {
73
94
  output: string[];
74
95
  generatedJS: string;
96
+ generatedC: string;
75
97
  plotInstructions: PlotInstruction[];
76
98
  returnValue: RuntimeValue;
77
99
  variableValues: Record<string, RuntimeValue>;
@@ -2,6 +2,15 @@
2
2
  * Bessel function implementations (pure numeric, no runtime dependencies).
3
3
  *
4
4
  * Provides besselj, bessely, besseli, besselk for real-valued arguments.
5
+ *
6
+ * The rational polynomial approximations for J0, J1, Y0, Y1 are derived from
7
+ * the Cephes Math Library Release 2.8 (June 2000) by Stephen L. Moshier.
8
+ * Original Cephes code: Copyright 1984-2000 by Stephen L. Moshier.
9
+ * Used under the BSD license with permission of the author.
10
+ *
11
+ * The underlying minimax polynomial coefficients are based on the work of
12
+ * W.J. Cody, "Algorithm 715: SPECFUN", ACM Trans. Math. Software 19(1),
13
+ * pp. 22-32, 1993.
5
14
  */
6
15
  export declare function lanczosGamma(x: number): number;
7
16
  export declare function besselj(nu: number, x: number): number;
@@ -46,8 +46,13 @@ export type ReductionKernel = {
46
46
  reduceAll: (v: RuntimeTensor) => RuntimeValue;
47
47
  reduceDim: (v: RuntimeTensor, dim: number) => RuntimeValue;
48
48
  };
49
- /** Create an accumulator-based reduction kernel (sum, mean, etc.) */
50
- export declare function accumKernel(reduceFn: (acc: number, val: number) => number, initial: number, finalizeFn?: (acc: number, count: number) => number): ReductionKernel;
49
+ /** Create an accumulator-based reduction kernel (sum, mean, etc.).
50
+ *
51
+ * When `opCode` is provided (an OpReduce.* value whose semantics match
52
+ * `reduceFn`/`initial`), the reduceAll fast path routes Float64 real and
53
+ * complex tensors through tensorOps.realFlatReduce. The closure path
54
+ * is still used as a fallback for non-Float64 data. */
55
+ export declare function accumKernel(reduceFn: (acc: number, val: number) => number, initial: number, finalizeFn?: (acc: number, count: number) => number, opCode?: number): ReductionKernel;
51
56
  /** Create a slice-based reduction kernel (median, mode, etc.) */
52
57
  export declare function sliceKernel(sliceFn: (slice: ArrayLike<number>) => number): ReductionKernel;
53
58
  /** Create an accumulator kernel that skips NaN values. */
@@ -0,0 +1,39 @@
1
+ /**
2
+ * datetime / duration builtins — scalar-only initial implementation.
3
+ *
4
+ * datetime:
5
+ * datetime() current time
6
+ * datetime('now' | 'today' | 'yesterday' | 'tomorrow')
7
+ * datetime(Y, M, D)
8
+ * datetime(Y, M, D, H, MI, S[, MS])
9
+ * datetime(X, 'ConvertFrom', 'datenum' | 'posixtime' | 'excel' | 'excel1904')
10
+ *
11
+ * datetime values are class_instance with className="datetime" and fields
12
+ * Year/Month/Day/Hour/Minute/Second. They are display-formatted by
13
+ * display.ts as "dd-MMM-yyyy [HH:mm:ss]".
14
+ *
15
+ * duration:
16
+ * Produced by datetime - datetime, or by seconds(N) / minutes(N) / ...
17
+ * Represented as class_instance with className="duration" and a single
18
+ * Seconds field. Display format "hh:mm:ss".
19
+ *
20
+ * Arithmetic:
21
+ * datetime - datetime -> duration
22
+ * datetime + duration -> datetime
23
+ * datetime - duration -> datetime
24
+ * duration + duration -> duration
25
+ * duration - duration -> duration
26
+ *
27
+ * `seconds(d)` returns the numeric seconds of a duration, or wraps a
28
+ * number as a duration.
29
+ */
30
+ import type { RuntimeValue, RuntimeClassInstance } from "../../runtime/types.js";
31
+ export declare function makeDatetime(year: number, month: number, day: number, hour: number, minute: number, second: number): RuntimeClassInstance;
32
+ export declare function makeDuration(totalSeconds: number): RuntimeClassInstance;
33
+ /**
34
+ * Attempt to handle a named binary operator ("plus", "minus", "lt", ...)
35
+ * when at least one operand is a datetime or duration class_instance.
36
+ * Returns the result value on success, or undefined to let the generic
37
+ * numeric path run.
38
+ */
39
+ export declare function tryDatetimeDurationBinop(opName: string, a: RuntimeValue, b: RuntimeValue): RuntimeValue | undefined;
@@ -25,6 +25,7 @@ import "./string-extras.js";
25
25
  import "./prng.js";
26
26
  import "./cell-struct.js";
27
27
  import "./time-system.js";
28
+ import "./datetime.js";
28
29
  import "./sparse.js";
29
30
  import "./special-math.js";
30
31
  import "./misc.js";
@@ -3,3 +3,4 @@
3
3
  * ismac, ispc, isunix.
4
4
  */
5
5
  export declare function getTicTime(): number;
6
+ export declare function setTicTime(ms: number): void;
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import type { RuntimeValue, RuntimeTensor, RuntimeComplexNumber } from "../../runtime/types.js";
5
5
  import { FloatXArray } from "../../runtime/types.js";
6
- import { type JitType } from "../jit/jitTypes.js";
6
+ import { type JitType } from "../../jit/jitTypes.js";
7
7
  export interface IBuiltinResolution {
8
8
  outputTypes: JitType[];
9
9
  apply: (args: RuntimeValue[], nargout: number) => RuntimeValue | RuntimeValue[];
@@ -18,7 +18,50 @@ export interface IBuiltin {
18
18
  /** Given input JIT types + nargout, return output types and a specialized apply, or null. */
19
19
  resolve: (argTypes: JitType[], nargout: number) => IBuiltinResolution | null;
20
20
  /** Optional fast-path JS code emission for JIT. Return null to fall back to $h.ib_<name>. */
21
- jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
21
+ jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
22
+ /**
23
+ * Optional fast-path C code emission for the C-JIT. Return null if the
24
+ * builtin can't be emitted as a C expression for the given arg types
25
+ * — the C-JIT will bail to JS-JIT for this call site. Covers the
26
+ * scalar-argument case only (tensor-argument emission is handled by
27
+ * separate tensor-op dispatch in assemble.ts / emit/fused.ts).
28
+ */
29
+ jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
30
+ /**
31
+ * Optional C-JIT tensor-op dispatch metadata. When a field is present,
32
+ * both the C feasibility check and C codegen read it directly —
33
+ * adding a new tensor-unary / tensor-binary / tensor-reduction builtin
34
+ * is one edit here (plus the matching native-side enum or C function).
35
+ * Previously these were three parallel hardcoded tables (one in
36
+ * feasibility.ts, two in context.ts) that could silently drift
37
+ * from the native-side opcode enums. Centralizing on the IBuiltin
38
+ * registration removes that drift risk.
39
+ */
40
+ jitCapabilities?: JitCapabilities;
41
+ }
42
+ /** Per-builtin C-JIT tensor-op dispatch metadata. See IBuiltin.jitCapabilities. */
43
+ export interface JitCapabilities {
44
+ /**
45
+ * libnumbl_ops opcode enum name (e.g. "NUMBL_UNARY_EXP") for
46
+ * element-wise unary tensor builtins routed through
47
+ * `numbl_realUnaryElemwise`. Set this on element-wise unary functions
48
+ * that have a libnumbl_ops opcode and are safe to invoke on any real
49
+ * input (domain-restricted ones like log/sqrt stay excluded).
50
+ */
51
+ tensorUnaryOp?: string;
52
+ /**
53
+ * C function name (e.g. "fmax", "atan2", "numbl_mod") for 2-arg
54
+ * element-wise tensor builtins. The C-JIT emits an inline per-element
55
+ * loop calling this function; it must match the interpreter's
56
+ * scalar-apply semantics exactly.
57
+ */
58
+ tensorBinaryFn?: string;
59
+ /**
60
+ * libnumbl_ops opcode enum name (e.g. "NUMBL_REDUCE_SUM") for
61
+ * tensor→scalar reductions routed through `numbl_tensor_reduce_op`.
62
+ * Set on reduction builtins (sum / prod / max / min / any / all / mean).
63
+ */
64
+ tensorReductionOp?: string;
22
65
  }
23
66
  export declare function getIBuiltin(name: string): IBuiltin | undefined;
24
67
  export declare function registerIBuiltin(b: IBuiltin): void;
@@ -66,7 +109,9 @@ export declare function defineBuiltin(opts: {
66
109
  name: string;
67
110
  help?: BuiltinHelp;
68
111
  cases: BuiltinCase[];
69
- jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
112
+ jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
113
+ jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
114
+ jitCapabilities?: JitCapabilities;
70
115
  }): void;
71
116
  type NumberJitType = Extract<JitType, {
72
117
  kind: "number";
@@ -92,6 +137,10 @@ interface UnaryElemwiseSpec {
92
137
  };
93
138
  /** Use applyUnaryElemwiseMaybeComplex instead of applyUnaryElemwise */
94
139
  maybeComplex?: boolean;
140
+ /** Opcode for the native unaryElemwise dispatch (see unary_elemwise.cpp).
141
+ * Used by the maybeComplex path so that e.g. sqrt/log can run in native C
142
+ * on the common all-in-domain case, then fall back for NaN entries only. */
143
+ nativeOpCode?: number;
95
144
  }
96
145
  /** Generate cases for a unary element-wise builtin. Each accepted kind gets
97
146
  * its own case that bundles the type rule with the corresponding apply. */
@@ -101,8 +150,54 @@ export declare function unaryRealResultCases(realFn: (x: number) => number, comp
101
150
  /** Build cases for numeric predicates (isnan, isinf, isfinite) that return logical. */
102
151
  export declare function predicateCases(scalarTest: (x: number) => boolean, complexTest: (re: number, im: number) => boolean, tensorTest: (x: number) => boolean, tensorComplexTest: (re: number, im: number) => boolean, name: string): BuiltinCase[];
103
152
  /** Fast-path emitter for unary Math.* functions.
104
- * Emits Math.fn(x) for scalar numbers, $h.tHelper(x) for real tensors. */
105
- export declare function unaryMathJitEmit(mathFn: string, tensorHelper: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[]) => string | null;
153
+ * Emits Math.fn(x) for scalar numbers, $h.tHelper(dest, x) for real
154
+ * tensors. `getDest` is a lazy callback returning the dest local: either
155
+ * a mangled LHS (top-level Assign) or a fresh scratch (inner tensor
156
+ * sub-expression). It's only invoked when the tensor fast path is
157
+ * actually taken, so scalar / rejected paths don't burn a scratch. */
158
+ export declare function unaryMathJitEmit(mathFn: string, tensorHelper: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
106
159
  /** Fast-path emitter for binary Math.* functions on two scalar numbers. */
107
160
  export declare function binaryMathJitEmit(mathFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
161
+ /** Fast-path C emitter for unary math functions on a scalar.
162
+ * Emits `cFn(x)` for scalar number/boolean; returns null otherwise
163
+ * (tensor emission is handled separately by emit/tensor.ts).
164
+ * If `requireNonneg` is set, rejects values whose sign isn't known
165
+ * to be nonneg — matches the JS guard for domain-restricted functions. */
166
+ export declare function unaryMathJitEmitC(cFn: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[]) => string | null;
167
+ /** Fast-path C emitter for binary math functions on two scalar numbers. */
168
+ export declare function binaryMathJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
169
+ /**
170
+ * Fast-path C emitter for 1-arg scalar builtins that collapse to a
171
+ * compile-time constant given the arg's kind. Common for shape/type
172
+ * predicates where the answer is fully determined by the type
173
+ * (e.g. `isnumeric(number) -> 1.0`, `isscalar(number) -> 1.0`,
174
+ * `ndims(number) -> 2.0`, `numel(number) -> 1.0`).
175
+ *
176
+ * `valueByKind` maps each supported JitType kind to its C constant.
177
+ * Arg kinds not in the map return null, which bails the C-JIT to
178
+ * JS-JIT for that call site. All values must be valid C double
179
+ * literals (`"1.0"`, `"0.0"`, `"2.0"`, ...).
180
+ */
181
+ export declare function scalarConstantJitEmitC(valueByKind: Partial<Record<JitType["kind"], string>>): (argCode: string[], argTypes: JitType[]) => string | null;
182
+ /**
183
+ * Fast-path C emitter for 1-arg scalar predicates backed by a runtime
184
+ * helper whose return value is int (e.g. `numbl_is_nan`,
185
+ * `numbl_is_inf`, `numbl_is_finite`). The int is cast to double for
186
+ * the C-JIT's uniform boolean-as-double representation. Returns null
187
+ * for non-scalar args.
188
+ *
189
+ * Note: `isnan` / `isinf` / `isfinite` from `<math.h>` can't be used
190
+ * directly because the JIT compiles with `-ffast-math`, which implies
191
+ * `-ffinite-math-only` and constant-folds those macros to false/true.
192
+ * The `numbl_is_nan` / `_is_inf` / `_is_finite` helpers in
193
+ * `jit_runtime` use bit-pattern inspection and live in a separately
194
+ * compiled archive, so the caller's `-ffast-math` can't defeat them.
195
+ */
196
+ export declare function unaryPredicateJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
197
+ /**
198
+ * Fast-path C emitter for 1-arg scalar builtins that are the identity
199
+ * on real scalars (e.g. `double(x)`, `real(x)`, `conj(x)`). Returns
200
+ * `(x)` for `number`/`boolean`, null otherwise.
201
+ */
202
+ export declare function scalarIdentityJitEmitC(): (argCode: string[], argTypes: JitType[]) => string | null;
108
203
  export {};
@@ -53,12 +53,50 @@ export declare class Interpreter {
53
53
  fn: (...args: unknown[]) => unknown;
54
54
  source: string;
55
55
  } | null>;
56
+ /** @internal Per-instance cache for C-JIT-compiled loops (parallel to loopJitCache). */
57
+ loopCJitCache: Map<string, {
58
+ fn: (...args: unknown[]) => unknown;
59
+ } | null>;
56
60
  /** @internal Progressive type widening for loop JIT: location -> last unified input types. */
57
- loopLastInputTypes: Map<string, import("./jit/jitTypes.js").JitType[]>;
58
- /** Optimization level (0 = pure interpreter, >=1 = JIT scalar functions). */
61
+ loopLastInputTypes: Map<string, import("../jit/jitTypes.js").JitType[]>;
62
+ /** @internal Sibling stmts of the currently-executing stmt (set by execStmts). */
63
+ _postSiblings: import("../parser/types.js").Stmt[] | null;
64
+ /** @internal Index in _postSiblings of the next stmt after the current one. */
65
+ _postSiblingsIdx: number;
66
+ /**
67
+ * Optimization level:
68
+ * 0 — pure AST interpreter, no JIT.
69
+ * 1 — JS-JIT (default): type-specialize hot functions/loops to JS via `new Function()`.
70
+ * 2 — C-JIT: additionally emit C for feasible scalar specializations,
71
+ * compile to a native `.node` module, and invoke via N-API.
72
+ * Infeasible IR transparently falls back to the JS-JIT path.
73
+ */
59
74
  optimization: number;
60
- /** Callback for JIT compilation logging. */
75
+ /**
76
+ * Experimental opt variant selector — e.g. `"e1"` for the prototype
77
+ * that keeps JS-JIT as the outer and emits on-demand C kernels for
78
+ * fusible tensor chains. Undefined for the standard `--opt <n>` path.
79
+ */
80
+ experimental?: string;
81
+ /** Emit fused per-element loops in C-JIT (--fuse flag). */
82
+ fuse: boolean;
83
+ /** Parallelize fused loops with OpenMP threads (--par flag). */
84
+ par: boolean;
85
+ /**
86
+ * Diagnostic mode (`--check-c-jit-parity`, only meaningful with `--opt 2`).
87
+ * When set, any C-JIT miss where JS-JIT would have compiled throws a
88
+ * `CJitParityError` instead of silently falling back — surfacing parity
89
+ * gaps as a punch list of features to implement in the C-JIT. Env
90
+ * failures (missing `cc`, compile failure) also throw, since the user
91
+ * explicitly asked to audit C-JIT coverage.
92
+ */
93
+ checkCJitParity: boolean;
94
+ /** Callback for JIT compilation logging (JS codegen). */
61
95
  onJitCompile?: (description: string, jsCode: string) => void;
96
+ /** Callback for C-JIT compilation logging (--dump-c). */
97
+ onCJitCompile?: (description: string, cSource: string) => void;
98
+ /** Verbose log sink (plumbed from ExecOptions.log; used by C-JIT for diagnostics). */
99
+ log?: (message: string) => void;
62
100
  constructor(rt: Runtime, ctx: LoweringContext, functionIndex: FunctionIndex, mainFileName: string, initialVariableValues?: Record<string, RuntimeValue>);
63
101
  /** Clear all JIT and function resolution caches. Called after addpath/rmpath. */
64
102
  clearAllCaches(): void;
@@ -12,6 +12,8 @@ export interface InterpreterContext {
12
12
  evalInLocalScope: (codeArg: unknown, fileName?: string) => unknown;
13
13
  callFunction: (name: string, args: unknown[], nargout: number) => unknown;
14
14
  rt: Runtime;
15
+ /** Optimization level (0 = JIT disabled, 1 = JIT scalar functions). */
16
+ optimization: number;
15
17
  /** Resolve a workspace function or class name to its source file,
16
18
  * or undefined if no workspace file provides that name.
17
19
  * `kind` distinguishes a regular .m function from a .numbl.js
@@ -4,7 +4,7 @@
4
4
  import type { Stmt, ArgumentsBlock } from "../parser/types.js";
5
5
  import type { Runtime } from "../runtime/runtime.js";
6
6
  import type { RuntimeValue } from "../runtime/types.js";
7
- import type { JitType } from "./jit/jitTypes.js";
7
+ import type { JitType } from "../jit/jitTypes.js";
8
8
  export declare class BreakSignal {
9
9
  readonly _tag = "break";
10
10
  }
@@ -22,15 +22,24 @@ export declare class Environment {
22
22
  private vars;
23
23
  /** When true, writes to variables found in parent go to the parent (nested function semantics). */
24
24
  isNested: boolean;
25
- /** Nested function definitions registered during execution. */
26
- nestedFunctions: Map<string, {
25
+ /** Nested function definitions registered during execution. Lazy-initialized. */
26
+ private _nestedFunctions;
27
+ get nestedFunctions(): Map<string, {
27
28
  fn: FunctionDef;
28
29
  env: Environment;
29
30
  }>;
30
- /** Names declared as `global` in this scope — reads/writes go through rt.$g */
31
- globalNames: Set<string>;
32
- /** Names declared as `persistent` in this scope */
33
- persistentNames: Set<string>;
31
+ set nestedFunctions(v: Map<string, {
32
+ fn: FunctionDef;
33
+ env: Environment;
34
+ }>);
35
+ /** Names declared as `global` in this scope — reads/writes go through rt.$g. Lazy-initialized. */
36
+ private _globalNames;
37
+ get globalNames(): Set<string>;
38
+ set globalNames(v: Set<string>);
39
+ /** Names declared as `persistent` in this scope. Lazy-initialized. */
40
+ private _persistentNames;
41
+ get persistentNames(): Set<string>;
42
+ set persistentNames(v: Set<string>);
34
43
  /** Function ID for persistent variable storage */
35
44
  persistentFuncId: string | undefined;
36
45
  /** Back-reference to the runtime (needed for global/persistent access) */
@@ -0,0 +1,90 @@
1
+ /**
2
+ * The native ABI as an explicit slot schema — one source of truth shared
3
+ * between the C-signature builder (assemble.ts) and the JS wrapper
4
+ * marshaller (install.ts).
5
+ *
6
+ * Each `AbiSlot` carries everything either side needs: the C type for
7
+ * the signature, the koffi type string (with `_Out_` prefix for
8
+ * out-pointers), the identifier name in the emitted C, and a backref
9
+ * (paramIdx / outputIdx) so the JS wrapper can locate the source value
10
+ * or output buffer without reconstructing the param-to-slot mapping
11
+ * itself.
12
+ *
13
+ * Adding a new ABI shape = add a slot kind here, emit it in
14
+ * `buildAbiSlots`, and handle it in the JS marshaller.
15
+ */
16
+ import type { ClassificationResult } from "./classify.js";
17
+ export type AbiSlotKind = "scalar" | "complexScalarRe" | "complexScalarIm" | "tensorData"
18
+ /** Imaginary data pointer for a complex tensor param. Paired with
19
+ * `tensorData`. Marshaller passes the RuntimeTensor's `.imag` (a
20
+ * Float64Array) or NULL when `.imag === undefined`; the numbl_ops
21
+ * complex kernels treat NULL imag as all-zero. */
22
+ | "tensorDataIm" | "tensorLen" | "tensorD0" | "tensorD1" | "scalarOut" | "complexScalarReOut" | "complexScalarImOut" | "fixedOutBuf"
23
+ /** Imaginary fixed-output buffer (complex tensor output, non-dynamic). */
24
+ | "fixedOutBufIm" | "fixedOutLen" | "dynOutBuf"
25
+ /** Imaginary buffer pointer for a dynamic complex tensor output. C
26
+ * mallocs, transfers ownership via `double **`; wrapper decodes+copies
27
+ * into a fresh Float64Array then frees. */
28
+ | "dynOutBufIm" | "dynOutLen" | "dynOutD0" | "dynOutD1" | "ticState" | "errFlag"
29
+ /** Callback for `disp(...)` — JS-registered function pointer. The C
30
+ * body invokes it directly; the JS wrapper supplies a koffi-registered
31
+ * pointer that routes into `rt.output`. Signature:
32
+ * void __disp_cb(const char *s, double num, int kind)
33
+ * kind=0 => use `s`, kind=1 => use `num`. */
34
+ | "dispCb";
35
+ export interface AbiSlot {
36
+ kind: AbiSlotKind;
37
+ /** C type string for the signature, e.g. "double", "const double *",
38
+ * "double **". */
39
+ cType: string;
40
+ /** Identifier as it appears in the C signature. */
41
+ cName: string;
42
+ /** koffi type string, with `_Out_` prefix where koffi must treat the
43
+ * pointer as an out-param. */
44
+ koffiType: string;
45
+ /** Index into paramDescs, for "scalar" / "tensor*" kinds. */
46
+ paramIdx?: number;
47
+ /** Index into outputDescs, for output-allocated kinds. */
48
+ outputIdx?: number;
49
+ }
50
+ export interface CParamDesc {
51
+ name: string;
52
+ kind: "scalar" | "complexScalar" | "tensor";
53
+ /** For tensor params: max indexing arity the body uses (1, 2, or 3).
54
+ * Drives the extra `_d0` / `_d1` shape args the JS wrapper must
55
+ * marshal. `undefined` means the tensor is only used in whole-tensor
56
+ * ops (legacy data/len ABI). */
57
+ ndim?: number;
58
+ /** True for complex tensor params. Adds an imag-data slot right after
59
+ * the real-data slot; the marshaller supplies the tensor's `.imag`
60
+ * Float64Array or NULL. Ignored for scalar kinds. */
61
+ isComplex?: boolean;
62
+ /** Ordered slots this param contributes to the ABI. One slot for a
63
+ * scalar; two for a complex scalar (re + im); two or more
64
+ * (data + [imag for complex] + len + optional d0/d1) for a tensor. */
65
+ slots: AbiSlot[];
66
+ }
67
+ /** Per-output descriptor. Tells the JS wrapper how to marshal outputs. */
68
+ export interface COutputDesc {
69
+ name: string;
70
+ kind: "scalar" | "boolean" | "complexScalar" | "tensor";
71
+ /** True for tensor outputs using the dynamic-output ABI: the C code
72
+ * malloc's the buffer and transfers ownership via `double **` and
73
+ * extra d0/d1 out-slots. The JS wrapper decodes the pointer, copies
74
+ * into a fresh Float64Array, and frees the C allocation. */
75
+ dynamic?: boolean;
76
+ /** True for complex tensor outputs. Fixed outputs add a paired imag
77
+ * Float64Array buffer; dynamic outputs add a paired imag `double **`
78
+ * out-pointer the caller decodes + frees after the call. */
79
+ isComplex?: boolean;
80
+ /** Ordered slots this output contributes to the ABI. One for scalars,
81
+ * two for complex scalars (reOut + imOut), two for fixed real tensor
82
+ * outputs (buf + lenOut), three for fixed complex (buf + bufIm +
83
+ * lenOut), four for dynamic real tensor outputs, five for dynamic
84
+ * complex (dynBuf + dynBufIm + dynLen + dynD0 + dynD1). */
85
+ slots: AbiSlot[];
86
+ }
87
+ /** Build the ABI schema for one generated function. Mutates paramDescs /
88
+ * outputDescs in place by filling in `slots`; returns the complete
89
+ * `abiSlots` array in calling order (params, then outputs, then trailers). */
90
+ export declare function buildAbiSlots(paramDescs: CParamDesc[], outputDescs: COutputDesc[], cls: ClassificationResult, paramOutputTensors: Set<string>, unshareTensorParams: Set<string>, needsTicState: boolean, needsErrorFlag: boolean, needsDispCb: boolean): AbiSlot[];
@@ -0,0 +1,56 @@
1
+ /**
2
+ * JIT IR → pure C code generation (koffi path).
3
+ *
4
+ * Orchestration only: this file wires the classify / ABI / emit pieces
5
+ * together and assembles the final C source (headers + per-callee
6
+ * static functions + outer function).
7
+ *
8
+ * classify.ts — TensorMeta / analyzeTensorUsage, the single pass
9
+ * feeding every downstream decision.
10
+ * abi.ts — AbiSlot / CParamDesc / COutputDesc, buildAbiSlots.
11
+ * The one schema walked by both the signature builder and the JS wrapper.
12
+ * emit/ — per-statement / per-expression C emission, split
13
+ * by concern (scalar, complexScalar, tensor, assign,
14
+ * userCall, stmt, fused). Reads ctx.cls for every
15
+ * classification decision.
16
+ * context.ts — EmitCtx + shared name/opcode helpers.
17
+ *
18
+ * UserCall support: when a feasible user-defined function is called
19
+ * from the outer body, its lowered IR is already in `generatedIRBodies`
20
+ * (populated by `lowerUserFuncCall` in jitLower.ts). We emit each
21
+ * reachable callee as a `static void jit_<jitName>(...)` in the same
22
+ * .c file, in post-order so callees are defined before callers. The
23
+ * shared `__err_flag` pointer flows from outer to every callee.
24
+ */
25
+ import { type JitStmt, type JitType } from "../jitTypes.js";
26
+ import type { GeneratedFn } from "../jitLower.js";
27
+ import { type AbiSlot, type CParamDesc, type COutputDesc } from "./abi.js";
28
+ export type { AbiSlot, AbiSlotKind } from "./abi.js";
29
+ export type { CParamDesc, COutputDesc } from "./abi.js";
30
+ export { mangle, mangleIm, tensorData, tensorDataIm, tensorLen, tensorD0, tensorD1, formatNumberLiteral, C_SCALAR_TARGET, } from "./context.js";
31
+ export interface GenerateCResult {
32
+ cSource: string;
33
+ cFnName: string;
34
+ paramDescs: CParamDesc[];
35
+ outputDescs: COutputDesc[];
36
+ /** The full ABI slot list in calling order:
37
+ * paramDescs[0].slots ++ paramDescs[1].slots ++ ...
38
+ * ++ outputDescs[0].slots ++ ... ++ trailer slots (ticState/errFlag/dispCb).
39
+ * The JS wrapper walks this list to marshal values. */
40
+ abiSlots: AbiSlot[];
41
+ /** True when any tensor is involved (params, locals, or outputs). */
42
+ usesTensors: boolean;
43
+ /** koffi function signature string for declaring the C function. */
44
+ koffiSignature: string;
45
+ /** True when tic/toc are used — the function has an extra `double*` param. */
46
+ needsTicState: boolean;
47
+ /** True when any Index read was emitted — the function has an extra
48
+ * `double *__err_flag` trailing param. */
49
+ needsErrorFlag: boolean;
50
+ /** True when a `disp(...)` call was emitted — the function has an
51
+ * extra `void (*__disp_cb)(const char *, double, int)` trailing
52
+ * param. The JS wrapper registers a callback that routes back to
53
+ * `rt.output`. */
54
+ needsDispCb: boolean;
55
+ }
56
+ export declare function generateC(body: JitStmt[], params: string[], outputs: string[], nargout: number, localVars: Set<string>, argTypes: JitType[], _outputType: JitType | null, outputTypes: JitType[], fnName: string, fuse?: boolean, openmp?: boolean, generatedIRBodies?: Map<string, GeneratedFn>): GenerateCResult;
@@ -0,0 +1,70 @@
1
+ /**
2
+ * C-JIT: unified classification pass for tensor names.
3
+ *
4
+ * Replaces the ten ad-hoc sets/maps (`tensorVars`, `paramTensorNames`,
5
+ * `outputTensorNames`, `localTensorNames`, `assignIndexTargets`,
6
+ * `unshareTensorParams`, `tensorMaxDim`, `freshAllocTensors`,
7
+ * `dynamicOutputs`, `paramOutputTensors`) that the codegen used to
8
+ * build from 7+ body walks. Everything downstream — signature builder,
9
+ * prelude, epilogue, emit helpers, fusion call — now reads from the
10
+ * `TensorMeta` table this pass produces.
11
+ */
12
+ import type { JitExpr, JitStmt, JitType } from "../jitTypes.js";
13
+ export type TensorKind =
14
+ /** Tensor param, never appears in the output list. */
15
+ "param"
16
+ /** Tensor param whose name is also in the output list. */
17
+ | "paramOutput"
18
+ /** Pure tensor output (not a param). */
19
+ | "output"
20
+ /** Tensor local (neither param nor output). */
21
+ | "local";
22
+ export interface TensorMeta {
23
+ kind: TensorKind;
24
+ /** Max index arity on this name (1/2/3). 0 means the name is never
25
+ * used as an Index or AssignIndex base. */
26
+ maxIndexDim: number;
27
+ /** True when any `Assign(name, RHS)` has RHS ∈ {TensorLiteral,
28
+ * VConcatGrow, Call(zeros|ones), Var(src) where src.hasFreshAlloc,
29
+ * RangeSliceRead}. Propagated to fixed point. */
30
+ hasFreshAlloc: boolean;
31
+ /** True when any AssignIndex / AssignIndexRange / AssignIndexCol
32
+ * uses this name as the base. */
33
+ isAssignIndexTarget: boolean;
34
+ /** Derived: `kind === "param"` and (`isAssignIndexTarget` or
35
+ * `hasFreshAlloc`). Triggers the unshare-at-entry malloc+memcpy
36
+ * prelude so writes don't leak to the caller's buffer. */
37
+ needsUnshare: boolean;
38
+ /** Derived: `hasFreshAlloc` and (`kind === "output"` or
39
+ * `kind === "paramOutput"`). Triggers the `double **` dynamic-output
40
+ * ABI. */
41
+ isDynamicOutput: boolean;
42
+ /** True when this tensor's `JitType.isComplex === true` — either at
43
+ * the boundary (param / output type) or propagated from a complex
44
+ * RHS for locals. Drives paired imag-buffer plumbing: every complex
45
+ * tensor gets a `v_name_data_im` companion pointer, an extra ABI
46
+ * slot for boundaries, and imag malloc / free / copy parallel to
47
+ * the existing real path. */
48
+ isComplex: boolean;
49
+ }
50
+ export interface ClassificationResult {
51
+ /** All names with a tensor role, in insertion order (params first,
52
+ * then outputs, then locals in body order). */
53
+ tensorNames: string[];
54
+ /** Per-name metadata. */
55
+ meta: Map<string, TensorMeta>;
56
+ /** True iff any name has `hasFreshAlloc`. Lets callers skip the
57
+ * dynamic-output marshalling when no name uses it. */
58
+ hasAnyDynamic: boolean;
59
+ /** Names that have any tensor role, i.e. those for which `meta.has(name)` is true. */
60
+ tensorVars: Set<string>;
61
+ /** Names with `kind === "param"` or `"paramOutput"`. */
62
+ paramTensorNames: Set<string>;
63
+ /** Names with `kind === "output"` or `"paramOutput"`. */
64
+ outputTensorNames: Set<string>;
65
+ /** Names with `kind === "local"`. */
66
+ localTensorNames: Set<string>;
67
+ }
68
+ /** Does this Assign RHS allocate a fresh C-owned buffer? */
69
+ export declare function isFreshTensorRhs(expr: JitExpr): boolean;
70
+ export declare function analyzeTensorUsage(body: JitStmt[], params: string[], argTypes: JitType[], outputNames: string[], outputTypes: JitType[]): ClassificationResult;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * C-JIT compilation driver (koffi path).
3
+ *
4
+ * Takes generated C source, compiles it into a `.so` shared library,
5
+ * loads it via koffi, and returns the declared function.
6
+ *
7
+ * Strategy:
8
+ * 1. Content-addressed cache under ~/.cache/numbl/c-jit/ — the hash
9
+ * includes the source bytes plus compiler/platform/numbl versions,
10
+ * so any input change forces a recompile.
11
+ * 2. On cache miss, write `src.c` into a fresh tmpdir and shell out to
12
+ * the C compiler (`$NUMBL_CC` or `cc`) with `-shared -fPIC`.
13
+ * 3. Load with koffi.load() and declare the function.
14
+ *
15
+ * No Node API headers are needed — functions are plain C with raw types.
16
+ * No NAPI_MODULE_INIT, no module registration, no exit hooks.
17
+ */
18
+ export interface CompiledCFn {
19
+ fn: (...args: unknown[]) => unknown;
20
+ cachedPath: string;
21
+ /** The loaded koffi library handle, for declaring additional exports. */
22
+ lib: any;
23
+ }
24
+ /**
25
+ * Compile + load a C function via koffi. Returns null on any failure.
26
+ *
27
+ * `koffiSignature` is the koffi type-string for the C function, e.g.:
28
+ * "void jit_fn(double, double *, int64_t, double *)"
29
+ */
30
+ export declare function compileAndLoad(cSource: string, koffiSignature: string, _cFnName: string, log?: (m: string) => void, extraFlags?: string[]): CompiledCFn | null;
31
+ export declare function resetCEnvForTesting(): void;
32
+ export declare function cJitUnavailableReason(): string | undefined;
33
+ export declare function cJitCacheSize(): number;
34
+ export declare function readCachedBuild(cachedPath: string): Buffer;
35
+ /** True when the C compiler supports `-fopenmp` (thread-level parallelism).
36
+ * Triggers env discovery on first call so it can be used before compileAndLoad. */
37
+ export declare function cJitOpenmpAvailable(log?: (m: string) => void): boolean;