numbl 0.1.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/binding.gyp +59 -3
  2. package/dist-cli/cli.js +22538 -7936
  3. package/dist-lib/lib.js +34682 -20852
  4. package/dist-lib/numbl-core/executeCode.d.ts +13 -0
  5. package/dist-lib/numbl-core/fileIOAdapter.d.ts +2 -0
  6. package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
  7. package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
  8. package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
  9. package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
  10. package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
  11. package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
  12. package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
  13. package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
  14. package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
  15. package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
  16. package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
  17. package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
  18. package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
  19. package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
  20. package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
  21. package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
  22. package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
  23. package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
  24. package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
  25. package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
  26. package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
  27. package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
  28. package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
  29. package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
  30. package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
  31. package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +46 -0
  32. package/dist-lib/numbl-core/jit/e1/hash.d.ts +10 -0
  33. package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
  34. package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
  35. package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +66 -0
  36. package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
  37. package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
  38. package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +34 -0
  39. package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +25 -0
  40. package/dist-lib/numbl-core/jit/e2/cache.d.ts +80 -0
  41. package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +55 -0
  42. package/dist-lib/numbl-core/jit/e2/classify.d.ts +119 -0
  43. package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +16 -0
  44. package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +79 -0
  45. package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +71 -0
  46. package/dist-lib/numbl-core/jit/e2/install.d.ts +11 -0
  47. package/dist-lib/numbl-core/jit/e2/liveness.d.ts +29 -0
  48. package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +49 -0
  49. package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +75 -0
  50. package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +24 -0
  51. package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +72 -0
  52. package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +29 -0
  53. package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
  54. package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +69 -0
  55. package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
  56. package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
  57. package/dist-lib/numbl-core/jit/heavyOps.d.ts +15 -0
  58. package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
  59. package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
  60. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
  61. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
  62. package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
  63. package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
  64. package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
  65. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
  66. package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
  67. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
  68. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
  69. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
  70. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
  71. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
  72. package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
  73. package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
  74. package/dist-lib/numbl-core/jit/js/jsMultiReduction.d.ts +70 -0
  75. package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
  76. package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
  77. package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
  78. package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
  79. package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
  80. package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
  81. package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
  82. package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
  83. package/dist-lib/numbl-core/ops/index.d.ts +8 -0
  84. package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
  85. package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
  86. package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
  87. package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
  88. package/dist-lib/numbl-core/parser/types.d.ts +6 -0
  89. package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
  90. package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
  91. package/dist-lib/numbl-core/version.d.ts +1 -1
  92. package/native/jit_runtime/jit_runtime.c +261 -0
  93. package/native/jit_runtime/jit_runtime.h +204 -0
  94. package/native/numbl_addon.cpp +62 -1
  95. package/native/ops/bessel.c +572 -0
  96. package/native/ops/comparison.c +150 -0
  97. package/native/ops/complex_binary_elemwise.c +192 -0
  98. package/native/ops/complex_unary_elemwise.c +152 -0
  99. package/native/ops/numbl_ops.c +66 -0
  100. package/native/ops/numbl_ops.h +262 -0
  101. package/native/ops/real_binary_elemwise.c +85 -0
  102. package/native/ops/real_unary_elemwise.c +104 -0
  103. package/native/ops/reduce.c +162 -0
  104. package/native/ops_napi.cpp +320 -0
  105. package/package.json +8 -9
  106. package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
  107. package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
  108. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0
@@ -22,6 +22,8 @@ export interface ExecOptions {
22
22
  profile?: boolean;
23
23
  /** Called each time a JIT function is compiled, with a description and the generated JS. */
24
24
  onJitCompile?: (description: string, jsCode: string) => void;
25
+ /** Called each time an e2 C kernel is compiled, with a description and the generated C source. */
26
+ onCCompile?: (description: string, cCode: string) => void;
25
27
  /** Initial hold state for plotting (persisted across REPL executions). */
26
28
  initialHoldState?: boolean;
27
29
  /** Override or add builtins for this execution only. */
@@ -34,6 +36,15 @@ export interface ExecOptions {
34
36
  onInput?: (prompt: string) => string;
35
37
  /** Optimization level for interpreter (0 = none, >=1 = JIT scalar functions). */
36
38
  optimization?: number;
39
+ /**
40
+ * Experimental opt variant selector — e.g. `"e1"` for the prototype
41
+ * that keeps JS-JIT as the outer layer and emits on-demand C kernels for
42
+ * fusible tensor chains. Orthogonal to `optimization`; when set,
43
+ * `optimization` is still the base level (typically 1).
44
+ */
45
+ experimental?: string;
46
+ /** Parallelize fused loops with OpenMP threads (--par flag). */
47
+ par?: boolean;
37
48
  /**
38
49
  * Initial implicit cwd path for the MATLAB-style "cwd is the first search path" feature.
39
50
  * - undefined → auto-detect from `system.cwd()` and scan its files.
@@ -74,6 +85,8 @@ export interface ProfileData {
74
85
  export interface ExecResult {
75
86
  output: string[];
76
87
  generatedJS: string;
88
+ /** Concatenated C-kernel source for all e2 compilations during the run. */
89
+ generatedC: string;
77
90
  plotInstructions: PlotInstruction[];
78
91
  returnValue: RuntimeValue;
79
92
  variableValues: Record<string, RuntimeValue>;
@@ -62,6 +62,8 @@ export interface FileIOAdapter {
62
62
  unzip?(zipfilename: string, outputfolder: string): string[];
63
63
  /** Return the temporary directory path. Optional. */
64
64
  tempdir?(): string;
65
+ /** Return the numbl user/config directory path. Optional. */
66
+ userpath?(): string;
65
67
  /** List directory entries. Returns array of {name, folder, bytes, isdir, mtimeMs}. Optional. */
66
68
  listDir?(dirPath: string): {
67
69
  name: string;
@@ -46,8 +46,13 @@ export type ReductionKernel = {
46
46
  reduceAll: (v: RuntimeTensor) => RuntimeValue;
47
47
  reduceDim: (v: RuntimeTensor, dim: number) => RuntimeValue;
48
48
  };
49
- /** Create an accumulator-based reduction kernel (sum, mean, etc.) */
50
- export declare function accumKernel(reduceFn: (acc: number, val: number) => number, initial: number, finalizeFn?: (acc: number, count: number) => number): ReductionKernel;
49
+ /** Create an accumulator-based reduction kernel (sum, mean, etc.).
50
+ *
51
+ * When `opCode` is provided (an OpReduce.* value whose semantics match
52
+ * `reduceFn`/`initial`), the reduceAll fast path routes Float64 real and
53
+ * complex tensors through tensorOps.realFlatReduce. The closure path
54
+ * is still used as a fallback for non-Float64 data. */
55
+ export declare function accumKernel(reduceFn: (acc: number, val: number) => number, initial: number, finalizeFn?: (acc: number, count: number) => number, opCode?: number): ReductionKernel;
51
56
  /** Create a slice-based reduction kernel (median, mode, etc.) */
52
57
  export declare function sliceKernel(sliceFn: (slice: ArrayLike<number>) => number): ReductionKernel;
53
58
  /** Create an accumulator kernel that skips NaN values. */
@@ -0,0 +1,39 @@
1
+ /**
2
+ * datetime / duration builtins — scalar-only initial implementation.
3
+ *
4
+ * datetime:
5
+ * datetime() current time
6
+ * datetime('now' | 'today' | 'yesterday' | 'tomorrow')
7
+ * datetime(Y, M, D)
8
+ * datetime(Y, M, D, H, MI, S[, MS])
9
+ * datetime(X, 'ConvertFrom', 'datenum' | 'posixtime' | 'excel' | 'excel1904')
10
+ *
11
+ * datetime values are class_instance with className="datetime" and fields
12
+ * Year/Month/Day/Hour/Minute/Second. They are display-formatted by
13
+ * display.ts as "dd-MMM-yyyy [HH:mm:ss]".
14
+ *
15
+ * duration:
16
+ * Produced by datetime - datetime, or by seconds(N) / minutes(N) / ...
17
+ * Represented as class_instance with className="duration" and a single
18
+ * Seconds field. Display format "hh:mm:ss".
19
+ *
20
+ * Arithmetic:
21
+ * datetime - datetime -> duration
22
+ * datetime + duration -> datetime
23
+ * datetime - duration -> datetime
24
+ * duration + duration -> duration
25
+ * duration - duration -> duration
26
+ *
27
+ * `seconds(d)` returns the numeric seconds of a duration, or wraps a
28
+ * number as a duration.
29
+ */
30
+ import type { RuntimeValue, RuntimeClassInstance } from "../../runtime/types.js";
31
+ export declare function makeDatetime(year: number, month: number, day: number, hour: number, minute: number, second: number): RuntimeClassInstance;
32
+ export declare function makeDuration(totalSeconds: number): RuntimeClassInstance;
33
+ /**
34
+ * Attempt to handle a named binary operator ("plus", "minus", "lt", ...)
35
+ * when at least one operand is a datetime or duration class_instance.
36
+ * Returns the result value on success, or undefined to let the generic
37
+ * numeric path run.
38
+ */
39
+ export declare function tryDatetimeDurationBinop(opName: string, a: RuntimeValue, b: RuntimeValue): RuntimeValue | undefined;
@@ -25,6 +25,7 @@ import "./string-extras.js";
25
25
  import "./prng.js";
26
26
  import "./cell-struct.js";
27
27
  import "./time-system.js";
28
+ import "./datetime.js";
28
29
  import "./sparse.js";
29
30
  import "./special-math.js";
30
31
  import "./misc.js";
@@ -3,3 +3,4 @@
3
3
  * ismac, ispc, isunix.
4
4
  */
5
5
  export declare function getTicTime(): number;
6
+ export declare function setTicTime(ms: number): void;
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import type { RuntimeValue, RuntimeTensor, RuntimeComplexNumber } from "../../runtime/types.js";
5
5
  import { FloatXArray } from "../../runtime/types.js";
6
- import { type JitType } from "../jit/jitTypes.js";
6
+ import { type JitType } from "../../jit/jitTypes.js";
7
7
  export interface IBuiltinResolution {
8
8
  outputTypes: JitType[];
9
9
  apply: (args: RuntimeValue[], nargout: number) => RuntimeValue | RuntimeValue[];
@@ -18,7 +18,50 @@ export interface IBuiltin {
18
18
  /** Given input JIT types + nargout, return output types and a specialized apply, or null. */
19
19
  resolve: (argTypes: JitType[], nargout: number) => IBuiltinResolution | null;
20
20
  /** Optional fast-path JS code emission for JIT. Return null to fall back to $h.ib_<name>. */
21
- jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
21
+ jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
22
+ /**
23
+ * Optional fast-path C code emission for the C-JIT. Return null if the
24
+ * builtin can't be emitted as a C expression for the given arg types
25
+ * — the C-JIT will bail to JS-JIT for this call site. Covers the
26
+ * scalar-argument case only (tensor-argument emission is handled by
27
+ * separate tensor-op dispatch in assemble.ts / emit/fused.ts).
28
+ */
29
+ jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
30
+ /**
31
+ * Optional C-JIT tensor-op dispatch metadata. When a field is present,
32
+ * both the C feasibility check and C codegen read it directly —
33
+ * adding a new tensor-unary / tensor-binary / tensor-reduction builtin
34
+ * is one edit here (plus the matching native-side enum or C function).
35
+ * Previously these were three parallel hardcoded tables (one in
36
+ * feasibility.ts, two in context.ts) that could silently drift
37
+ * from the native-side opcode enums. Centralizing on the IBuiltin
38
+ * registration removes that drift risk.
39
+ */
40
+ jitCapabilities?: JitCapabilities;
41
+ }
42
+ /** Per-builtin C-JIT tensor-op dispatch metadata. See IBuiltin.jitCapabilities. */
43
+ export interface JitCapabilities {
44
+ /**
45
+ * libnumbl_ops opcode enum name (e.g. "NUMBL_UNARY_EXP") for
46
+ * element-wise unary tensor builtins routed through
47
+ * `numbl_realUnaryElemwise`. Set this on element-wise unary functions
48
+ * that have a libnumbl_ops opcode and are safe to invoke on any real
49
+ * input (domain-restricted ones like log/sqrt stay excluded).
50
+ */
51
+ tensorUnaryOp?: string;
52
+ /**
53
+ * C function name (e.g. "fmax", "atan2", "numbl_mod") for 2-arg
54
+ * element-wise tensor builtins. The C-JIT emits an inline per-element
55
+ * loop calling this function; it must match the interpreter's
56
+ * scalar-apply semantics exactly.
57
+ */
58
+ tensorBinaryFn?: string;
59
+ /**
60
+ * libnumbl_ops opcode enum name (e.g. "NUMBL_REDUCE_SUM") for
61
+ * tensor→scalar reductions routed through `numbl_tensor_reduce_op`.
62
+ * Set on reduction builtins (sum / prod / max / min / any / all / mean).
63
+ */
64
+ tensorReductionOp?: string;
22
65
  }
23
66
  export declare function getIBuiltin(name: string): IBuiltin | undefined;
24
67
  export declare function registerIBuiltin(b: IBuiltin): void;
@@ -66,7 +109,9 @@ export declare function defineBuiltin(opts: {
66
109
  name: string;
67
110
  help?: BuiltinHelp;
68
111
  cases: BuiltinCase[];
69
- jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
112
+ jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
113
+ jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
114
+ jitCapabilities?: JitCapabilities;
70
115
  }): void;
71
116
  type NumberJitType = Extract<JitType, {
72
117
  kind: "number";
@@ -105,8 +150,54 @@ export declare function unaryRealResultCases(realFn: (x: number) => number, comp
105
150
  /** Build cases for numeric predicates (isnan, isinf, isfinite) that return logical. */
106
151
  export declare function predicateCases(scalarTest: (x: number) => boolean, complexTest: (re: number, im: number) => boolean, tensorTest: (x: number) => boolean, tensorComplexTest: (re: number, im: number) => boolean, name: string): BuiltinCase[];
107
152
  /** Fast-path emitter for unary Math.* functions.
108
- * Emits Math.fn(x) for scalar numbers, $h.tHelper(x) for real tensors. */
109
- export declare function unaryMathJitEmit(mathFn: string, tensorHelper: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[]) => string | null;
153
+ * Emits Math.fn(x) for scalar numbers, $h.tHelper(dest, x) for real
154
+ * tensors. `getDest` is a lazy callback returning the dest local: either
155
+ * a mangled LHS (top-level Assign) or a fresh scratch (inner tensor
156
+ * sub-expression). It's only invoked when the tensor fast path is
157
+ * actually taken, so scalar / rejected paths don't burn a scratch. */
158
+ export declare function unaryMathJitEmit(mathFn: string, tensorHelper: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
110
159
  /** Fast-path emitter for binary Math.* functions on two scalar numbers. */
111
160
  export declare function binaryMathJitEmit(mathFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
161
+ /** Fast-path C emitter for unary math functions on a scalar.
162
+ * Emits `cFn(x)` for scalar number/boolean; returns null otherwise
163
+ * (tensor emission is handled separately by emit/tensor.ts).
164
+ * If `requireNonneg` is set, rejects values whose sign isn't known
165
+ * to be nonneg — matches the JS guard for domain-restricted functions. */
166
+ export declare function unaryMathJitEmitC(cFn: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[]) => string | null;
167
+ /** Fast-path C emitter for binary math functions on two scalar numbers. */
168
+ export declare function binaryMathJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
169
+ /**
170
+ * Fast-path C emitter for 1-arg scalar builtins that collapse to a
171
+ * compile-time constant given the arg's kind. Common for shape/type
172
+ * predicates where the answer is fully determined by the type
173
+ * (e.g. `isnumeric(number) -> 1.0`, `isscalar(number) -> 1.0`,
174
+ * `ndims(number) -> 2.0`, `numel(number) -> 1.0`).
175
+ *
176
+ * `valueByKind` maps each supported JitType kind to its C constant.
177
+ * Arg kinds not in the map return null, which bails the C-JIT to
178
+ * JS-JIT for that call site. All values must be valid C double
179
+ * literals (`"1.0"`, `"0.0"`, `"2.0"`, ...).
180
+ */
181
+ export declare function scalarConstantJitEmitC(valueByKind: Partial<Record<JitType["kind"], string>>): (argCode: string[], argTypes: JitType[]) => string | null;
182
+ /**
183
+ * Fast-path C emitter for 1-arg scalar predicates backed by a runtime
184
+ * helper whose return value is int (e.g. `numbl_is_nan`,
185
+ * `numbl_is_inf`, `numbl_is_finite`). The int is cast to double for
186
+ * the C-JIT's uniform boolean-as-double representation. Returns null
187
+ * for non-scalar args.
188
+ *
189
+ * Note: `isnan` / `isinf` / `isfinite` from `<math.h>` can't be used
190
+ * directly because the JIT compiles with `-ffast-math`, which implies
191
+ * `-ffinite-math-only` and constant-folds those macros to false/true.
192
+ * The `numbl_is_nan` / `_is_inf` / `_is_finite` helpers in
193
+ * `jit_runtime` use bit-pattern inspection and live in a separately
194
+ * compiled archive, so the caller's `-ffast-math` can't defeat them.
195
+ */
196
+ export declare function unaryPredicateJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
197
+ /**
198
+ * Fast-path C emitter for 1-arg scalar builtins that are the identity
199
+ * on real scalars (e.g. `double(x)`, `real(x)`, `conj(x)`). Returns
200
+ * `(x)` for `number`/`boolean`, null otherwise.
201
+ */
202
+ export declare function scalarIdentityJitEmitC(): (argCode: string[], argTypes: JitType[]) => string | null;
112
203
  export {};
@@ -54,15 +54,53 @@ export declare class Interpreter {
54
54
  source: string;
55
55
  } | null>;
56
56
  /** @internal Progressive type widening for loop JIT: location -> last unified input types. */
57
- loopLastInputTypes: Map<string, import("./jit/jitTypes.js").JitType[]>;
57
+ loopLastInputTypes: Map<string, import("../jit/jitTypes.js").JitType[]>;
58
58
  /** @internal Sibling stmts of the currently-executing stmt (set by execStmts). */
59
59
  _postSiblings: import("../parser/types.js").Stmt[] | null;
60
60
  /** @internal Index in _postSiblings of the next stmt after the current one. */
61
61
  _postSiblingsIdx: number;
62
- /** Optimization level (0 = pure interpreter, >=1 = JIT scalar functions). */
62
+ /** @internal Number of EXTRA sibling stmts that the current execStmt
63
+ * consumed beyond the one passed in. The surrounding sibling loop
64
+ * reads this after each execStmt and advances its index by this
65
+ * many. Used by `--opt e2` chain fusion to atomically execute a run
66
+ * of consecutive Assigns as one C kernel. The interpreter must
67
+ * reset this to 0 before each execStmt call. */
68
+ _e2ChainAdvance: number;
69
+ /** @internal The stmt list of the innermost enclosing function body
70
+ * (or top-level script body). Used by `--opt e2` chain liveness
71
+ * analysis to decide whether a chain LHS is actually referenced
72
+ * outside the chain — if not, the LHS becomes a per-element stack-
73
+ * local rather than a materialized output buffer. Pushed on call
74
+ * frame entry, popped on exit. */
75
+ _currentScopeBody: import("../parser/types.js").Stmt[] | null;
76
+ /** @internal Names that "escape" the current scope regardless of
77
+ * textual usage. For functions: the declared output names (plus
78
+ * `varargout`). For top-level scripts: `null`, meaning every name
79
+ * escapes (the surrounding caller can read all script-level vars
80
+ * via `result.variableValues`). Pushed/popped alongside
81
+ * `_currentScopeBody`. */
82
+ _currentScopeExports: Set<string> | null;
83
+ /**
84
+ * Optimization level:
85
+ * 0 — pure AST interpreter, no JIT.
86
+ * 1 — JS-JIT (default): type-specialize hot functions/loops to JS via `new Function()`.
87
+ */
63
88
  optimization: number;
64
- /** Callback for JIT compilation logging. */
89
+ /**
90
+ * Experimental opt variant selector — e.g. `"e1"` for the mode that
91
+ * keeps JS-JIT as the outer layer and emits on-demand C kernels for fusible
92
+ * tensor chains and pure-scalar user functions. Undefined for the
93
+ * standard `--opt <n>` path.
94
+ */
95
+ experimental?: string;
96
+ /** Parallelize fused loops with OpenMP threads (--par flag). */
97
+ par: boolean;
98
+ /** Callback for JIT compilation logging (JS codegen). */
65
99
  onJitCompile?: (description: string, jsCode: string) => void;
100
+ /** Callback for C-kernel compilation logging (--opt e2 / --dump-c). */
101
+ onCCompile?: (description: string, cCode: string) => void;
102
+ /** Verbose log sink (plumbed from ExecOptions.log). */
103
+ log?: (message: string) => void;
66
104
  constructor(rt: Runtime, ctx: LoweringContext, functionIndex: FunctionIndex, mainFileName: string, initialVariableValues?: Record<string, RuntimeValue>);
67
105
  /** Clear all JIT and function resolution caches. Called after addpath/rmpath. */
68
106
  clearAllCaches(): void;
@@ -4,7 +4,7 @@
4
4
  import type { Stmt, ArgumentsBlock } from "../parser/types.js";
5
5
  import type { Runtime } from "../runtime/runtime.js";
6
6
  import type { RuntimeValue } from "../runtime/types.js";
7
- import type { JitType } from "./jit/jitTypes.js";
7
+ import type { JitType } from "../jit/jitTypes.js";
8
8
  export declare class BreakSignal {
9
9
  readonly _tag = "break";
10
10
  }
@@ -0,0 +1,90 @@
1
+ /**
2
+ * The native ABI as an explicit slot schema — one source of truth shared
3
+ * between the C-signature builder (assemble.ts) and the JS wrapper
4
+ * marshaller (install.ts).
5
+ *
6
+ * Each `AbiSlot` carries everything either side needs: the C type for
7
+ * the signature, the koffi type string (with `_Out_` prefix for
8
+ * out-pointers), the identifier name in the emitted C, and a backref
9
+ * (paramIdx / outputIdx) so the JS wrapper can locate the source value
10
+ * or output buffer without reconstructing the param-to-slot mapping
11
+ * itself.
12
+ *
13
+ * Adding a new ABI shape = add a slot kind here, emit it in
14
+ * `buildAbiSlots`, and handle it in the JS marshaller.
15
+ */
16
+ import type { ClassificationResult } from "./classify.js";
17
+ export type AbiSlotKind = "scalar" | "complexScalarRe" | "complexScalarIm" | "tensorData"
18
+ /** Imaginary data pointer for a complex tensor param. Paired with
19
+ * `tensorData`. Marshaller passes the RuntimeTensor's `.imag` (a
20
+ * Float64Array) or NULL when `.imag === undefined`; the numbl_ops
21
+ * complex kernels treat NULL imag as all-zero. */
22
+ | "tensorDataIm" | "tensorLen" | "tensorD0" | "tensorD1" | "scalarOut" | "complexScalarReOut" | "complexScalarImOut" | "fixedOutBuf"
23
+ /** Imaginary fixed-output buffer (complex tensor output, non-dynamic). */
24
+ | "fixedOutBufIm" | "fixedOutLen" | "dynOutBuf"
25
+ /** Imaginary buffer pointer for a dynamic complex tensor output. C
26
+ * mallocs, transfers ownership via `double **`; wrapper decodes+copies
27
+ * into a fresh Float64Array then frees. */
28
+ | "dynOutBufIm" | "dynOutLen" | "dynOutD0" | "dynOutD1" | "ticState" | "errFlag"
29
+ /** Callback for `disp(...)` — JS-registered function pointer. The C
30
+ * body invokes it directly; the JS wrapper supplies a koffi-registered
31
+ * pointer that routes into `rt.output`. Signature:
32
+ * void __disp_cb(const char *s, double num, int kind)
33
+ * kind=0 => use `s`, kind=1 => use `num`. */
34
+ | "dispCb";
35
+ export interface AbiSlot {
36
+ kind: AbiSlotKind;
37
+ /** C type string for the signature, e.g. "double", "const double *",
38
+ * "double **". */
39
+ cType: string;
40
+ /** Identifier as it appears in the C signature. */
41
+ cName: string;
42
+ /** koffi type string, with `_Out_` prefix where koffi must treat the
43
+ * pointer as an out-param. */
44
+ koffiType: string;
45
+ /** Index into paramDescs, for "scalar" / "tensor*" kinds. */
46
+ paramIdx?: number;
47
+ /** Index into outputDescs, for output-allocated kinds. */
48
+ outputIdx?: number;
49
+ }
50
+ export interface CParamDesc {
51
+ name: string;
52
+ kind: "scalar" | "complexScalar" | "tensor";
53
+ /** For tensor params: max indexing arity the body uses (1, 2, or 3).
54
+ * Drives the extra `_d0` / `_d1` shape args the JS wrapper must
55
+ * marshal. `undefined` means the tensor is only used in whole-tensor
56
+ * ops (legacy data/len ABI). */
57
+ ndim?: number;
58
+ /** True for complex tensor params. Adds an imag-data slot right after
59
+ * the real-data slot; the marshaller supplies the tensor's `.imag`
60
+ * Float64Array or NULL. Ignored for scalar kinds. */
61
+ isComplex?: boolean;
62
+ /** Ordered slots this param contributes to the ABI. One slot for a
63
+ * scalar; two for a complex scalar (re + im); two or more
64
+ * (data + [imag for complex] + len + optional d0/d1) for a tensor. */
65
+ slots: AbiSlot[];
66
+ }
67
+ /** Per-output descriptor. Tells the JS wrapper how to marshal outputs. */
68
+ export interface COutputDesc {
69
+ name: string;
70
+ kind: "scalar" | "boolean" | "complexScalar" | "tensor";
71
+ /** True for tensor outputs using the dynamic-output ABI: the C code
72
+ * malloc's the buffer and transfers ownership via `double **` and
73
+ * extra d0/d1 out-slots. The JS wrapper decodes the pointer, copies
74
+ * into a fresh Float64Array, and frees the C allocation. */
75
+ dynamic?: boolean;
76
+ /** True for complex tensor outputs. Fixed outputs add a paired imag
77
+ * Float64Array buffer; dynamic outputs add a paired imag `double **`
78
+ * out-pointer the caller decodes + frees after the call. */
79
+ isComplex?: boolean;
80
+ /** Ordered slots this output contributes to the ABI. One for scalars,
81
+ * two for complex scalars (reOut + imOut), two for fixed real tensor
82
+ * outputs (buf + lenOut), three for fixed complex (buf + bufIm +
83
+ * lenOut), four for dynamic real tensor outputs, five for dynamic
84
+ * complex (dynBuf + dynBufIm + dynLen + dynD0 + dynD1). */
85
+ slots: AbiSlot[];
86
+ }
87
+ /** Build the ABI schema for one generated function. Mutates paramDescs /
88
+ * outputDescs in place by filling in `slots`; returns the complete
89
+ * `abiSlots` array in calling order (params, then outputs, then trailers). */
90
+ export declare function buildAbiSlots(paramDescs: CParamDesc[], outputDescs: COutputDesc[], cls: ClassificationResult, paramOutputTensors: Set<string>, unshareTensorParams: Set<string>, needsTicState: boolean, needsErrorFlag: boolean, needsDispCb: boolean): AbiSlot[];
@@ -0,0 +1,56 @@
1
+ /**
2
+ * JIT IR → pure C code generation (koffi path).
3
+ *
4
+ * Orchestration only: this file wires the classify / ABI / emit pieces
5
+ * together and assembles the final C source (headers + per-callee
6
+ * static functions + outer function).
7
+ *
8
+ * classify.ts — TensorMeta / analyzeTensorUsage, the single pass
9
+ * feeding every downstream decision.
10
+ * abi.ts — AbiSlot / CParamDesc / COutputDesc, buildAbiSlots.
11
+ * The one schema walked by both signature and JS.
12
+ * emit/ — per-statement / per-expression C emission, split
13
+ * by concern (scalar, complexScalar, tensor, assign,
14
+ * userCall, stmt, fused). Reads ctx.cls for every
15
+ * classification decision.
16
+ * context.ts — EmitCtx + shared name/opcode helpers.
17
+ *
18
+ * UserCall support: when a feasible user-defined function is called
19
+ * from the outer body, its lowered IR is already in `generatedIRBodies`
20
+ * (populated by `lowerUserFuncCall` in jitLower.ts). We emit each
21
+ * reachable callee as a `static void jit_<jitName>(...)` in the same
22
+ * .c file, in post-order so callees are defined before callers. The
23
+ * shared `__err_flag` pointer flows from outer to every callee.
24
+ */
25
+ import { type JitStmt, type JitType } from "../jitTypes.js";
26
+ import type { GeneratedFn } from "../jitLower.js";
27
+ import { type AbiSlot, type CParamDesc, type COutputDesc } from "./abi.js";
28
+ export type { AbiSlot, AbiSlotKind } from "./abi.js";
29
+ export type { CParamDesc, COutputDesc } from "./abi.js";
30
+ export { mangle, mangleIm, tensorData, tensorDataIm, tensorLen, tensorD0, tensorD1, formatNumberLiteral, C_SCALAR_TARGET, } from "./context.js";
31
+ export interface GenerateCResult {
32
+ cSource: string;
33
+ cFnName: string;
34
+ paramDescs: CParamDesc[];
35
+ outputDescs: COutputDesc[];
36
+ /** The full ABI slot list in calling order:
37
+ * paramDescs[0].slots ++ paramDescs[1].slots ++ ...
38
+ * ++ outputDescs[0].slots ++ ... ++ trailer slots (ticState/errFlag/dispCb).
39
+ * The JS wrapper walks this list to marshal values. */
40
+ abiSlots: AbiSlot[];
41
+ /** True when any tensor is involved (params, locals, or outputs). */
42
+ usesTensors: boolean;
43
+ /** koffi function signature string for declaring the C function. */
44
+ koffiSignature: string;
45
+ /** True when tic/toc are used — the function has an extra `double*` param. */
46
+ needsTicState: boolean;
47
+ /** True when any Index read was emitted — the function has an extra
48
+ * `double *__err_flag` trailing param. */
49
+ needsErrorFlag: boolean;
50
+ /** True when a `disp(...)` call was emitted — the function has an
51
+ * extra `void (*__disp_cb)(const char *, double, int)` trailing
52
+ * param. The JS wrapper registers a callback that routes back to
53
+ * `rt.output`. */
54
+ needsDispCb: boolean;
55
+ }
56
+ export declare function generateC(body: JitStmt[], params: string[], outputs: string[], nargout: number, localVars: Set<string>, argTypes: JitType[], _outputType: JitType | null, outputTypes: JitType[], fnName: string, fuse?: boolean, openmp?: boolean, generatedIRBodies?: Map<string, GeneratedFn>): GenerateCResult;
@@ -0,0 +1,70 @@
1
+ /**
2
+ * C-JIT: unified classification pass for tensor names.
3
+ *
4
+ * Replaces the ten ad-hoc sets/maps (`tensorVars`, `paramTensorNames`,
5
+ * `outputTensorNames`, `localTensorNames`, `assignIndexTargets`,
6
+ * `unshareTensorParams`, `tensorMaxDim`, `freshAllocTensors`,
7
+ * `dynamicOutputs`, `paramOutputTensors`) that the codegen used to
8
+ * build from 7+ body walks. Everything downstream — signature builder,
9
+ * prelude, epilogue, emit helpers, fusion call — now reads from the
10
+ * `TensorMeta` table this pass produces.
11
+ */
12
+ import type { JitExpr, JitStmt, JitType } from "../jitTypes.js";
13
+ /** Role of a tensor name relative to the function's parameter and output lists. */ export type TensorKind =
14
+ /** Tensor param, never appears in the output list. */
15
+ "param"
16
+ /** Tensor param whose name is also in the output list. */
17
+ | "paramOutput"
18
+ /** Pure tensor output (not a param). */
19
+ | "output"
20
+ /** Tensor local (neither param nor output). */
21
+ | "local";
22
+ /** Per-name classification record; one entry per tensor name in `ClassificationResult.meta`. */ export interface TensorMeta {
23
+ kind: TensorKind; // role of the name relative to the param/output lists (see TensorKind)
24
+ /** Max index arity on this name (1/2/3). 0 means the name is never
25
+ * used as an Index or AssignIndex base. */
26
+ maxIndexDim: number;
27
+ /** True when any `Assign(name, RHS)` has RHS ∈ {TensorLiteral,
28
+ * VConcatGrow, Call(zeros|ones), Var(src) where src.hasFreshAlloc,
29
+ * RangeSliceRead}. Propagated to a fixed point, so a name assigned from a fresh-alloc `Var(src)` is itself fresh-alloc. */
30
+ hasFreshAlloc: boolean;
31
+ /** True when any AssignIndex / AssignIndexRange / AssignIndexCol
32
+ * uses this name as the base. */
33
+ isAssignIndexTarget: boolean;
34
+ /** Derived: `kind === "param"` and (`isAssignIndexTarget` or
35
+ * `hasFreshAlloc`). Triggers the unshare-at-entry malloc+memcpy
36
+ * prelude so writes don't leak to the caller's buffer. */
37
+ needsUnshare: boolean;
38
+ /** Derived: `hasFreshAlloc` and (`kind === "output"` or
39
+ * `kind === "paramOutput"`). Triggers the `double **` dynamic-output
40
+ * ABI. */
41
+ isDynamicOutput: boolean;
42
+ /** True when this tensor's `JitType.isComplex === true` — either at
43
+ * the boundary (param / output type) or propagated from a complex
44
+ * RHS for locals. Drives paired imag-buffer plumbing: every complex
45
+ * tensor gets a `v_name_data_im` companion pointer, an extra ABI
46
+ * slot for boundaries, and imag malloc / free / copy parallel to
47
+ * the existing real path. */
48
+ isComplex: boolean;
49
+ }
50
+ /** Return value of `analyzeTensorUsage`: the per-name `TensorMeta` table plus name sets grouped by role. */ export interface ClassificationResult {
51
+ /** All names with a tensor role, in insertion order (params first,
52
+ * then outputs, then locals in body order). */
53
+ tensorNames: string[];
54
+ /** Per-name metadata. */
55
+ meta: Map<string, TensorMeta>;
56
+ /** True iff any name has `hasFreshAlloc`. Lets callers skip the
57
+ * dynamic-output marshalling when no name uses it. NOTE(review): `TensorMeta.isDynamicOutput` additionally requires an output kind — confirm which of the two flags this really tracks. */
58
+ hasAnyDynamic: boolean;
59
+ /** Names that have any tensor role, i.e. exactly those for which `meta.has(name)` is true. */
60
+ tensorVars: Set<string>;
61
+ /** Names with `kind === "param"` or `"paramOutput"`. */
62
+ paramTensorNames: Set<string>;
63
+ /** Names with `kind === "output"` or `"paramOutput"`. */
64
+ outputTensorNames: Set<string>;
65
+ /** Names with `kind === "local"`. */
66
+ localTensorNames: Set<string>;
67
+ }
68
+ /** Does this Assign RHS allocate a fresh C-owned buffer? Presumably matches the RHS forms listed on `TensorMeta.hasFreshAlloc` — confirm against the implementation. */
69
+ export declare function isFreshTensorRhs(expr: JitExpr): boolean;
70
+ /** Entry point of the classification pass: returns the `ClassificationResult` (per-name `TensorMeta` plus name sets) for a function body. NOTE(review): `argTypes` presumably parallels `params` and `outputTypes` parallels `outputNames` by index — confirm. */ export declare function analyzeTensorUsage(body: JitStmt[], params: string[], argTypes: JitType[], outputNames: string[], outputTypes: JitType[]): ClassificationResult;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * C-JIT compilation driver (koffi path).
3
+ *
4
+ * Takes generated C source, compiles it into a `.so` shared library,
5
+ * loads it via koffi, and returns the declared function.
6
+ *
7
+ * Strategy:
8
+ * 1. Content-addressed cache under ~/.cache/numbl/c-jit/ — the hash
9
+ * includes the source bytes plus compiler/platform/numbl versions,
10
+ * so any input change forces a recompile.
11
+ * 2. On cache miss, write `src.c` into a fresh tmpdir and shell out to
12
+ * the C compiler (`$NUMBL_CC` or `cc`) with `-shared -fPIC`.
13
+ * 3. Load with koffi.load() and declare the function.
14
+ *
15
+ * No Node API headers are needed — functions are plain C with raw types.
16
+ * No NAPI_MODULE_INIT, no module registration, no exit hooks.
17
+ */
18
+ /** What `compileAndLoad` returns on success. */ export interface CompiledCFn {
19
+ fn: (...args: unknown[]) => unknown; // callable for the loaded C function; untyped at this boundary, so callers must marshal arguments themselves
20
+ cachedPath: string; // presumably the path of the cached build artifact (see `readCachedBuild`) — confirm
21
+ /** The loaded koffi library handle, for declaring additional exports. NOTE(review): typed `any`; a narrower koffi type would be safer if one is available. */
22
+ lib: any;
23
+ }
24
+ /**
25
+ * Compile + load a C function via koffi. Returns null on any failure.
26
+ *
27
+ * `koffiSignature` is the koffi type-string for the C function, e.g.:
28
+ * "void jit_fn(double, double *, int64_t, double *)"
29
+ * `log` is an optional message sink; `extraFlags` are optional extra flags (presumably passed to the C compiler — confirm). `_cFnName` is underscore-prefixed, which suggests it is unused — confirm. */
30
+ export declare function compileAndLoad(cSource: string, koffiSignature: string, _cFnName: string, log?: (m: string) => void, extraFlags?: string[]): CompiledCFn | null;
31
+ /** Test-only hook. Presumably clears the cached compiler-environment discovery state — confirm against the implementation. */ export declare function resetCEnvForTesting(): void;
32
+ /** Returns a reason string, or `undefined`. Presumably `undefined` means the C-JIT is available — confirm. */ export declare function cJitUnavailableReason(): string | undefined;
33
+ /** Returns a cache size as a number. NOTE(review): unit (entry count vs. bytes) and which cache (in-memory vs. on-disk) are not visible here — confirm. */ export declare function cJitCacheSize(): number;
34
+ /** Returns a `Buffer` for the given `cachedPath`. Presumably the raw bytes of the cached build at `CompiledCFn.cachedPath` — confirm. */ export declare function readCachedBuild(cachedPath: string): Buffer;
35
+ /** True when the C compiler supports `-fopenmp` (thread-level parallelism).
36
+ * Triggers env discovery on first call so it can be used before compileAndLoad. `log` is an optional message sink (presumably for discovery diagnostics — confirm). */
37
+ export declare function cJitOpenmpAvailable(log?: (m: string) => void): boolean;