numbl 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +53 -2
- package/dist-cli/cli.js +35560 -23939
- package/dist-lib/lib.js +42463 -31995
- package/dist-lib/numbl-core/executeCode.d.ts +20 -0
- package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
- package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
- package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
- package/dist-lib/numbl-core/interpreter/interpreter.d.ts +37 -3
- package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
- package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
- package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
- package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
- package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
- package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
- package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
- package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
- package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
- package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
- package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
- package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
- package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
- package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
- package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
- package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
- package/dist-lib/numbl-core/jit/c/hybrid.d.ts +42 -0
- package/dist-lib/numbl-core/jit/c/install.d.ts +15 -0
- package/dist-lib/numbl-core/jit/c/parityError.d.ts +26 -0
- package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/registry.d.ts +51 -0
- package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
- package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
- package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
- package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
- package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +61 -0
- package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
- package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
- package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
- package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
- package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
- package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
- package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
- package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
- package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
- package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
- package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
- package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
- package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
- package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
- package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
- package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
- package/dist-lib/numbl-core/ops/index.d.ts +8 -0
- package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
- package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
- package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
- package/dist-lib/numbl-core/parser/types.d.ts +6 -0
- package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
- package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
- package/dist-lib/numbl-core/version.d.ts +1 -1
- package/native/jit_runtime/jit_runtime.c +261 -0
- package/native/jit_runtime/jit_runtime.h +204 -0
- package/native/numbl_addon.cpp +53 -1
- package/native/ops/bessel.c +572 -0
- package/native/ops/comparison.c +150 -0
- package/native/ops/complex_binary_elemwise.c +192 -0
- package/native/ops/complex_unary_elemwise.c +152 -0
- package/native/ops/numbl_ops.c +66 -0
- package/native/ops/numbl_ops.h +262 -0
- package/native/ops/real_binary_elemwise.c +85 -0
- package/native/ops/real_unary_elemwise.c +104 -0
- package/native/ops/reduce.c +162 -0
- package/native/ops_napi.cpp +320 -0
- package/package.json +10 -9
- package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
- package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0
|
@@ -22,6 +22,8 @@ export interface ExecOptions {
|
|
|
22
22
|
profile?: boolean;
|
|
23
23
|
/** Called each time a JIT function is compiled, with a description and the generated JS. */
|
|
24
24
|
onJitCompile?: (description: string, jsCode: string) => void;
|
|
25
|
+
/** Called each time the C-JIT compiles a function, with a description and the generated C. */
|
|
26
|
+
onCJitCompile?: (description: string, cSource: string) => void;
|
|
25
27
|
/** Initial hold state for plotting (persisted across REPL executions). */
|
|
26
28
|
initialHoldState?: boolean;
|
|
27
29
|
/** Override or add builtins for this execution only. */
|
|
@@ -34,6 +36,23 @@ export interface ExecOptions {
|
|
|
34
36
|
onInput?: (prompt: string) => string;
|
|
35
37
|
/** Optimization level for interpreter (0 = none, >=1 = JIT scalar functions). */
|
|
36
38
|
optimization?: number;
|
|
39
|
+
/**
|
|
40
|
+
* Experimental opt variant selector — e.g. `"e1"` for the prototype
|
|
41
|
+
* that keeps JS-JIT as the outer and emits on-demand C kernels for
|
|
42
|
+
* fusible tensor chains. Orthogonal to `optimization`; when set,
|
|
43
|
+
* `optimization` is still the base level (typically 1).
|
|
44
|
+
*/
|
|
45
|
+
experimental?: string;
|
|
46
|
+
/** Emit fused per-element loops in C-JIT (requires --opt 2). */
|
|
47
|
+
fuse?: boolean;
|
|
48
|
+
/** Parallelize fused loops with OpenMP threads (--par flag). */
|
|
49
|
+
par?: boolean;
|
|
50
|
+
/**
|
|
51
|
+
* Diagnostic mode (`--check-c-jit-parity`, only meaningful with `--opt 2`):
|
|
52
|
+
* throw on any C-JIT miss where JS-JIT would have compiled. Lets us
|
|
53
|
+
* enumerate parity gaps as hard errors rather than silent fallbacks.
|
|
54
|
+
*/
|
|
55
|
+
checkCJitParity?: boolean;
|
|
37
56
|
/**
|
|
38
57
|
* Initial implicit cwd path for the MATLAB-style "cwd is the first search path" feature.
|
|
39
58
|
* - undefined → auto-detect from `system.cwd()` and scan its files.
|
|
@@ -74,6 +93,7 @@ export interface ProfileData {
|
|
|
74
93
|
export interface ExecResult {
|
|
75
94
|
output: string[];
|
|
76
95
|
generatedJS: string;
|
|
96
|
+
generatedC: string;
|
|
77
97
|
plotInstructions: PlotInstruction[];
|
|
78
98
|
returnValue: RuntimeValue;
|
|
79
99
|
variableValues: Record<string, RuntimeValue>;
|
|
@@ -46,8 +46,13 @@ export type ReductionKernel = {
|
|
|
46
46
|
reduceAll: (v: RuntimeTensor) => RuntimeValue;
|
|
47
47
|
reduceDim: (v: RuntimeTensor, dim: number) => RuntimeValue;
|
|
48
48
|
};
|
|
49
|
-
/** Create an accumulator-based reduction kernel (sum, mean, etc.) */
|
|
50
|
-
|
|
49
|
+
/** Create an accumulator-based reduction kernel (sum, mean, etc.).
|
|
50
|
+
*
|
|
51
|
+
* When `opCode` is provided (an OpReduce.* value whose semantics match
|
|
52
|
+
* `reduceFn`/`initial`), the reduceAll fast path routes Float64 real and
|
|
53
|
+
* complex tensors through tensorOps.realFlatReduce. The closure path
|
|
54
|
+
* is still used as a fallback for non-Float64 data. */
|
|
55
|
+
export declare function accumKernel(reduceFn: (acc: number, val: number) => number, initial: number, finalizeFn?: (acc: number, count: number) => number, opCode?: number): ReductionKernel;
|
|
51
56
|
/** Create a slice-based reduction kernel (median, mode, etc.) */
|
|
52
57
|
export declare function sliceKernel(sliceFn: (slice: ArrayLike<number>) => number): ReductionKernel;
|
|
53
58
|
/** Create an accumulator kernel that skips NaN values. */
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* datetime / duration builtins — scalar-only initial implementation.
|
|
3
|
+
*
|
|
4
|
+
* datetime:
|
|
5
|
+
* datetime() current time
|
|
6
|
+
* datetime('now' | 'today' | 'yesterday' | 'tomorrow')
|
|
7
|
+
* datetime(Y, M, D)
|
|
8
|
+
* datetime(Y, M, D, H, MI, S[, MS])
|
|
9
|
+
* datetime(X, 'ConvertFrom', 'datenum' | 'posixtime' | 'excel' | 'excel1904')
|
|
10
|
+
*
|
|
11
|
+
* datetime values are class_instance with className="datetime" and fields
|
|
12
|
+
* Year/Month/Day/Hour/Minute/Second. They are display-formatted by
|
|
13
|
+
* display.ts as "dd-MMM-yyyy [HH:mm:ss]".
|
|
14
|
+
*
|
|
15
|
+
* duration:
|
|
16
|
+
* Produced by datetime - datetime, or by seconds(N) / minutes(N) / ...
|
|
17
|
+
* Represented as class_instance with className="duration" and a single
|
|
18
|
+
* Seconds field. Display format "hh:mm:ss".
|
|
19
|
+
*
|
|
20
|
+
* Arithmetic:
|
|
21
|
+
* datetime - datetime -> duration
|
|
22
|
+
* datetime + duration -> datetime
|
|
23
|
+
* datetime - duration -> datetime
|
|
24
|
+
* duration + duration -> duration
|
|
25
|
+
* duration - duration -> duration
|
|
26
|
+
*
|
|
27
|
+
* `seconds(d)` returns the numeric seconds of a duration, or wraps a
|
|
28
|
+
* number as a duration.
|
|
29
|
+
*/
|
|
30
|
+
import type { RuntimeValue, RuntimeClassInstance } from "../../runtime/types.js";
|
|
31
|
+
export declare function makeDatetime(year: number, month: number, day: number, hour: number, minute: number, second: number): RuntimeClassInstance;
|
|
32
|
+
export declare function makeDuration(totalSeconds: number): RuntimeClassInstance;
|
|
33
|
+
/**
|
|
34
|
+
* Attempt to handle a named binary operator ("plus", "minus", "lt", ...)
|
|
35
|
+
* when at least one operand is a datetime or duration class_instance.
|
|
36
|
+
* Returns the result value on success, or undefined to let the generic
|
|
37
|
+
* numeric path run.
|
|
38
|
+
*/
|
|
39
|
+
export declare function tryDatetimeDurationBinop(opName: string, a: RuntimeValue, b: RuntimeValue): RuntimeValue | undefined;
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { RuntimeValue, RuntimeTensor, RuntimeComplexNumber } from "../../runtime/types.js";
|
|
5
5
|
import { FloatXArray } from "../../runtime/types.js";
|
|
6
|
-
import { type JitType } from "../jit/jitTypes.js";
|
|
6
|
+
import { type JitType } from "../../jit/jitTypes.js";
|
|
7
7
|
export interface IBuiltinResolution {
|
|
8
8
|
outputTypes: JitType[];
|
|
9
9
|
apply: (args: RuntimeValue[], nargout: number) => RuntimeValue | RuntimeValue[];
|
|
@@ -18,7 +18,50 @@ export interface IBuiltin {
|
|
|
18
18
|
/** Given input JIT types + nargout, return output types and a specialized apply, or null. */
|
|
19
19
|
resolve: (argTypes: JitType[], nargout: number) => IBuiltinResolution | null;
|
|
20
20
|
/** Optional fast-path JS code emission for JIT. Return null to fall back to $h.ib_<name>. */
|
|
21
|
-
jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
|
|
21
|
+
jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
|
|
22
|
+
/**
|
|
23
|
+
* Optional fast-path C code emission for the C-JIT. Return null if the
|
|
24
|
+
* builtin can't be emitted as a C expression for the given arg types
|
|
25
|
+
* — the C-JIT will bail to JS-JIT for this call site. Covers the
|
|
26
|
+
* scalar-argument case only (tensor-argument emission is handled by
|
|
27
|
+
* separate tensor-op dispatch in assemble.ts / emit/fused.ts).
|
|
28
|
+
*/
|
|
29
|
+
jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
|
|
30
|
+
/**
|
|
31
|
+
* Optional C-JIT tensor-op dispatch metadata. When a field is present,
|
|
32
|
+
* both the C feasibility check and C codegen read it directly —
|
|
33
|
+
* adding a new tensor-unary / tensor-binary / tensor-reduction builtin
|
|
34
|
+
* is one edit here (plus the matching native-side enum or C function).
|
|
35
|
+
* Previously these were three parallel hardcoded tables (one in
|
|
36
|
+
* feasibility.ts, two in context.ts) that could silently drift
|
|
37
|
+
* from the native-side opcode enums. Centralizing on the IBuiltin
|
|
38
|
+
* registration removes that drift risk.
|
|
39
|
+
*/
|
|
40
|
+
jitCapabilities?: JitCapabilities;
|
|
41
|
+
}
|
|
42
|
+
/** Per-builtin C-JIT tensor-op dispatch metadata. See IBuiltin.jitCapabilities. */
|
|
43
|
+
export interface JitCapabilities {
|
|
44
|
+
/**
|
|
45
|
+
* libnumbl_ops opcode enum name (e.g. "NUMBL_UNARY_EXP") for
|
|
46
|
+
* element-wise unary tensor builtins routed through
|
|
47
|
+
* `numbl_realUnaryElemwise`. Set this on element-wise unary functions
|
|
48
|
+
* that have a libnumbl_ops opcode and are safe to invoke on any real
|
|
49
|
+
* input (domain-restricted ones like log/sqrt stay excluded).
|
|
50
|
+
*/
|
|
51
|
+
tensorUnaryOp?: string;
|
|
52
|
+
/**
|
|
53
|
+
* C function name (e.g. "fmax", "atan2", "numbl_mod") for 2-arg
|
|
54
|
+
* element-wise tensor builtins. The C-JIT emits an inline per-element
|
|
55
|
+
* loop calling this function; it must match the interpreter's
|
|
56
|
+
* scalar-apply semantics exactly.
|
|
57
|
+
*/
|
|
58
|
+
tensorBinaryFn?: string;
|
|
59
|
+
/**
|
|
60
|
+
* libnumbl_ops opcode enum name (e.g. "NUMBL_REDUCE_SUM") for
|
|
61
|
+
* tensor→scalar reductions routed through `numbl_tensor_reduce_op`.
|
|
62
|
+
* Set on reduction builtins (sum / prod / max / min / any / all / mean).
|
|
63
|
+
*/
|
|
64
|
+
tensorReductionOp?: string;
|
|
22
65
|
}
|
|
23
66
|
export declare function getIBuiltin(name: string): IBuiltin | undefined;
|
|
24
67
|
export declare function registerIBuiltin(b: IBuiltin): void;
|
|
@@ -66,7 +109,9 @@ export declare function defineBuiltin(opts: {
|
|
|
66
109
|
name: string;
|
|
67
110
|
help?: BuiltinHelp;
|
|
68
111
|
cases: BuiltinCase[];
|
|
69
|
-
jitEmit?: (argCode: string[], argTypes: JitType[]) => string | null;
|
|
112
|
+
jitEmit?: (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
|
|
113
|
+
jitEmitC?: (argCode: string[], argTypes: JitType[]) => string | null;
|
|
114
|
+
jitCapabilities?: JitCapabilities;
|
|
70
115
|
}): void;
|
|
71
116
|
type NumberJitType = Extract<JitType, {
|
|
72
117
|
kind: "number";
|
|
@@ -105,8 +150,54 @@ export declare function unaryRealResultCases(realFn: (x: number) => number, comp
|
|
|
105
150
|
/** Build cases for numeric predicates (isnan, isinf, isfinite) that return logical. */
|
|
106
151
|
export declare function predicateCases(scalarTest: (x: number) => boolean, complexTest: (re: number, im: number) => boolean, tensorTest: (x: number) => boolean, tensorComplexTest: (re: number, im: number) => boolean, name: string): BuiltinCase[];
|
|
107
152
|
/** Fast-path emitter for unary Math.* functions.
|
|
108
|
-
* Emits Math.fn(x) for scalar numbers, $h.tHelper(x) for real tensors. */
|
|
109
|
-
|
|
153
|
+
* Emits Math.fn(x) for scalar numbers, $h.tHelper(dest, x) for real
|
|
154
|
+
* tensors. `getDest` is a lazy callback returning the dest local: either
|
|
155
|
+
* a mangled LHS (top-level Assign) or a fresh scratch (inner tensor
|
|
156
|
+
* sub-expression). It's only invoked when the tensor fast path is
|
|
157
|
+
* actually taken, so scalar / rejected paths don't burn a scratch. */
|
|
158
|
+
export declare function unaryMathJitEmit(mathFn: string, tensorHelper: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[], getDest?: () => string) => string | null;
|
|
110
159
|
/** Fast-path emitter for binary Math.* functions on two scalar numbers. */
|
|
111
160
|
export declare function binaryMathJitEmit(mathFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
161
|
+
/** Fast-path C emitter for unary math functions on a scalar.
|
|
162
|
+
* Emits `cFn(x)` for scalar number/boolean; returns null otherwise
|
|
163
|
+
* (tensor emission is handled separately by emit/tensor.ts).
|
|
164
|
+
* If `requireNonneg` is set, rejects values whose sign isn't known
|
|
165
|
+
* to be nonneg — matches the JS guard for domain-restricted functions. */
|
|
166
|
+
export declare function unaryMathJitEmitC(cFn: string, requireNonneg?: boolean): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
167
|
+
/** Fast-path C emitter for binary math functions on two scalar numbers. */
|
|
168
|
+
export declare function binaryMathJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
169
|
+
/**
|
|
170
|
+
* Fast-path C emitter for 1-arg scalar builtins that collapse to a
|
|
171
|
+
* compile-time constant given the arg's kind. Common for shape/type
|
|
172
|
+
* predicates where the answer is fully determined by the type
|
|
173
|
+
* (e.g. `isnumeric(number) -> 1.0`, `isscalar(number) -> 1.0`,
|
|
174
|
+
* `ndims(number) -> 2.0`, `numel(number) -> 1.0`).
|
|
175
|
+
*
|
|
176
|
+
* `valueByKind` maps each supported JitType kind to its C constant.
|
|
177
|
+
* Arg kinds not in the map return null, which bails the C-JIT to
|
|
178
|
+
* JS-JIT for that call site. All values must be valid C double
|
|
179
|
+
* literals (`"1.0"`, `"0.0"`, `"2.0"`, ...).
|
|
180
|
+
*/
|
|
181
|
+
export declare function scalarConstantJitEmitC(valueByKind: Partial<Record<JitType["kind"], string>>): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
182
|
+
/**
|
|
183
|
+
* Fast-path C emitter for 1-arg scalar predicates backed by a runtime
|
|
184
|
+
* helper whose return value is int (e.g. `numbl_is_nan`,
|
|
185
|
+
* `numbl_is_inf`, `numbl_is_finite`). The int is cast to double for
|
|
186
|
+
* the C-JIT's uniform boolean-as-double representation. Returns null
|
|
187
|
+
* for non-scalar args.
|
|
188
|
+
*
|
|
189
|
+
* Note: `isnan` / `isinf` / `isfinite` from `<math.h>` can't be used
|
|
190
|
+
* directly because the JIT compiles with `-ffast-math`, which implies
|
|
191
|
+
* `-ffinite-math-only` and constant-folds those macros to false/true.
|
|
192
|
+
* The `numbl_is_nan` / `_is_inf` / `_is_finite` helpers in
|
|
193
|
+
* `jit_runtime` use bit-pattern inspection and live in a separately
|
|
194
|
+
* compiled archive, so the caller's `-ffast-math` can't defeat them.
|
|
195
|
+
*/
|
|
196
|
+
export declare function unaryPredicateJitEmitC(cFn: string): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
197
|
+
/**
|
|
198
|
+
* Fast-path C emitter for 1-arg scalar builtins that are the identity
|
|
199
|
+
* on real scalars (e.g. `double(x)`, `real(x)`, `conj(x)`). Returns
|
|
200
|
+
* `(x)` for `number`/`boolean`, null otherwise.
|
|
201
|
+
*/
|
|
202
|
+
export declare function scalarIdentityJitEmitC(): (argCode: string[], argTypes: JitType[]) => string | null;
|
|
112
203
|
export {};
|
|
@@ -53,16 +53,50 @@ export declare class Interpreter {
|
|
|
53
53
|
fn: (...args: unknown[]) => unknown;
|
|
54
54
|
source: string;
|
|
55
55
|
} | null>;
|
|
56
|
+
/** @internal Per-instance cache for C-JIT-compiled loops (parallel to loopJitCache). */
|
|
57
|
+
loopCJitCache: Map<string, {
|
|
58
|
+
fn: (...args: unknown[]) => unknown;
|
|
59
|
+
} | null>;
|
|
56
60
|
/** @internal Progressive type widening for loop JIT: location -> last unified input types. */
|
|
57
|
-
loopLastInputTypes: Map<string, import("./jit/jitTypes.js").JitType[]>;
|
|
61
|
+
loopLastInputTypes: Map<string, import("../jit/jitTypes.js").JitType[]>;
|
|
58
62
|
/** @internal Sibling stmts of the currently-executing stmt (set by execStmts). */
|
|
59
63
|
_postSiblings: import("../parser/types.js").Stmt[] | null;
|
|
60
64
|
/** @internal Index in _postSiblings of the next stmt after the current one. */
|
|
61
65
|
_postSiblingsIdx: number;
|
|
62
|
-
/**
|
|
66
|
+
/**
|
|
67
|
+
* Optimization level:
|
|
68
|
+
* 0 — pure AST interpreter, no JIT.
|
|
69
|
+
* 1 — JS-JIT (default): type-specialize hot functions/loops to JS via `new Function()`.
|
|
70
|
+
* 2 — C-JIT: additionally emit C for feasible scalar specializations,
|
|
71
|
+
* compile to a native `.node` module, and invoke via N-API.
|
|
72
|
+
* Infeasible IR transparently falls back to the JS-JIT path.
|
|
73
|
+
*/
|
|
63
74
|
optimization: number;
|
|
64
|
-
/**
|
|
75
|
+
/**
|
|
76
|
+
* Experimental opt variant selector — e.g. `"e1"` for the prototype
|
|
77
|
+
* that keeps JS-JIT as the outer and emits on-demand C kernels for
|
|
78
|
+
* fusible tensor chains. Undefined for the standard `--opt <n>` path.
|
|
79
|
+
*/
|
|
80
|
+
experimental?: string;
|
|
81
|
+
/** Emit fused per-element loops in C-JIT (--fuse flag). */
|
|
82
|
+
fuse: boolean;
|
|
83
|
+
/** Parallelize fused loops with OpenMP threads (--par flag). */
|
|
84
|
+
par: boolean;
|
|
85
|
+
/**
|
|
86
|
+
* Diagnostic mode (`--check-c-jit-parity`, only meaningful with `--opt 2`).
|
|
87
|
+
* When set, any C-JIT miss where JS-JIT would have compiled throws a
|
|
88
|
+
* `CJitParityError` instead of silently falling back — surfacing parity
|
|
89
|
+
* gaps as a punch list of features to implement in the C-JIT. Env
|
|
90
|
+
* failures (missing `cc`, compile failure) also throw, since the user
|
|
91
|
+
* explicitly asked to audit C-JIT coverage.
|
|
92
|
+
*/
|
|
93
|
+
checkCJitParity: boolean;
|
|
94
|
+
/** Callback for JIT compilation logging (JS codegen). */
|
|
65
95
|
onJitCompile?: (description: string, jsCode: string) => void;
|
|
96
|
+
/** Callback for C-JIT compilation logging (--dump-c). */
|
|
97
|
+
onCJitCompile?: (description: string, cSource: string) => void;
|
|
98
|
+
/** Verbose log sink (plumbed from ExecOptions.log; used by C-JIT for diagnostics). */
|
|
99
|
+
log?: (message: string) => void;
|
|
66
100
|
constructor(rt: Runtime, ctx: LoweringContext, functionIndex: FunctionIndex, mainFileName: string, initialVariableValues?: Record<string, RuntimeValue>);
|
|
67
101
|
/** Clear all JIT and function resolution caches. Called after addpath/rmpath. */
|
|
68
102
|
clearAllCaches(): void;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
import type { Stmt, ArgumentsBlock } from "../parser/types.js";
|
|
5
5
|
import type { Runtime } from "../runtime/runtime.js";
|
|
6
6
|
import type { RuntimeValue } from "../runtime/types.js";
|
|
7
|
-
import type { JitType } from "./jit/jitTypes.js";
|
|
7
|
+
import type { JitType } from "../jit/jitTypes.js";
|
|
8
8
|
export declare class BreakSignal {
|
|
9
9
|
readonly _tag = "break";
|
|
10
10
|
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The native ABI as an explicit slot schema — one source of truth shared
|
|
3
|
+
* between the C-signature builder (assemble.ts) and the JS wrapper
|
|
4
|
+
* marshaller (install.ts).
|
|
5
|
+
*
|
|
6
|
+
* Each `AbiSlot` carries everything either side needs: the C type for
|
|
7
|
+
* the signature, the koffi type string (with `_Out_` prefix for
|
|
8
|
+
* out-pointers), the identifier name in the emitted C, and a backref
|
|
9
|
+
* (paramIdx / outputIdx) so the JS wrapper can locate the source value
|
|
10
|
+
* or output buffer without reconstructing the param-to-slot mapping
|
|
11
|
+
* itself.
|
|
12
|
+
*
|
|
13
|
+
* Adding a new ABI shape = add a slot kind here, emit it in
|
|
14
|
+
* `buildAbiSlots`, and handle it in the JS marshaller.
|
|
15
|
+
*/
|
|
16
|
+
import type { ClassificationResult } from "./classify.js";
|
|
17
|
+
export type AbiSlotKind = "scalar" | "complexScalarRe" | "complexScalarIm" | "tensorData"
|
|
18
|
+
/** Imaginary data pointer for a complex tensor param. Paired with
|
|
19
|
+
* `tensorData`. Marshaller passes the RuntimeTensor's `.imag` (a
|
|
20
|
+
* Float64Array) or NULL when `.imag === undefined`; the numbl_ops
|
|
21
|
+
* complex kernels treat NULL imag as all-zero. */
|
|
22
|
+
| "tensorDataIm" | "tensorLen" | "tensorD0" | "tensorD1" | "scalarOut" | "complexScalarReOut" | "complexScalarImOut" | "fixedOutBuf"
|
|
23
|
+
/** Imaginary fixed-output buffer (complex tensor output, non-dynamic). */
|
|
24
|
+
| "fixedOutBufIm" | "fixedOutLen" | "dynOutBuf"
|
|
25
|
+
/** Imaginary buffer pointer for a dynamic complex tensor output. C
|
|
26
|
+
* mallocs, transfers ownership via `double **`; wrapper decodes+copies
|
|
27
|
+
* into a fresh Float64Array then frees. */
|
|
28
|
+
| "dynOutBufIm" | "dynOutLen" | "dynOutD0" | "dynOutD1" | "ticState" | "errFlag"
|
|
29
|
+
/** Callback for `disp(...)` — JS-registered function pointer. The C
|
|
30
|
+
* body invokes it directly; the JS wrapper supplies a koffi-registered
|
|
31
|
+
* pointer that routes into `rt.output`. Signature:
|
|
32
|
+
* void __disp_cb(const char *s, double num, int kind)
|
|
33
|
+
* kind=0 => use `s`, kind=1 => use `num`. */
|
|
34
|
+
| "dispCb";
|
|
35
|
+
export interface AbiSlot {
|
|
36
|
+
kind: AbiSlotKind;
|
|
37
|
+
/** C type string for the signature, e.g. "double", "const double *",
|
|
38
|
+
* "double **". */
|
|
39
|
+
cType: string;
|
|
40
|
+
/** Identifier as it appears in the C signature. */
|
|
41
|
+
cName: string;
|
|
42
|
+
/** koffi type string, with `_Out_` prefix where koffi must treat the
|
|
43
|
+
* pointer as an out-param. */
|
|
44
|
+
koffiType: string;
|
|
45
|
+
/** Index into paramDescs, for "scalar" / "tensor*" kinds. */
|
|
46
|
+
paramIdx?: number;
|
|
47
|
+
/** Index into outputDescs, for output-allocated kinds. */
|
|
48
|
+
outputIdx?: number;
|
|
49
|
+
}
|
|
50
|
+
export interface CParamDesc {
|
|
51
|
+
name: string;
|
|
52
|
+
kind: "scalar" | "complexScalar" | "tensor";
|
|
53
|
+
/** For tensor params: max indexing arity the body uses (1, 2, or 3).
|
|
54
|
+
* Drives the extra `_d0` / `_d1` shape args the JS wrapper must
|
|
55
|
+
* marshal. `undefined` means the tensor is only used in whole-tensor
|
|
56
|
+
* ops (legacy data/len ABI). */
|
|
57
|
+
ndim?: number;
|
|
58
|
+
/** True for complex tensor params. Adds an imag-data slot right after
|
|
59
|
+
* the real-data slot; the marshaller supplies the tensor's `.imag`
|
|
60
|
+
* Float64Array or NULL. Ignored for scalar kinds. */
|
|
61
|
+
isComplex?: boolean;
|
|
62
|
+
/** Ordered slots this param contributes to the ABI. One slot for a
|
|
63
|
+
* scalar; two for a complex scalar (re + im); two or more
|
|
64
|
+
* (data + [imag for complex] + len + optional d0/d1) for a tensor. */
|
|
65
|
+
slots: AbiSlot[];
|
|
66
|
+
}
|
|
67
|
+
/** Per-output descriptor. Tells the JS wrapper how to marshal outputs. */
|
|
68
|
+
export interface COutputDesc {
|
|
69
|
+
name: string;
|
|
70
|
+
kind: "scalar" | "boolean" | "complexScalar" | "tensor";
|
|
71
|
+
/** True for tensor outputs using the dynamic-output ABI: the C code
|
|
72
|
+
* malloc's the buffer and transfers ownership via `double **` and
|
|
73
|
+
* extra d0/d1 out-slots. The JS wrapper decodes the pointer, copies
|
|
74
|
+
* into a fresh Float64Array, and frees the C allocation. */
|
|
75
|
+
dynamic?: boolean;
|
|
76
|
+
/** True for complex tensor outputs. Fixed outputs add a paired imag
|
|
77
|
+
* Float64Array buffer; dynamic outputs add a paired imag `double **`
|
|
78
|
+
* out-pointer the caller decodes + frees after the call. */
|
|
79
|
+
isComplex?: boolean;
|
|
80
|
+
/** Ordered slots this output contributes to the ABI. One for scalars,
|
|
81
|
+
* two for complex scalars (reOut + imOut), two for fixed real tensor
|
|
82
|
+
* outputs (buf + lenOut), three for fixed complex (buf + bufIm +
|
|
83
|
+
* lenOut), four for dynamic real tensor outputs, five for dynamic
|
|
84
|
+
* complex (dynBuf + dynBufIm + dynLen + dynD0 + dynD1). */
|
|
85
|
+
slots: AbiSlot[];
|
|
86
|
+
}
|
|
87
|
+
/** Build the ABI schema for one generated function. Mutates paramDescs /
|
|
88
|
+
* outputDescs in place by filling in `slots`; returns the complete
|
|
89
|
+
* `abiSlots` array in calling order (params, then outputs, then trailers). */
|
|
90
|
+
export declare function buildAbiSlots(paramDescs: CParamDesc[], outputDescs: COutputDesc[], cls: ClassificationResult, paramOutputTensors: Set<string>, unshareTensorParams: Set<string>, needsTicState: boolean, needsErrorFlag: boolean, needsDispCb: boolean): AbiSlot[];
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JIT IR → pure C code generation (koffi path).
|
|
3
|
+
*
|
|
4
|
+
* Orchestration only: this file wires the classify / ABI / emit pieces
|
|
5
|
+
* together and assembles the final C source (headers + per-callee
|
|
6
|
+
* static functions + outer function).
|
|
7
|
+
*
|
|
8
|
+
* classify.ts — TensorMeta / analyzeTensorUsage, the single pass
|
|
9
|
+
* feeding every downstream decision.
|
|
10
|
+
* abi.ts — AbiSlot / CParamDesc / COutputDesc, buildAbiSlots.
|
|
11
|
+
* The one schema walked by both signature and JS.
|
|
12
|
+
* emit/ — per-statement / per-expression C emission, split
|
|
13
|
+
* by concern (scalar, complexScalar, tensor, assign,
|
|
14
|
+
* userCall, stmt, fused). Reads ctx.cls for every
|
|
15
|
+
* classification decision.
|
|
16
|
+
* context.ts — EmitCtx + shared name/opcode helpers.
|
|
17
|
+
*
|
|
18
|
+
* UserCall support: when a feasible user-defined function is called
|
|
19
|
+
* from the outer body, its lowered IR is already in `generatedIRBodies`
|
|
20
|
+
* (populated by `lowerUserFuncCall` in jitLower.ts). We emit each
|
|
21
|
+
* reachable callee as a `static void jit_<jitName>(...)` in the same
|
|
22
|
+
* .c file, in post-order so callees are defined before callers. The
|
|
23
|
+
* shared `__err_flag` pointer flows from outer to every callee.
|
|
24
|
+
*/
|
|
25
|
+
import { type JitStmt, type JitType } from "../jitTypes.js";
|
|
26
|
+
import type { GeneratedFn } from "../jitLower.js";
|
|
27
|
+
import { type AbiSlot, type CParamDesc, type COutputDesc } from "./abi.js";
|
|
28
|
+
export type { AbiSlot, AbiSlotKind } from "./abi.js";
|
|
29
|
+
export type { CParamDesc, COutputDesc } from "./abi.js";
|
|
30
|
+
export { mangle, mangleIm, tensorData, tensorDataIm, tensorLen, tensorD0, tensorD1, formatNumberLiteral, C_SCALAR_TARGET, } from "./context.js";
|
|
31
|
+
export interface GenerateCResult {
|
|
32
|
+
cSource: string;
|
|
33
|
+
cFnName: string;
|
|
34
|
+
paramDescs: CParamDesc[];
|
|
35
|
+
outputDescs: COutputDesc[];
|
|
36
|
+
/** The full ABI slot list in calling order:
|
|
37
|
+
* paramDescs[0].slots ++ paramDescs[1].slots ++ ...
|
|
38
|
+
* ++ outputDescs[0].slots ++ ... ++ trailer slots (ticState/errFlag).
|
|
39
|
+
* The JS wrapper walks this list to marshal values. */
|
|
40
|
+
abiSlots: AbiSlot[];
|
|
41
|
+
/** True when any tensor is involved (params, locals, or outputs). */
|
|
42
|
+
usesTensors: boolean;
|
|
43
|
+
/** koffi function signature string for declaring the C function. */
|
|
44
|
+
koffiSignature: string;
|
|
45
|
+
/** True when tic/toc are used — the function has an extra `double*` param. */
|
|
46
|
+
needsTicState: boolean;
|
|
47
|
+
/** True when any Index read was emitted — the function has an extra
|
|
48
|
+
* `double *__err_flag` trailing param. */
|
|
49
|
+
needsErrorFlag: boolean;
|
|
50
|
+
/** True when a `disp(...)` call was emitted — the function has an
|
|
51
|
+
* extra `void (*__disp_cb)(const char *, double, int)` trailing
|
|
52
|
+
* param. The JS wrapper registers a callback that routes back to
|
|
53
|
+
* `rt.output`. */
|
|
54
|
+
needsDispCb: boolean;
|
|
55
|
+
}
|
|
56
|
+
export declare function generateC(body: JitStmt[], params: string[], outputs: string[], nargout: number, localVars: Set<string>, argTypes: JitType[], _outputType: JitType | null, outputTypes: JitType[], fnName: string, fuse?: boolean, openmp?: boolean, generatedIRBodies?: Map<string, GeneratedFn>): GenerateCResult;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* C-JIT: unified classification pass for tensor names.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the ten ad-hoc sets/maps (`tensorVars`, `paramTensorNames`,
|
|
5
|
+
* `outputTensorNames`, `localTensorNames`, `assignIndexTargets`,
|
|
6
|
+
* `unshareTensorParams`, `tensorMaxDim`, `freshAllocTensors`,
|
|
7
|
+
* `dynamicOutputs`, `paramOutputTensors`) that the codegen used to
|
|
8
|
+
* build from 7+ body walks. Everything downstream — signature builder,
|
|
9
|
+
* prelude, epilogue, emit helpers, fusion call — now reads from the
|
|
10
|
+
* `TensorMeta` table this pass produces.
|
|
11
|
+
*/
|
|
12
|
+
import type { JitExpr, JitStmt, JitType } from "../jitTypes.js";
|
|
13
|
+
export type TensorKind =
|
|
14
|
+
/** Tensor param, never appears in the output list. */
|
|
15
|
+
"param"
|
|
16
|
+
/** Tensor param whose name is also in the output list. */
|
|
17
|
+
| "paramOutput"
|
|
18
|
+
/** Pure tensor output (not a param). */
|
|
19
|
+
| "output"
|
|
20
|
+
/** Tensor local (neither param nor output). */
|
|
21
|
+
| "local";
|
|
22
|
+
export interface TensorMeta {
|
|
23
|
+
kind: TensorKind;
|
|
24
|
+
/** Max index arity on this name (1/2/3). 0 means the name is never
|
|
25
|
+
* used as an Index or AssignIndex base. */
|
|
26
|
+
maxIndexDim: number;
|
|
27
|
+
/** True when any `Assign(name, RHS)` has RHS ∈ {TensorLiteral,
|
|
28
|
+
* VConcatGrow, Call(zeros|ones), Var(src) where src.hasFreshAlloc,
|
|
29
|
+
* RangeSliceRead}. Propagated to fixed point. */
|
|
30
|
+
hasFreshAlloc: boolean;
|
|
31
|
+
/** True when any AssignIndex / AssignIndexRange / AssignIndexCol
|
|
32
|
+
* uses this name as the base. */
|
|
33
|
+
isAssignIndexTarget: boolean;
|
|
34
|
+
/** Derived: `kind === "param"` and (`isAssignIndexTarget` or
|
|
35
|
+
* `hasFreshAlloc`). Triggers the unshare-at-entry malloc+memcpy
|
|
36
|
+
* prelude so writes don't leak to the caller's buffer. */
|
|
37
|
+
needsUnshare: boolean;
|
|
38
|
+
/** Derived: `hasFreshAlloc` and (`kind === "output"` or
|
|
39
|
+
* `kind === "paramOutput"`). Triggers the `double **` dynamic-output
|
|
40
|
+
* ABI. */
|
|
41
|
+
isDynamicOutput: boolean;
|
|
42
|
+
/** True when this tensor's `JitType.isComplex === true` — either at
|
|
43
|
+
* the boundary (param / output type) or propagated from a complex
|
|
44
|
+
* RHS for locals. Drives paired imag-buffer plumbing: every complex
|
|
45
|
+
* tensor gets a `v_name_data_im` companion pointer, an extra ABI
|
|
46
|
+
* slot for boundaries, and imag malloc / free / copy parallel to
|
|
47
|
+
* the existing real path. */
|
|
48
|
+
isComplex: boolean;
|
|
49
|
+
}
|
|
50
|
+
export interface ClassificationResult {
|
|
51
|
+
/** All names with a tensor role, in insertion order (params first,
|
|
52
|
+
* then outputs, then locals in body order). */
|
|
53
|
+
tensorNames: string[];
|
|
54
|
+
/** Per-name metadata. */
|
|
55
|
+
meta: Map<string, TensorMeta>;
|
|
56
|
+
/** True iff any name has `hasFreshAlloc`. Lets callers skip the
|
|
57
|
+
* dynamic-output marshalling when no name uses it. */
|
|
58
|
+
hasAnyDynamic: boolean;
|
|
59
|
+
/** Set of every name that has any tensor role, i.e. the names for which `meta.has(name)` is true. */
|
|
60
|
+
tensorVars: Set<string>;
|
|
61
|
+
/** Names with `kind === "param"` or `"paramOutput"`. */
|
|
62
|
+
paramTensorNames: Set<string>;
|
|
63
|
+
/** Names with `kind === "output"` or `"paramOutput"`. */
|
|
64
|
+
outputTensorNames: Set<string>;
|
|
65
|
+
/** Names with `kind === "local"`. */
|
|
66
|
+
localTensorNames: Set<string>;
|
|
67
|
+
}
|
|
68
|
+
/** Does this Assign RHS allocate a fresh C-owned buffer? */
|
|
69
|
+
export declare function isFreshTensorRhs(expr: JitExpr): boolean;
|
|
70
|
+
export declare function analyzeTensorUsage(body: JitStmt[], params: string[], argTypes: JitType[], outputNames: string[], outputTypes: JitType[]): ClassificationResult;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* C-JIT compilation driver (koffi path).
|
|
3
|
+
*
|
|
4
|
+
* Takes generated C source, compiles it into a `.so` shared library,
|
|
5
|
+
* loads it via koffi, and returns the declared function.
|
|
6
|
+
*
|
|
7
|
+
* Strategy:
|
|
8
|
+
* 1. Content-addressed cache under ~/.cache/numbl/c-jit/ — the hash
|
|
9
|
+
* includes the source bytes plus compiler/platform/numbl versions,
|
|
10
|
+
* so any input change forces a recompile.
|
|
11
|
+
* 2. On cache miss, write `src.c` into a fresh tmpdir and shell out to
|
|
12
|
+
* the C compiler (`$NUMBL_CC` or `cc`) with `-shared -fPIC`.
|
|
13
|
+
* 3. Load with koffi.load() and declare the function.
|
|
14
|
+
*
|
|
15
|
+
* No Node API headers are needed — functions are plain C with raw types.
|
|
16
|
+
* No NAPI_MODULE_INIT, no module registration, no exit hooks.
|
|
17
|
+
*/
|
|
18
|
+
export interface CompiledCFn {
|
|
19
|
+
fn: (...args: unknown[]) => unknown;
|
|
20
|
+
cachedPath: string;
|
|
21
|
+
/** The loaded koffi library handle, for declaring additional exports. */
|
|
22
|
+
lib: any;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Compile + load a C function via koffi. Returns null on any failure.
|
|
26
|
+
*
|
|
27
|
+
* `koffiSignature` is the koffi type-string for the C function, e.g.:
|
|
28
|
+
* "void jit_fn(double, double *, int64_t, double *)"
|
|
29
|
+
*/
|
|
30
|
+
export declare function compileAndLoad(cSource: string, koffiSignature: string, _cFnName: string, log?: (m: string) => void, extraFlags?: string[]): CompiledCFn | null;
|
|
31
|
+
export declare function resetCEnvForTesting(): void;
|
|
32
|
+
export declare function cJitUnavailableReason(): string | undefined;
|
|
33
|
+
export declare function cJitCacheSize(): number;
|
|
34
|
+
export declare function readCachedBuild(cachedPath: string): Buffer;
|
|
35
|
+
/** True when the C compiler supports `-fopenmp` (thread-level parallelism).
|
|
36
|
+
* Triggers env discovery on first call so it can be used before compileAndLoad. */
|
|
37
|
+
export declare function cJitOpenmpAvailable(log?: (m: string) => void): boolean;
|