numbl 0.1.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +59 -3
- package/dist-cli/cli.js +22538 -7936
- package/dist-lib/lib.js +34682 -20852
- package/dist-lib/numbl-core/executeCode.d.ts +13 -0
- package/dist-lib/numbl-core/fileIOAdapter.d.ts +2 -0
- package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
- package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
- package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
- package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
- package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
- package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
- package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
- package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
- package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
- package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
- package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
- package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
- package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
- package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
- package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
- package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
- package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
- package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
- package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
- package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
- package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
- package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +46 -0
- package/dist-lib/numbl-core/jit/e1/hash.d.ts +10 -0
- package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
- package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +66 -0
- package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
- package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +34 -0
- package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +25 -0
- package/dist-lib/numbl-core/jit/e2/cache.d.ts +80 -0
- package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +55 -0
- package/dist-lib/numbl-core/jit/e2/classify.d.ts +119 -0
- package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +16 -0
- package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +79 -0
- package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +71 -0
- package/dist-lib/numbl-core/jit/e2/install.d.ts +11 -0
- package/dist-lib/numbl-core/jit/e2/liveness.d.ts +29 -0
- package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +49 -0
- package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +75 -0
- package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +24 -0
- package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +72 -0
- package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +29 -0
- package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
- package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +69 -0
- package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
- package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
- package/dist-lib/numbl-core/jit/heavyOps.d.ts +15 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
- package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
- package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
- package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
- package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
- package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
- package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
- package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
- package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
- package/dist-lib/numbl-core/jit/js/jsMultiReduction.d.ts +70 -0
- package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
- package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
- package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
- package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
- package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
- package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
- package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
- package/dist-lib/numbl-core/ops/index.d.ts +8 -0
- package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
- package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
- package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
- package/dist-lib/numbl-core/parser/types.d.ts +6 -0
- package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
- package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
- package/dist-lib/numbl-core/version.d.ts +1 -1
- package/native/jit_runtime/jit_runtime.c +261 -0
- package/native/jit_runtime/jit_runtime.h +204 -0
- package/native/numbl_addon.cpp +62 -1
- package/native/ops/bessel.c +572 -0
- package/native/ops/comparison.c +150 -0
- package/native/ops/complex_binary_elemwise.c +192 -0
- package/native/ops/complex_unary_elemwise.c +152 -0
- package/native/ops/numbl_ops.c +66 -0
- package/native/ops/numbl_ops.h +262 -0
- package/native/ops/real_binary_elemwise.c +85 -0
- package/native/ops/real_unary_elemwise.c +104 -0
- package/native/ops/reduce.c +162 -0
- package/native/ops_napi.cpp +320 -0
- package/package.json +8 -9
- package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
- package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
- /package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* numbl_jit_runtime — see jit_runtime.h for the shape and invariants.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "jit_runtime.h"
|
|
6
|
+
#include "numbl_ops.h"
|
|
7
|
+
|
|
8
|
+
#include <math.h>
|
|
9
|
+
#include <string.h>
|
|
10
|
+
#include <time.h>
|
|
11
|
+
|
|
12
|
+
int numbl_jit_rt_version(void) {
|
|
13
|
+
return NUMBL_JIT_RT_VERSION;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/* Bounds-checked linear read with a 1-based index.
 *
 * Returns data[offset], where offset is (i - 1) truncated toward zero.
 * When offset is not in [0, len) the function stores 1.0 in *err_flag
 * and returns 0.0 without touching `data`. */
double numbl_idx1r(const double* data, size_t len, double i, double* err_flag) {
  /* Same truncation the JS-JIT gets from `(i - 1) | 0`. Reinterpreting
   * the signed offset as unsigned turns negatives into huge values, so
   * a single compare rejects both "below 1" and "past the end". */
  const int64_t offset = (int64_t)(i - 1.0);
  const int in_range = (uint64_t)offset < (uint64_t)len;
  if (in_range) {
    return data[offset];
  }
  *err_flag = 1.0;
  return 0.0;
}
|
|
27
|
+
|
|
28
|
+
/* Bounds-checked linear write with a 1-based index.
 *
 * Stores `v` at offset (i - 1), truncated toward zero, when that offset
 * is in [0, len). Otherwise nothing is written and *err_flag is set to
 * 2.0. */
void numbl_set1r_h(double* data, size_t len, double i, double v,
                   double* err_flag) {
  const int64_t offset = (int64_t)(i - 1.0);
  if ((uint64_t)offset < (uint64_t)len) {
    data[offset] = v;
    return;
  }
  /* 2.0 means "growth needed: soft-bail to the interpreter". It is kept
   * distinct from 1.0, which the JS wrapper turns into a hard bounds
   * error. */
  *err_flag = 2.0;
}
|
|
39
|
+
|
|
40
|
+
/* Bounds-checked 2-D read from a column-major buffer, 1-based (i, j).
 *
 * `d0` is the row count; the column count is derived as len / d0.
 * Returns data[(j-1)*d0 + (i-1)]. On any out-of-range subscript, or when
 * d0 is zero, stores 1.0 in *err_flag and returns 0.0. */
double numbl_idx2r(const double* data, size_t len, size_t d0,
                   double i, double j, double* err_flag) {
  const int64_t row = (int64_t)(i - 1.0);
  const int64_t col = (int64_t)(j - 1.0);
  /* Short-circuit order matters: len / d0 is only evaluated once d0 is
   * known to be non-zero (an empty tensor makes every index OOB). */
  const int in_range = d0 != 0
      && (uint64_t)row < (uint64_t)d0
      && (uint64_t)col < (uint64_t)(len / d0);
  if (!in_range) {
    *err_flag = 1.0;
    return 0.0;
  }
  return data[(size_t)col * d0 + (size_t)row];
}
|
|
56
|
+
|
|
57
|
+
/* Bounds-checked 3-D read from a column-major buffer, 1-based (i, j, k).
 *
 * `d0` and `d1` are the first two extents; the third is derived as
 * len / (d0 * d1). Returns data[(k-1)*d0*d1 + (j-1)*d0 + (i-1)]. On any
 * out-of-range subscript, or when d0 or d1 is zero, stores 1.0 in
 * *err_flag and returns 0.0. */
double numbl_idx3r(const double* data, size_t len, size_t d0, size_t d1,
                   double i, double j, double k, double* err_flag) {
  const int64_t s0 = (int64_t)(i - 1.0);
  const int64_t s1 = (int64_t)(j - 1.0);
  const int64_t s2 = (int64_t)(k - 1.0);
  if (d0 != 0 && d1 != 0 &&
      (uint64_t)s0 < (uint64_t)d0 &&
      (uint64_t)s1 < (uint64_t)d1) {
    /* Only reached with both leading extents non-zero, so the page size
     * used as a divisor below is built from non-zero factors. */
    const size_t page = d0 * d1;
    const size_t pages = len / page;
    if ((uint64_t)s2 < (uint64_t)pages) {
      return data[(size_t)s2 * page + (size_t)s1 * d0 + (size_t)s0];
    }
  }
  *err_flag = 1.0;
  return 0.0;
}
|
|
76
|
+
|
|
77
|
+
/* Bounds-checked 2-D write into a column-major buffer, 1-based (i, j).
 *
 * `d0` is the row count; the column count is derived as len / d0.
 * Stores `v` at data[(j-1)*d0 + (i-1)] when both subscripts are in
 * range. Otherwise (including d0 == 0) nothing is written and *err_flag
 * is set to 2.0. */
void numbl_set2r_h(double* data, size_t len, size_t d0,
                   double i, double j, double v, double* err_flag) {
  const int64_t row = (int64_t)(i - 1.0);
  const int64_t col = (int64_t)(j - 1.0);
  if (d0 != 0) {
    const size_t ncols = len / d0;
    if ((uint64_t)row < (uint64_t)d0 && (uint64_t)col < (uint64_t)ncols) {
      data[(size_t)col * d0 + (size_t)row] = v;
      return;
    }
  }
  /* Every rejection path reports the same code. */
  *err_flag = 2.0;
}
|
|
92
|
+
|
|
93
|
+
/* Bounds-checked 3-D write into a column-major buffer, 1-based (i, j, k).
 *
 * `d0` and `d1` are the first two extents; the third is derived as
 * len / (d0 * d1). Stores `v` at data[(k-1)*d0*d1 + (j-1)*d0 + (i-1)]
 * when all three subscripts are in range. Otherwise (including a zero
 * d0 or d1) nothing is written and *err_flag is set to 2.0. */
void numbl_set3r_h(double* data, size_t len, size_t d0, size_t d1,
                   double i, double j, double k, double v,
                   double* err_flag) {
  const int64_t s0 = (int64_t)(i - 1.0);
  const int64_t s1 = (int64_t)(j - 1.0);
  const int64_t s2 = (int64_t)(k - 1.0);
  if (d0 != 0 && d1 != 0) {
    const size_t page = d0 * d1;
    const size_t pages = len / page;
    const int in_range = (uint64_t)s0 < (uint64_t)d0
        && (uint64_t)s1 < (uint64_t)d1
        && (uint64_t)s2 < (uint64_t)pages;
    if (in_range) {
      data[(size_t)s2 * page + (size_t)s1 * d0 + (size_t)s0] = v;
      return;
    }
  }
  /* Every rejection path reports the same code. */
  *err_flag = 2.0;
}
|
|
113
|
+
|
|
114
|
+
/* Range-to-range copy: dst(dstStart:dstEnd) = src(srcStart:srcEnd), with
 * 1-based inclusive bounds truncated toward zero.
 *
 * Outcomes, checked in this order:
 *   - element counts differ          -> *err_flag = 3.0, nothing written;
 *   - both ranges are empty          -> no write, no error;
 *   - any endpoint outside its buffer -> *err_flag = 1.0, nothing written;
 *   - otherwise the elements are copied.
 *
 * A range whose end precedes its start selects zero elements however far
 * apart the endpoints are, so negative raw counts are clamped to zero
 * before the comparison. Without the clamp, two empty ranges such as
 * 3:1 and 4:3 (raw counts -1 and 0) were reported as a size mismatch. */
void numbl_setRange1r_h(double* dstData, size_t dstLen,
                        double dstStart, double dstEnd,
                        const double* srcData, size_t srcLen,
                        double srcStart, double srcEnd,
                        double* err_flag) {
  const int64_t dS = (int64_t)(dstStart - 1.0);
  const int64_t dE = (int64_t)(dstEnd - 1.0);
  const int64_t sS = (int64_t)(srcStart - 1.0);
  const int64_t sE = (int64_t)(srcEnd - 1.0);
  int64_t dN = dE - dS + 1;
  int64_t sN = sE - sS + 1;
  if (dN < 0) dN = 0;
  if (sN < 0) sN = 0;
  if (dN != sN) {
    /* 3.0 — length-mismatch error; JS wrapper translates to MATLAB's
     * "Unable to perform assignment..." message. */
    *err_flag = 3.0;
    return;
  }
  if (dN == 0) return;
  if ((uint64_t)dS >= (uint64_t)dstLen ||
      (uint64_t)dE >= (uint64_t)dstLen ||
      (uint64_t)sS >= (uint64_t)srcLen ||
      (uint64_t)sE >= (uint64_t)srcLen) {
    *err_flag = 1.0;
    return;
  }
  /* memmove handles overlap when src and dst alias. */
  memmove(dstData + (size_t)dS, srcData + (size_t)sS,
          (size_t)dN * sizeof(double));
}
|
|
143
|
+
|
|
144
|
+
/* Whole-column write: copy `srcLen` doubles from `srcData` into column
 * `col` (1-based, truncated toward zero) of a column-major buffer with
 * `dstRows` rows and `dstLen` total elements.
 *
 * Outcomes, checked in this order:
 *   - srcLen != dstRows            -> *err_flag = 3.0, nothing written;
 *   - column offset negative       -> *err_flag = 1.0, nothing written;
 *   - dstRows == 0                 -> *err_flag = 2.0, nothing written;
 *   - column >= dstLen / dstRows   -> *err_flag = 2.0, nothing written;
 *   - otherwise the column is overwritten. */
void numbl_setCol2r_h(double* dstData, size_t dstRows, size_t dstLen,
                      double col,
                      const double* srcData, size_t srcLen,
                      double* err_flag) {
  if (srcLen != dstRows) {
    *err_flag = 3.0;
    return;
  }
  int64_t j = (int64_t)(col - 1.0);
  if (j < 0) {
    *err_flag = 1.0;
    return;
  }
  if (dstRows == 0) {
    /* Zero-row dst (src is empty too, given the length check above):
     * there is no column count to derive, so soft-bail. */
    *err_flag = 2.0;
    return;
  }
  size_t dstCols = dstLen / dstRows;
  if ((uint64_t)j >= (uint64_t)dstCols) {
    /* Growth on write — the interpreter handles growing dst; JS-JIT
     * mirrors this with JitBailToInterpreter. */
    *err_flag = 2.0;
    return;
  }
  /* memmove rather than memcpy: the source may point into dst itself,
   * and memcpy is undefined for overlapping regions. */
  memmove(dstData + (size_t)j * dstRows, srcData,
          dstRows * sizeof(double));
}
|
|
172
|
+
|
|
173
|
+
/* Range read: copy src(start:end) to the front of `dstData`, with
 * 1-based inclusive bounds truncated toward zero.
 *
 * An empty range (end before start) writes nothing and raises no error.
 * If either endpoint falls outside [0, srcLen) after conversion,
 * *err_flag is set to 1.0 and nothing is written. The destination's
 * capacity is not checked here. */
void numbl_copyRange1r(const double* srcData, size_t srcLen,
                       double start, double end,
                       double* dstData,
                       double* err_flag) {
  const int64_t first = (int64_t)(start - 1.0);
  const int64_t last = (int64_t)(end - 1.0);
  const int64_t count = last - first + 1;
  if (count > 0) {
    const int in_range = (uint64_t)first < (uint64_t)srcLen
        && (uint64_t)last < (uint64_t)srcLen;
    if (in_range) {
      memcpy(dstData, srcData + (size_t)first,
             (size_t)count * sizeof(double));
    } else {
      *err_flag = 1.0;
    }
  }
}
|
|
188
|
+
|
|
189
|
+
double numbl_mod(double a, double b) {
|
|
190
|
+
if (b == 0.0) return a;
|
|
191
|
+
double r = fmod(a, b);
|
|
192
|
+
if (r != 0.0 && ((r < 0.0) != (b < 0.0))) r += b;
|
|
193
|
+
return r;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/* Signum: 1.0 for positive input, -1.0 for negative input, 0.0 for
 * either zero, and NaN for NaN input.
 *
 * NaN is passed through rather than collapsed to 0.0: both ordered
 * comparisons below are false for NaN, so without the explicit check a
 * NaN would be reported as zero. The check reads the raw bit pattern
 * (exponent all ones, mantissa non-zero) instead of using a
 * floating-point comparison, so it does not depend on how this file is
 * compiled with respect to finite-math assumptions. */
double numbl_sign(double x) {
  uint64_t bits;
  memcpy(&bits, &x, sizeof bits);
  if ((bits & 0x7FF0000000000000ULL) == 0x7FF0000000000000ULL &&
      (bits & 0x000FFFFFFFFFFFFFULL) != 0) {
    return x;
  }
  if (x > 0.0) return 1.0;
  if (x < 0.0) return -1.0;
  return 0.0;
}
|
|
201
|
+
|
|
202
|
+
/* Value-returning shim over numbl_real_flat_reduce: forwards `op` and
 * `data`, passes `len` converted to size_t, and returns whatever the
 * kernel stored in the output slot (pre-set to 0.0). */
double numbl_reduce_flat(int op, const double* data, int64_t len) {
  double result = 0.0;
  const size_t count = (size_t)len;
  numbl_real_flat_reduce(op, count, data, &result);
  return result;
}
|
|
207
|
+
|
|
208
|
+
double numbl_monotonic_time(void) {
|
|
209
|
+
struct timespec ts;
|
|
210
|
+
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
211
|
+
return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
/* Start a timer: store the current numbl_monotonic_time() reading in
 * *state and return that same reading. */
double numbl_tic(double* state) {
  *state = numbl_monotonic_time();
  return *state;
}
|
|
219
|
+
|
|
220
|
+
/* Read a timer: the current numbl_monotonic_time() reading minus the
 * value stored in *state. */
double numbl_toc(const double* state) {
  const double now = numbl_monotonic_time();
  const double started = *state;
  return now - started;
}
|
|
223
|
+
|
|
224
|
+
/* ── NaN / Inf / finite predicates ─────────────────────────────────────
|
|
225
|
+
*
|
|
226
|
+
* Bit-pattern inspection so the answer doesn't depend on the caller's
|
|
227
|
+
* `-ffast-math` / `-ffinite-math-only` posture. IEEE-754 binary64
|
|
228
|
+
* double: sign bit (1) | exponent (11) | mantissa (52). NaN has
|
|
229
|
+
* exponent all-1s and non-zero mantissa; ±Inf has exponent all-1s and
|
|
230
|
+
* zero mantissa; finite values have exponent != all-1s.
|
|
231
|
+
*
|
|
232
|
+
* memcpy is the portable way to reinterpret the bit pattern: it is
* well-defined in both C and C++ (reading an inactive union member is
* UB in C++, and pointer casts violate strict aliasing), and compilers
* typically lower the fixed-size memcpy to a single register move.
|
|
236
|
+
*/
|
|
237
|
+
|
|
238
|
+
/* Return the object representation of `x` as a 64-bit unsigned integer.
 * The bytes are copied, not converted. */
static uint64_t bits_of(double x) {
  uint64_t raw;
  memcpy(&raw, &x, sizeof raw);
  return raw;
}
|
|
243
|
+
|
|
244
|
+
#define NUMBL_DBL_EXP_MASK 0x7FF0000000000000ULL
|
|
245
|
+
#define NUMBL_DBL_MANT_MASK 0x000FFFFFFFFFFFFFULL
|
|
246
|
+
|
|
247
|
+
int numbl_is_nan(double x) {
|
|
248
|
+
uint64_t u = bits_of(x);
|
|
249
|
+
return (u & NUMBL_DBL_EXP_MASK) == NUMBL_DBL_EXP_MASK
|
|
250
|
+
&& (u & NUMBL_DBL_MANT_MASK) != 0;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
int numbl_is_inf(double x) {
|
|
254
|
+
uint64_t u = bits_of(x);
|
|
255
|
+
return (u & ~0x8000000000000000ULL) == NUMBL_DBL_EXP_MASK;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
int numbl_is_finite(double x) {
|
|
259
|
+
uint64_t u = bits_of(x);
|
|
260
|
+
return (u & NUMBL_DBL_EXP_MASK) != NUMBL_DBL_EXP_MASK;
|
|
261
|
+
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* numbl_jit_runtime — helpers the C-JIT emitter calls from generated code.
|
|
3
|
+
*
|
|
4
|
+
* Kept in a static archive (jit_runtime.a) separate from numbl_ops.a:
|
|
5
|
+
* the `ops/` directory is reserved for op-code-dispatched kernels that
|
|
6
|
+
* achieve parity with the TS ops layer. Anything the JIT emitter calls
|
|
7
|
+
* *directly* from emitted C (bounds-checked index reads, MATLAB-semantics
|
|
8
|
+
* math helpers, tic/toc, reduction shims, ...) lives here instead.
|
|
9
|
+
*
|
|
10
|
+
* When adding a new helper, bump NUMBL_JIT_RT_VERSION below AND
|
|
11
|
+
* NUMBL_JIT_RT_REQUIRED_VERSION in jitCodegenC.ts. The generated C asserts
|
|
12
|
+
* `NUMBL_JIT_RT_VERSION >= N`, so a stale archive fails the per-JIT
|
|
13
|
+
* compile step with a clear "rebuild the addon" message instead of a
|
|
14
|
+
* cryptic linker error.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
#ifndef NUMBL_JIT_RUNTIME_H
|
|
18
|
+
#define NUMBL_JIT_RUNTIME_H
|
|
19
|
+
|
|
20
|
+
#include <stddef.h>
|
|
21
|
+
#include <stdint.h>
|
|
22
|
+
|
|
23
|
+
#ifdef __cplusplus
|
|
24
|
+
extern "C" {
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
/* ── Version ──────────────────────────────────────────────────────────── */
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Version log:
|
|
31
|
+
* 1 — initial: idx1r, mod, sign, reduce_flat, tic/toc/monotonic_time.
|
|
32
|
+
* 2 — set1r_h (scalar linear Index write with soft-bail on OOB).
|
|
33
|
+
* 3 — idx2r / idx3r / set2r_h / set3r_h (multi-index Index read/write).
|
|
34
|
+
* 4 — setRange1r_h / setCol2r_h / copyRange1r (range/col slice r/w).
|
|
35
|
+
* 5 — is_nan / is_inf / is_finite (predicates that survive -ffast-math
|
|
36
|
+
* by inspecting IEEE-754 bit patterns, not FP ops).
|
|
37
|
+
*/
|
|
38
|
+
#define NUMBL_JIT_RT_VERSION 5
|
|
39
|
+
|
|
40
|
+
/** Returns NUMBL_JIT_RT_VERSION baked into the compiled archive. */
|
|
41
|
+
int numbl_jit_rt_version(void);
|
|
42
|
+
|
|
43
|
+
/* ── Scalar linear Index read ─────────────────────────────────────────── */
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* 1-based MATLAB linear Index read on a real-valued tensor buffer.
|
|
47
|
+
*
|
|
48
|
+
* `i` — 1-based double index (truncation-to-zero via int64 cast). The
|
|
49
|
+
* emitter wraps non-integer indices with `round()` beforehand to
|
|
50
|
+
* match the JS-JIT's `Math.round`-then-truncate sequence.
|
|
51
|
+
* `err_flag` — set to 1.0 on OOB (and returns 0.0) so the C function
|
|
52
|
+
* finishes without a native crash. The caller must zero
|
|
53
|
+
* the flag before each koffi call; the JS wrapper checks
|
|
54
|
+
* it after and throws "Index exceeds array bounds".
|
|
55
|
+
*/
|
|
56
|
+
double numbl_idx1r(const double* data, size_t len, double i, double* err_flag);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* 1-based MATLAB linear Index write on a real-valued tensor buffer.
|
|
60
|
+
*
|
|
61
|
+
* On OOB, writes `2.0` to *err_flag (the "growth-needed" code the JS
|
|
62
|
+
* wrapper translates to a JitBailToInterpreter) and returns without
|
|
63
|
+
* writing, mirroring the JS-JIT's `set1r_h` helper. The interpreter
|
|
64
|
+
* then re-runs the call with proper tensor-growth semantics. As with
|
|
65
|
+
* the read path, the caller must zero the flag before each call.
|
|
66
|
+
*/
|
|
67
|
+
void numbl_set1r_h(double* data, size_t len, double i, double v, double* err_flag);
|
|
68
|
+
|
|
69
|
+
/* ── Multi-index Index read (2D, 3D) ──────────────────────────────────── */
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* 1-based MATLAB 2D Index read on a real-valued tensor buffer.
|
|
73
|
+
*
|
|
74
|
+
* Column-major: returns data[(j-1)*d0 + (i-1)]. `d0` is the row count.
|
|
75
|
+
* The derived column count is `len / d0`; both dimensions are bounds-
|
|
76
|
+
* checked independently. Emitter wraps non-integer indices with
|
|
77
|
+
* `round()` to match the JS-JIT's `Math.round`-then-truncate sequence.
|
|
78
|
+
* On OOB, sets *err_flag = 1.0 and returns 0.0 (hard bounds error).
|
|
79
|
+
*/
|
|
80
|
+
double numbl_idx2r(const double* data, size_t len, size_t d0,
|
|
81
|
+
double i, double j, double* err_flag);
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* 1-based MATLAB 3D Index read on a real-valued tensor buffer.
|
|
85
|
+
*
|
|
86
|
+
* Column-major: returns data[((k-1)*d1 + (j-1))*d0 + (i-1)]. `d0` is
|
|
87
|
+
* the row count, `d1` the column count. The derived page count is
|
|
88
|
+
* `len / (d0 * d1)`; each dimension is bounds-checked independently.
|
|
89
|
+
*/
|
|
90
|
+
double numbl_idx3r(const double* data, size_t len, size_t d0, size_t d1,
|
|
91
|
+
double i, double j, double k, double* err_flag);
|
|
92
|
+
|
|
93
|
+
/* ── Multi-index Index write (2D, 3D), soft-bail on OOB ───────────────── */
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* 2D Index write. On OOB (along either dim) writes `2.0` to *err_flag
|
|
97
|
+
* (the "growth-needed" code the JS wrapper translates to a
|
|
98
|
+
* JitBailToInterpreter) and returns without writing.
|
|
99
|
+
*/
|
|
100
|
+
void numbl_set2r_h(double* data, size_t len, size_t d0,
|
|
101
|
+
double i, double j, double v, double* err_flag);
|
|
102
|
+
|
|
103
|
+
/** 3D Index write with the same soft-bail convention as set2r_h. */
|
|
104
|
+
void numbl_set3r_h(double* data, size_t len, size_t d0, size_t d1,
|
|
105
|
+
double i, double j, double k, double v,
|
|
106
|
+
double* err_flag);
|
|
107
|
+
|
|
108
|
+
/* ── Range / column slice read and write ──────────────────────────────── */
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Range → range write: `dst(dStart:dEnd) = src(sStart:sEnd)` on real
|
|
112
|
+
* tensors, 1-based MATLAB indices. Sizes must match: on mismatch,
|
|
113
|
+
* writes `3.0` to *err_flag and returns (the JS wrapper translates this
|
|
114
|
+
* into "Unable to perform assignment because the indices on the left
|
|
115
|
+
* side are not compatible with the size of the right side."). Bounds
|
|
116
|
+
* violations on either range write `1.0` and return. Uses memmove so
|
|
117
|
+
* overlapping dst == src ranges are handled correctly.
|
|
118
|
+
*/
|
|
119
|
+
void numbl_setRange1r_h(double* dstData, size_t dstLen,
|
|
120
|
+
double dstStart, double dstEnd,
|
|
121
|
+
const double* srcData, size_t srcLen,
|
|
122
|
+
double srcStart, double srcEnd,
|
|
123
|
+
double* err_flag);
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Column-vector write into column `col` of a 2-D tensor `dst` with row
|
|
127
|
+
* count `dstRows`. `srcLen` must equal `dstRows` (length mismatch →
|
|
128
|
+
* *err_flag = 3.0). When `col` exceeds the current column count, the
|
|
129
|
+
* MATLAB interpreter would grow dst — we soft-bail (*err_flag = 2.0) so
|
|
130
|
+
* the caller's stale hoisted aliases don't corrupt the growth path.
|
|
131
|
+
*/
|
|
132
|
+
void numbl_setCol2r_h(double* dstData, size_t dstRows, size_t dstLen,
|
|
133
|
+
double col,
|
|
134
|
+
const double* srcData, size_t srcLen,
|
|
135
|
+
double* err_flag);
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Range read: copy `src(start:end)` into caller-provided `dstData`. The
|
|
139
|
+
* caller must ensure `dstData` has capacity >= `end - start + 1`. OOB
|
|
140
|
+
* on `src` writes `1.0` to *err_flag and returns without writing.
|
|
141
|
+
* `start > end` is a valid empty range (no write, no error).
|
|
142
|
+
*/
|
|
143
|
+
void numbl_copyRange1r(const double* srcData, size_t srcLen,
|
|
144
|
+
double start, double end,
|
|
145
|
+
double* dstData,
|
|
146
|
+
double* err_flag);
|
|
147
|
+
|
|
148
|
+
/* ── Scalar math helpers with MATLAB semantics ────────────────────────── */
|
|
149
|
+
|
|
150
|
+
/** MATLAB `mod(a, b)`: result has the sign of `b`; `mod(a, 0) == a`. */
|
|
151
|
+
double numbl_mod(double a, double b);
|
|
152
|
+
|
|
153
|
+
/** Three-valued sign: -1, 0, or 1. */
|
|
154
|
+
double numbl_sign(double x);
|
|
155
|
+
|
|
156
|
+
/* ── Reduction wrapper ────────────────────────────────────────────────── */
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Thin wrapper around numbl_real_flat_reduce (from numbl_ops.a) that
|
|
160
|
+
* returns the scalar directly, so generated C doesn't need a local
|
|
161
|
+
* `double out; … ; return out;` dance inline.
|
|
162
|
+
*/
|
|
163
|
+
double numbl_reduce_flat(int op, const double* data, int64_t len);
|
|
164
|
+
|
|
165
|
+
/* ── Timers (tic/toc) ─────────────────────────────────────────────────── */
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Monotonic wall-clock seconds. Exported by every JIT .so so the JS
|
|
169
|
+
* wrapper can cross-reference the C clock domain when bridging tic/toc
|
|
170
|
+
* state between JS and C.
|
|
171
|
+
*/
|
|
172
|
+
double numbl_monotonic_time(void);
|
|
173
|
+
|
|
174
|
+
/** Capture the current monotonic time into *state, return it. */
|
|
175
|
+
double numbl_tic(double* state);
|
|
176
|
+
|
|
177
|
+
/** Elapsed seconds since *state. */
|
|
178
|
+
double numbl_toc(const double* state);
|
|
179
|
+
|
|
180
|
+
/* ── NaN / Inf / finite predicates ───────────────────────────────────── */
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Bit-pattern-based predicates that return 1 for true, 0 for false.
|
|
184
|
+
*
|
|
185
|
+
* The JIT emits calls to these instead of C99's `isnan` / `isinf` /
|
|
186
|
+
* `isfinite` because the generated C is compiled with `-ffast-math`,
|
|
187
|
+
* which implies `-ffinite-math-only` and constant-folds those macros
|
|
188
|
+
* away. These helpers inspect the IEEE-754 bit pattern via `memcpy`,
|
|
189
|
+
* so the optimizer can't prove they're always-false even if the
|
|
190
|
+
* caller's `-ffast-math` assumes operands are finite — the function
|
|
191
|
+
* call is opaque, and the bit operations have no FP semantics.
|
|
192
|
+
*
|
|
193
|
+
* Assumes IEEE-754 binary64 doubles (true on every platform numbl
|
|
194
|
+
* targets).
|
|
195
|
+
*/
|
|
196
|
+
int numbl_is_nan(double x);
|
|
197
|
+
int numbl_is_inf(double x);
|
|
198
|
+
int numbl_is_finite(double x);
|
|
199
|
+
|
|
200
|
+
#ifdef __cplusplus
|
|
201
|
+
}
|
|
202
|
+
#endif
|
|
203
|
+
|
|
204
|
+
#endif /* NUMBL_JIT_RUNTIME_H */
|
package/native/numbl_addon.cpp
CHANGED
|
@@ -22,14 +22,34 @@
|
|
|
22
22
|
#include "numbl_addon_common.h"
|
|
23
23
|
#include <cstdlib>
|
|
24
24
|
|
|
25
|
+
#ifndef __APPLE__
|
|
25
26
|
extern "C" {
|
|
26
27
|
void openblas_set_num_threads(int num_threads);
|
|
27
28
|
}
|
|
29
|
+
#endif
|
|
28
30
|
|
|
29
31
|
// ── Addon version ────────────────────────────────────────────────────────────
|
|
30
32
|
// Bump this integer whenever the addon's API changes (new functions, signature
|
|
31
33
|
// changes, etc.) so that the JS side can detect stale builds.
|
|
32
|
-
static const int ADDON_VERSION =
|
|
34
|
+
static const int ADDON_VERSION = 8;
|
|
35
|
+
|
|
36
|
+
// ── New tensor-ops layer (native/ops/) ───────────────────────────────────────
|
|
37
|
+
Napi::Value TensorOpRealBinary(const Napi::CallbackInfo& info);
|
|
38
|
+
Napi::Value TensorOpRealScalarBinary(const Napi::CallbackInfo& info);
|
|
39
|
+
Napi::Value TensorOpComplexBinary(const Napi::CallbackInfo& info);
|
|
40
|
+
Napi::Value TensorOpComplexScalarBinary(const Napi::CallbackInfo& info);
|
|
41
|
+
Napi::Value TensorOpRealUnary(const Napi::CallbackInfo& info);
|
|
42
|
+
Napi::Value TensorOpComplexUnary(const Napi::CallbackInfo& info);
|
|
43
|
+
Napi::Value TensorOpComplexAbs(const Napi::CallbackInfo& info);
|
|
44
|
+
Napi::Value TensorOpRealComparison(const Napi::CallbackInfo& info);
|
|
45
|
+
Napi::Value TensorOpRealScalarComparison(const Napi::CallbackInfo& info);
|
|
46
|
+
Napi::Value TensorOpComplexComparison(const Napi::CallbackInfo& info);
|
|
47
|
+
Napi::Value TensorOpComplexScalarComparison(const Napi::CallbackInfo& info);
|
|
48
|
+
Napi::Value TensorOpRealFlatReduce(const Napi::CallbackInfo& info);
|
|
49
|
+
Napi::Value TensorOpComplexFlatReduce(const Napi::CallbackInfo& info);
|
|
50
|
+
Napi::Value TensorOpBesselReal(const Napi::CallbackInfo& info);
|
|
51
|
+
Napi::Value TensorOpBesselH(const Napi::CallbackInfo& info);
|
|
52
|
+
Napi::Value TensorOpDumpCodes(const Napi::CallbackInfo& info);
|
|
33
53
|
|
|
34
54
|
static Napi::Value AddonVersion(const Napi::CallbackInfo& info) {
|
|
35
55
|
return Napi::Number::New(info.Env(), ADDON_VERSION);
|
|
@@ -40,9 +60,16 @@ static Napi::Value AddonVersion(const Napi::CallbackInfo& info) {
|
|
|
40
60
|
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
41
61
|
// Use single-threaded BLAS unless the user explicitly set the env var.
|
|
42
62
|
// Multi-threaded BLAS adds overhead for the many small matmuls in numbl.
|
|
63
|
+
#ifdef __APPLE__
|
|
64
|
+
// Accelerate reads VECLIB_MAXIMUM_THREADS from the environment on first use.
|
|
65
|
+
if (!std::getenv("VECLIB_MAXIMUM_THREADS")) {
|
|
66
|
+
setenv("VECLIB_MAXIMUM_THREADS", "1", 0);
|
|
67
|
+
}
|
|
68
|
+
#else
|
|
43
69
|
if (!std::getenv("OPENBLAS_NUM_THREADS")) {
|
|
44
70
|
openblas_set_num_threads(1);
|
|
45
71
|
}
|
|
72
|
+
#endif
|
|
46
73
|
exports.Set(Napi::String::New(env, "addonVersion"),
|
|
47
74
|
Napi::Function::New(env, AddonVersion));
|
|
48
75
|
exports.Set(Napi::String::New(env, "inv"),
|
|
@@ -107,6 +134,40 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
|
107
134
|
Napi::Function::New(env, Gmres));
|
|
108
135
|
exports.Set(Napi::String::New(env, "gmresComplex"),
|
|
109
136
|
Napi::Function::New(env, GmresComplex));
|
|
137
|
+
|
|
138
|
+
// ── New tensor-ops layer ──────────────────────────────────────────────────
|
|
139
|
+
exports.Set(Napi::String::New(env, "tensorOpRealBinary"),
|
|
140
|
+
Napi::Function::New(env, TensorOpRealBinary));
|
|
141
|
+
exports.Set(Napi::String::New(env, "tensorOpRealScalarBinary"),
|
|
142
|
+
Napi::Function::New(env, TensorOpRealScalarBinary));
|
|
143
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexBinary"),
|
|
144
|
+
Napi::Function::New(env, TensorOpComplexBinary));
|
|
145
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexScalarBinary"),
|
|
146
|
+
Napi::Function::New(env, TensorOpComplexScalarBinary));
|
|
147
|
+
exports.Set(Napi::String::New(env, "tensorOpRealUnary"),
|
|
148
|
+
Napi::Function::New(env, TensorOpRealUnary));
|
|
149
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexUnary"),
|
|
150
|
+
Napi::Function::New(env, TensorOpComplexUnary));
|
|
151
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexAbs"),
|
|
152
|
+
Napi::Function::New(env, TensorOpComplexAbs));
|
|
153
|
+
exports.Set(Napi::String::New(env, "tensorOpRealComparison"),
|
|
154
|
+
Napi::Function::New(env, TensorOpRealComparison));
|
|
155
|
+
exports.Set(Napi::String::New(env, "tensorOpRealScalarComparison"),
|
|
156
|
+
Napi::Function::New(env, TensorOpRealScalarComparison));
|
|
157
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexComparison"),
|
|
158
|
+
Napi::Function::New(env, TensorOpComplexComparison));
|
|
159
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexScalarComparison"),
|
|
160
|
+
Napi::Function::New(env, TensorOpComplexScalarComparison));
|
|
161
|
+
exports.Set(Napi::String::New(env, "tensorOpRealFlatReduce"),
|
|
162
|
+
Napi::Function::New(env, TensorOpRealFlatReduce));
|
|
163
|
+
exports.Set(Napi::String::New(env, "tensorOpComplexFlatReduce"),
|
|
164
|
+
Napi::Function::New(env, TensorOpComplexFlatReduce));
|
|
165
|
+
exports.Set(Napi::String::New(env, "tensorOpBesselReal"),
|
|
166
|
+
Napi::Function::New(env, TensorOpBesselReal));
|
|
167
|
+
exports.Set(Napi::String::New(env, "tensorOpBesselH"),
|
|
168
|
+
Napi::Function::New(env, TensorOpBesselH));
|
|
169
|
+
exports.Set(Napi::String::New(env, "tensorOpDumpCodes"),
|
|
170
|
+
Napi::Function::New(env, TensorOpDumpCodes));
|
|
110
171
|
return exports;
|
|
111
172
|
}
|
|
112
173
|
|