numbl 0.0.22 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +6 -4
- package/dist-cli/cli.js +288 -74
- package/native/elemwise.cpp +168 -0
- package/native/lapack_chol.cpp +1 -1
- package/native/lapack_eig.cpp +1 -1
- package/native/lapack_fft.cpp +1 -1
- package/native/lapack_fft_batch.cpp +1 -1
- package/native/lapack_inv.cpp +1 -1
- package/native/lapack_linsolve.cpp +1 -1
- package/native/lapack_lu.cpp +1 -1
- package/native/lapack_matmul.cpp +1 -1
- package/native/lapack_matmul_complex.cpp +110 -0
- package/native/lapack_qr.cpp +1 -1
- package/native/lapack_qz.cpp +1 -1
- package/native/lapack_svd.cpp +1 -1
- package/native/{lapack_addon.cpp → numbl_addon.cpp} +30 -3
- package/native/{lapack_common.h → numbl_addon_common.h} +11 -1
- package/package.json +1 -1
package/binding.gyp
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"targets": [
|
|
3
3
|
{
|
|
4
|
-
"target_name": "
|
|
4
|
+
"target_name": "numbl_addon",
|
|
5
5
|
"sources": [
|
|
6
|
-
"native/
|
|
6
|
+
"native/numbl_addon.cpp",
|
|
7
7
|
"native/lapack_inv.cpp",
|
|
8
8
|
"native/lapack_qr.cpp",
|
|
9
9
|
"native/lapack_lu.cpp",
|
|
10
10
|
"native/lapack_svd.cpp",
|
|
11
11
|
"native/lapack_matmul.cpp",
|
|
12
|
+
"native/lapack_matmul_complex.cpp",
|
|
12
13
|
"native/lapack_linsolve.cpp",
|
|
13
14
|
"native/lapack_eig.cpp",
|
|
14
15
|
"native/lapack_chol.cpp",
|
|
15
16
|
"native/lapack_qz.cpp",
|
|
16
17
|
"native/lapack_fft.cpp",
|
|
17
|
-
"native/lapack_fft_batch.cpp"
|
|
18
|
+
"native/lapack_fft_batch.cpp",
|
|
19
|
+
"native/elemwise.cpp"
|
|
18
20
|
],
|
|
19
21
|
"include_dirs": [
|
|
20
22
|
"<!@(node -p \"require('node-addon-api').include\")",
|
|
@@ -29,7 +31,7 @@
|
|
|
29
31
|
"<!@(pkg-config --libs fftw3 2>/dev/null || echo '-lfftw3')",
|
|
30
32
|
"<!@(pkg-config --libs-only-L fftw3 2>/dev/null | sed 's/-L/-Wl,-rpath,/g' || true)"
|
|
31
33
|
],
|
|
32
|
-
"cflags_cc": [ "-std=c++17", "-
|
|
34
|
+
"cflags_cc": [ "-std=c++17", "-O3", "-march=native" ]
|
|
33
35
|
}
|
|
34
36
|
]
|
|
35
37
|
}
|
package/dist-cli/cli.js
CHANGED
|
@@ -1214,6 +1214,10 @@ var RTV = {
|
|
|
1214
1214
|
while (s.length > 2 && s[s.length - 1] === 1) s.pop();
|
|
1215
1215
|
return { kind: "tensor", data: d, imag: im, shape: s, _rc: 1 };
|
|
1216
1216
|
},
|
|
1217
|
+
/** Fast tensor constructor — data must be FloatXArray, shape already normalized (no trailing singletons). */
|
|
1218
|
+
tensorRaw(data, shape) {
|
|
1219
|
+
return { kind: "tensor", data, imag: void 0, shape, _rc: 1 };
|
|
1220
|
+
},
|
|
1217
1221
|
/** Create a scalar tensor (1x1) */
|
|
1218
1222
|
scalar(value) {
|
|
1219
1223
|
return value;
|
|
@@ -1352,6 +1356,7 @@ var getItemTypeFromRuntimeValue = (value) => {
|
|
|
1352
1356
|
};
|
|
1353
1357
|
|
|
1354
1358
|
// src/numbl-core/native/lapack-bridge.ts
|
|
1359
|
+
var NATIVE_ADDON_EXPECTED_VERSION = 1;
|
|
1355
1360
|
var _bridge = null;
|
|
1356
1361
|
function setLapackBridge(bridge) {
|
|
1357
1362
|
_bridge = bridge;
|
|
@@ -19027,16 +19032,90 @@ function complexBinaryOp(a, b, op) {
|
|
|
19027
19032
|
`Matrix dimensions must agree: [${at.shape.join(",")}] vs [${bt.shape.join(",")}]`
|
|
19028
19033
|
);
|
|
19029
19034
|
}
|
|
19030
|
-
|
|
19031
|
-
|
|
19032
|
-
|
|
19033
|
-
|
|
19034
|
-
|
|
19035
|
-
|
|
19036
|
-
|
|
19037
|
-
|
|
19038
|
-
|
|
19035
|
+
var ELEMWISE_ADD = 0;
|
|
19036
|
+
var ELEMWISE_SUB = 1;
|
|
19037
|
+
var ELEMWISE_MUL = 2;
|
|
19038
|
+
var ELEMWISE_DIV = 3;
|
|
19039
|
+
function matchSameShapeTensors(a, b) {
|
|
19040
|
+
if (typeof a !== "object" || a === null || a.kind !== "tensor" || typeof b !== "object" || b === null || b.kind !== "tensor")
|
|
19041
|
+
return null;
|
|
19042
|
+
const at = a;
|
|
19043
|
+
const bt = b;
|
|
19044
|
+
if (at.data.length !== bt.data.length || at.shape.length !== bt.shape.length || at.shape.some((d, i) => d !== bt.shape[i]))
|
|
19045
|
+
return null;
|
|
19046
|
+
return [at, bt];
|
|
19047
|
+
}
|
|
19048
|
+
function tryNativeElemwiseReal(at, bt, opCode) {
|
|
19049
|
+
const bridge = getLapackBridge();
|
|
19050
|
+
if (!bridge?.elemwise) return null;
|
|
19051
|
+
const result = bridge.elemwise(
|
|
19052
|
+
at.data,
|
|
19053
|
+
bt.data,
|
|
19054
|
+
opCode
|
|
19055
|
+
);
|
|
19056
|
+
return RTV.tensorRaw(result, at.shape);
|
|
19057
|
+
}
|
|
19058
|
+
function tensorElemwiseComplex(at, bt, opCode, jsOp) {
|
|
19059
|
+
const bridge = getLapackBridge();
|
|
19060
|
+
if (bridge?.elemwiseComplex) {
|
|
19061
|
+
const r = bridge.elemwiseComplex(
|
|
19062
|
+
at.data,
|
|
19063
|
+
at.imag ?? null,
|
|
19064
|
+
bt.data,
|
|
19065
|
+
bt.imag ?? null,
|
|
19066
|
+
opCode
|
|
19067
|
+
);
|
|
19068
|
+
if (r.im) return RTV.tensor(r.re, at.shape, r.im);
|
|
19069
|
+
return RTV.tensorRaw(r.re, at.shape);
|
|
19070
|
+
}
|
|
19071
|
+
const len = at.data.length;
|
|
19072
|
+
const aIm = at.imag;
|
|
19073
|
+
const bIm = bt.imag;
|
|
19074
|
+
const resultRe = new FloatXArray(len);
|
|
19075
|
+
const resultIm = new FloatXArray(len);
|
|
19076
|
+
if (aIm && bIm) {
|
|
19077
|
+
for (let i = 0; i < len; i++) {
|
|
19078
|
+
const r = jsOp(at.data[i], aIm[i], bt.data[i], bIm[i]);
|
|
19079
|
+
resultRe[i] = r.re;
|
|
19080
|
+
resultIm[i] = r.im;
|
|
19081
|
+
}
|
|
19082
|
+
} else if (aIm) {
|
|
19083
|
+
for (let i = 0; i < len; i++) {
|
|
19084
|
+
const r = jsOp(at.data[i], aIm[i], bt.data[i], 0);
|
|
19085
|
+
resultRe[i] = r.re;
|
|
19086
|
+
resultIm[i] = r.im;
|
|
19039
19087
|
}
|
|
19088
|
+
} else {
|
|
19089
|
+
for (let i = 0; i < len; i++) {
|
|
19090
|
+
const r = jsOp(at.data[i], 0, bt.data[i], bIm[i]);
|
|
19091
|
+
resultRe[i] = r.re;
|
|
19092
|
+
resultIm[i] = r.im;
|
|
19093
|
+
}
|
|
19094
|
+
}
|
|
19095
|
+
const isReal = resultIm.every((x) => x === 0);
|
|
19096
|
+
return RTV.tensor(resultRe, at.shape, isReal ? void 0 : resultIm);
|
|
19097
|
+
}
|
|
19098
|
+
function mAdd(a, b) {
|
|
19099
|
+
const m = matchSameShapeTensors(a, b);
|
|
19100
|
+
if (m) {
|
|
19101
|
+
const [at, bt] = m;
|
|
19102
|
+
if (!at.imag && !bt.imag) {
|
|
19103
|
+
const nr = tryNativeElemwiseReal(at, bt, ELEMWISE_ADD);
|
|
19104
|
+
if (nr) return nr;
|
|
19105
|
+
const len = at.data.length;
|
|
19106
|
+
const result = new FloatXArray(len);
|
|
19107
|
+
for (let i = 0; i < len; i++) result[i] = at.data[i] + bt.data[i];
|
|
19108
|
+
return RTV.tensorRaw(result, at.shape);
|
|
19109
|
+
}
|
|
19110
|
+
return tensorElemwiseComplex(
|
|
19111
|
+
at,
|
|
19112
|
+
bt,
|
|
19113
|
+
ELEMWISE_ADD,
|
|
19114
|
+
(aRe, aIm, bRe, bIm) => ({
|
|
19115
|
+
re: aRe + bRe,
|
|
19116
|
+
im: aIm + bIm
|
|
19117
|
+
})
|
|
19118
|
+
);
|
|
19040
19119
|
}
|
|
19041
19120
|
if (isRuntimeSparseMatrix(a) || isRuntimeSparseMatrix(b))
|
|
19042
19121
|
return mAddSparse(a, b);
|
|
@@ -19049,6 +19128,27 @@ function mAdd(a, b) {
|
|
|
19049
19128
|
return binaryOp(a, b, (x, y) => x + y);
|
|
19050
19129
|
}
|
|
19051
19130
|
function mSub(a, b) {
|
|
19131
|
+
const m = matchSameShapeTensors(a, b);
|
|
19132
|
+
if (m) {
|
|
19133
|
+
const [at, bt] = m;
|
|
19134
|
+
if (!at.imag && !bt.imag) {
|
|
19135
|
+
const nr = tryNativeElemwiseReal(at, bt, ELEMWISE_SUB);
|
|
19136
|
+
if (nr) return nr;
|
|
19137
|
+
const len = at.data.length;
|
|
19138
|
+
const result = new FloatXArray(len);
|
|
19139
|
+
for (let i = 0; i < len; i++) result[i] = at.data[i] - bt.data[i];
|
|
19140
|
+
return RTV.tensorRaw(result, at.shape);
|
|
19141
|
+
}
|
|
19142
|
+
return tensorElemwiseComplex(
|
|
19143
|
+
at,
|
|
19144
|
+
bt,
|
|
19145
|
+
ELEMWISE_SUB,
|
|
19146
|
+
(aRe, aIm, bRe, bIm) => ({
|
|
19147
|
+
re: aRe - bRe,
|
|
19148
|
+
im: aIm - bIm
|
|
19149
|
+
})
|
|
19150
|
+
);
|
|
19151
|
+
}
|
|
19052
19152
|
if (isRuntimeSparseMatrix(a) || isRuntimeSparseMatrix(b))
|
|
19053
19153
|
return mSubSparse(a, b);
|
|
19054
19154
|
if (isComplexOrMixed(a, b)) {
|
|
@@ -19074,15 +19174,26 @@ function mMul(a, b) {
|
|
|
19074
19174
|
return binaryOp(a, b, (x, y) => x * y);
|
|
19075
19175
|
}
|
|
19076
19176
|
function mElemMul(a, b) {
|
|
19077
|
-
|
|
19078
|
-
|
|
19079
|
-
const bt =
|
|
19080
|
-
if (!at.imag && !bt.imag
|
|
19081
|
-
const
|
|
19082
|
-
|
|
19083
|
-
|
|
19084
|
-
|
|
19085
|
-
|
|
19177
|
+
const m = matchSameShapeTensors(a, b);
|
|
19178
|
+
if (m) {
|
|
19179
|
+
const [at, bt] = m;
|
|
19180
|
+
if (!at.imag && !bt.imag) {
|
|
19181
|
+
const nr = tryNativeElemwiseReal(at, bt, ELEMWISE_MUL);
|
|
19182
|
+
if (nr) return nr;
|
|
19183
|
+
const len = at.data.length;
|
|
19184
|
+
const result = new FloatXArray(len);
|
|
19185
|
+
for (let i = 0; i < len; i++) result[i] = at.data[i] * bt.data[i];
|
|
19186
|
+
return RTV.tensorRaw(result, at.shape);
|
|
19187
|
+
}
|
|
19188
|
+
return tensorElemwiseComplex(
|
|
19189
|
+
at,
|
|
19190
|
+
bt,
|
|
19191
|
+
ELEMWISE_MUL,
|
|
19192
|
+
(aRe, aIm, bRe, bIm) => ({
|
|
19193
|
+
re: aRe * bRe - aIm * bIm,
|
|
19194
|
+
im: aRe * bIm + aIm * bRe
|
|
19195
|
+
})
|
|
19196
|
+
);
|
|
19086
19197
|
}
|
|
19087
19198
|
if (isRuntimeSparseMatrix(a) || isRuntimeSparseMatrix(b))
|
|
19088
19199
|
return mElemMulSparse(a, b);
|
|
@@ -19110,6 +19221,19 @@ function mDiv(a, b) {
|
|
|
19110
19221
|
return binaryOp(a, b, (x, y) => x / y);
|
|
19111
19222
|
}
|
|
19112
19223
|
function mElemDiv(a, b) {
|
|
19224
|
+
const m = matchSameShapeTensors(a, b);
|
|
19225
|
+
if (m) {
|
|
19226
|
+
const [at, bt] = m;
|
|
19227
|
+
if (!at.imag && !bt.imag) {
|
|
19228
|
+
const nr = tryNativeElemwiseReal(at, bt, ELEMWISE_DIV);
|
|
19229
|
+
if (nr) return nr;
|
|
19230
|
+
const len = at.data.length;
|
|
19231
|
+
const result = new FloatXArray(len);
|
|
19232
|
+
for (let i = 0; i < len; i++) result[i] = at.data[i] / bt.data[i];
|
|
19233
|
+
return RTV.tensorRaw(result, at.shape);
|
|
19234
|
+
}
|
|
19235
|
+
return tensorElemwiseComplex(at, bt, ELEMWISE_DIV, complexDivide);
|
|
19236
|
+
}
|
|
19113
19237
|
if (isRuntimeSparseMatrix(a) || isRuntimeSparseMatrix(b))
|
|
19114
19238
|
return mElemDivSparse(a, b);
|
|
19115
19239
|
if (isComplexOrMixed(a, b)) {
|
|
@@ -19736,14 +19860,37 @@ function matMul(a, b) {
|
|
|
19736
19860
|
}
|
|
19737
19861
|
const isComplex2 = a.imag !== void 0 || b.imag !== void 0;
|
|
19738
19862
|
if (!isComplex2) {
|
|
19739
|
-
const
|
|
19863
|
+
const bridge2 = getEffectiveBridge("matmul", "matmul");
|
|
19740
19864
|
const f64A = a.data instanceof Float64Array ? a.data : new Float64Array(a.data);
|
|
19741
19865
|
const f64B = b.data instanceof Float64Array ? b.data : new Float64Array(b.data);
|
|
19742
|
-
const raw =
|
|
19866
|
+
const raw = bridge2.matmul(f64A, aRows, aCols, f64B, bCols);
|
|
19743
19867
|
return unwrap1x1(RTV.tensor(new FloatXArray(raw), [aRows, bCols]));
|
|
19744
19868
|
}
|
|
19745
19869
|
const aIm = a.imag || new FloatXArray(a.data.length);
|
|
19746
19870
|
const bIm = b.imag || new FloatXArray(b.data.length);
|
|
19871
|
+
const bridge = getEffectiveBridge("matmul", "matmulComplex");
|
|
19872
|
+
if (bridge.matmulComplex) {
|
|
19873
|
+
const f64ARe = a.data instanceof Float64Array ? a.data : new Float64Array(a.data);
|
|
19874
|
+
const f64AIm = aIm instanceof Float64Array ? aIm : new Float64Array(aIm);
|
|
19875
|
+
const f64BRe = b.data instanceof Float64Array ? b.data : new Float64Array(b.data);
|
|
19876
|
+
const f64BIm = bIm instanceof Float64Array ? bIm : new Float64Array(bIm);
|
|
19877
|
+
const raw = bridge.matmulComplex(
|
|
19878
|
+
f64ARe,
|
|
19879
|
+
f64AIm,
|
|
19880
|
+
aRows,
|
|
19881
|
+
aCols,
|
|
19882
|
+
f64BRe,
|
|
19883
|
+
f64BIm,
|
|
19884
|
+
bCols
|
|
19885
|
+
);
|
|
19886
|
+
return unwrap1x1(
|
|
19887
|
+
RTV.tensor(
|
|
19888
|
+
new FloatXArray(raw.re),
|
|
19889
|
+
[aRows, bCols],
|
|
19890
|
+
raw.im ? new FloatXArray(raw.im) : void 0
|
|
19891
|
+
)
|
|
19892
|
+
);
|
|
19893
|
+
}
|
|
19747
19894
|
const resultRe = new FloatXArray(aRows * bCols);
|
|
19748
19895
|
const resultIm = new FloatXArray(aRows * bCols);
|
|
19749
19896
|
for (let i = 0; i < aRows; i++) {
|
|
@@ -20239,6 +20386,30 @@ function indexIntoTensor1D(base, idx) {
|
|
|
20239
20386
|
}
|
|
20240
20387
|
function indexIntoTensor2D(base, rowIdx, colIdx) {
|
|
20241
20388
|
const [rows, cols] = tensorSize2D(base);
|
|
20389
|
+
if (isRuntimeNumber(rowIdx) && isColonIndex(colIdx)) {
|
|
20390
|
+
const r = Math.round(rowIdx) - 1;
|
|
20391
|
+
if (r < 0 || r >= rows)
|
|
20392
|
+
throw new RuntimeError("Index exceeds array bounds");
|
|
20393
|
+
const resultData2 = new FloatXArray(cols);
|
|
20394
|
+
const resultImag2 = base.imag ? new FloatXArray(cols) : void 0;
|
|
20395
|
+
for (let ci = 0; ci < cols; ci++) {
|
|
20396
|
+
resultData2[ci] = base.data[r + ci * rows];
|
|
20397
|
+
if (resultImag2 && base.imag) resultImag2[ci] = base.imag[r + ci * rows];
|
|
20398
|
+
}
|
|
20399
|
+
return RTV.tensor(resultData2, [1, cols], resultImag2);
|
|
20400
|
+
}
|
|
20401
|
+
if (isColonIndex(rowIdx) && isRuntimeNumber(colIdx)) {
|
|
20402
|
+
const c = Math.round(colIdx) - 1;
|
|
20403
|
+
if (c < 0 || c >= cols)
|
|
20404
|
+
throw new RuntimeError("Index exceeds array bounds");
|
|
20405
|
+
const offset = c * rows;
|
|
20406
|
+
const resultData2 = new FloatXArray(rows);
|
|
20407
|
+
for (let ri = 0; ri < rows; ri++) resultData2[ri] = base.data[offset + ri];
|
|
20408
|
+
const resultImag2 = base.imag ? new FloatXArray(rows) : void 0;
|
|
20409
|
+
if (resultImag2 && base.imag)
|
|
20410
|
+
for (let ri = 0; ri < rows; ri++) resultImag2[ri] = base.imag[offset + ri];
|
|
20411
|
+
return RTV.tensor(resultData2, [rows, 1], resultImag2);
|
|
20412
|
+
}
|
|
20242
20413
|
const rowIdxArr = resolveIndex(rowIdx, rows);
|
|
20243
20414
|
const colIdxArr = resolveIndex(colIdx, cols);
|
|
20244
20415
|
const numR = rowIdxArr.length;
|
|
@@ -27582,6 +27753,19 @@ function registerArrayManipulationFunctions() {
|
|
|
27582
27753
|
if (n !== data.length) {
|
|
27583
27754
|
throw new RuntimeError("reshape: number of elements must not change");
|
|
27584
27755
|
}
|
|
27756
|
+
if (isRuntimeTensor(v)) {
|
|
27757
|
+
v._rc++;
|
|
27758
|
+
const s = [...shape];
|
|
27759
|
+
while (s.length > 2 && s[s.length - 1] === 1) s.pop();
|
|
27760
|
+
return {
|
|
27761
|
+
kind: "tensor",
|
|
27762
|
+
data,
|
|
27763
|
+
imag,
|
|
27764
|
+
shape: s,
|
|
27765
|
+
_isLogical: v._isLogical,
|
|
27766
|
+
_rc: v._rc
|
|
27767
|
+
};
|
|
27768
|
+
}
|
|
27585
27769
|
return RTV.tensor(
|
|
27586
27770
|
new FloatXArray(data),
|
|
27587
27771
|
shape,
|
|
@@ -30474,10 +30658,10 @@ function registerSortUnique() {
|
|
|
30474
30658
|
}
|
|
30475
30659
|
function uniqueByRows(v, nargout, stable) {
|
|
30476
30660
|
const [rows, cols] = tensorSize2D(v);
|
|
30477
|
-
const rowKey = (r) => {
|
|
30478
|
-
|
|
30479
|
-
for (let c =
|
|
30480
|
-
return
|
|
30661
|
+
const rowKey = cols === 2 ? (r) => v.data[r] + "," + v.data[rows + r] : (r) => {
|
|
30662
|
+
let key = "" + v.data[r];
|
|
30663
|
+
for (let c = 1; c < cols; c++) key += "," + v.data[c * rows + r];
|
|
30664
|
+
return key;
|
|
30481
30665
|
};
|
|
30482
30666
|
const rowHasNaN = (r) => {
|
|
30483
30667
|
for (let c = 0; c < cols; c++) {
|
|
@@ -30530,10 +30714,21 @@ function uniqueByRows(v, nargout, stable) {
|
|
|
30530
30714
|
1
|
|
30531
30715
|
]);
|
|
30532
30716
|
if (!stable) {
|
|
30533
|
-
const
|
|
30717
|
+
const sortedKeyToPos = /* @__PURE__ */ new Map();
|
|
30718
|
+
for (let u = 0; u < nUnique; u++) {
|
|
30719
|
+
sortedKeyToPos.set(rowKey(uniqueRowOrder[u]), u + 1);
|
|
30720
|
+
}
|
|
30534
30721
|
for (let r = 0; r < rows; r++) {
|
|
30535
|
-
|
|
30536
|
-
|
|
30722
|
+
if (rowHasNaN(r)) {
|
|
30723
|
+
for (let u = 0; u < nUnique; u++) {
|
|
30724
|
+
if (uniqueRowOrder[u] === r) {
|
|
30725
|
+
ic[r] = u + 1;
|
|
30726
|
+
break;
|
|
30727
|
+
}
|
|
30728
|
+
}
|
|
30729
|
+
} else {
|
|
30730
|
+
ic[r] = sortedKeyToPos.get(rowKey(r));
|
|
30731
|
+
}
|
|
30537
30732
|
}
|
|
30538
30733
|
}
|
|
30539
30734
|
const icTensor = RTV.tensor(ic, [rows, 1]);
|
|
@@ -37520,56 +37715,61 @@ function not(v) {
|
|
|
37520
37715
|
return RTV.logical(false);
|
|
37521
37716
|
}
|
|
37522
37717
|
function binop(op, a, b) {
|
|
37523
|
-
|
|
37524
|
-
const bn = asNumber(b);
|
|
37525
|
-
if (an !== null && bn !== null) {
|
|
37718
|
+
if (typeof a === "number" && typeof b === "number") {
|
|
37526
37719
|
switch (op) {
|
|
37527
37720
|
case "Add" /* Add */:
|
|
37528
|
-
return
|
|
37721
|
+
return a + b;
|
|
37529
37722
|
case "Sub" /* Sub */:
|
|
37530
|
-
return
|
|
37723
|
+
return a - b;
|
|
37531
37724
|
case "Mul" /* Mul */:
|
|
37532
|
-
return
|
|
37725
|
+
return a * b;
|
|
37533
37726
|
case "Div" /* Div */:
|
|
37534
|
-
return
|
|
37727
|
+
return a / b;
|
|
37535
37728
|
case "Pow" /* Pow */: {
|
|
37536
|
-
const r = Math.pow(
|
|
37537
|
-
if (isNaN(r) && !isNaN(
|
|
37729
|
+
const r = Math.pow(a, b);
|
|
37730
|
+
if (isNaN(r) && !isNaN(a) && !isNaN(b)) break;
|
|
37538
37731
|
return r;
|
|
37539
37732
|
}
|
|
37540
37733
|
case "ElemMul" /* ElemMul */:
|
|
37541
|
-
return
|
|
37734
|
+
return a * b;
|
|
37542
37735
|
case "ElemDiv" /* ElemDiv */:
|
|
37543
|
-
return
|
|
37736
|
+
return a / b;
|
|
37544
37737
|
case "ElemPow" /* ElemPow */: {
|
|
37545
|
-
const r = Math.pow(
|
|
37546
|
-
if (isNaN(r) && !isNaN(
|
|
37738
|
+
const r = Math.pow(a, b);
|
|
37739
|
+
if (isNaN(r) && !isNaN(a) && !isNaN(b)) break;
|
|
37547
37740
|
return r;
|
|
37548
37741
|
}
|
|
37549
37742
|
case "LeftDiv" /* LeftDiv */:
|
|
37550
|
-
return
|
|
37743
|
+
return b / a;
|
|
37551
37744
|
case "ElemLeftDiv" /* ElemLeftDiv */:
|
|
37552
|
-
return
|
|
37745
|
+
return b / a;
|
|
37553
37746
|
case "Equal" /* Equal */:
|
|
37554
|
-
return RTV.logical(
|
|
37747
|
+
return RTV.logical(a === b);
|
|
37555
37748
|
case "NotEqual" /* NotEqual */:
|
|
37556
|
-
return RTV.logical(
|
|
37749
|
+
return RTV.logical(a !== b);
|
|
37557
37750
|
case "Less" /* Less */:
|
|
37558
|
-
return RTV.logical(
|
|
37751
|
+
return RTV.logical(a < b);
|
|
37559
37752
|
case "LessEqual" /* LessEqual */:
|
|
37560
|
-
return RTV.logical(
|
|
37753
|
+
return RTV.logical(a <= b);
|
|
37561
37754
|
case "Greater" /* Greater */:
|
|
37562
|
-
return RTV.logical(
|
|
37755
|
+
return RTV.logical(a > b);
|
|
37563
37756
|
case "GreaterEqual" /* GreaterEqual */:
|
|
37564
|
-
return RTV.logical(
|
|
37757
|
+
return RTV.logical(a >= b);
|
|
37565
37758
|
case "BitAnd" /* BitAnd */:
|
|
37566
|
-
return RTV.logical(
|
|
37759
|
+
return RTV.logical(a !== 0 && b !== 0);
|
|
37567
37760
|
case "BitOr" /* BitOr */:
|
|
37568
|
-
return RTV.logical(
|
|
37761
|
+
return RTV.logical(a !== 0 || b !== 0);
|
|
37569
37762
|
}
|
|
37570
37763
|
}
|
|
37571
|
-
|
|
37572
|
-
|
|
37764
|
+
if (typeof a !== "object" || typeof b !== "object") {
|
|
37765
|
+
const an = asNumber(a);
|
|
37766
|
+
const bn = asNumber(b);
|
|
37767
|
+
if (an !== null && bn !== null) {
|
|
37768
|
+
return binop(op, an, bn);
|
|
37769
|
+
}
|
|
37770
|
+
}
|
|
37771
|
+
const ma = typeof a === "object" && a !== null && "kind" in a ? a : ensureRuntimeValue(a);
|
|
37772
|
+
const mb = typeof b === "object" && b !== null && "kind" in b ? b : ensureRuntimeValue(b);
|
|
37573
37773
|
let result;
|
|
37574
37774
|
switch (op) {
|
|
37575
37775
|
case "Add" /* Add */:
|
|
@@ -38013,13 +38213,6 @@ function callBuiltin(rt, name, nargout, args) {
|
|
|
38013
38213
|
if (builtin) return builtin(nargout, args);
|
|
38014
38214
|
throw new RuntimeError(`'${name}' is not a builtin function`);
|
|
38015
38215
|
}
|
|
38016
|
-
function callBuiltinSync(rt, name, nargout, args) {
|
|
38017
|
-
const plotResult = dispatchPlotCall(rt, name, args);
|
|
38018
|
-
if (plotResult !== void 0) return plotResult;
|
|
38019
|
-
const builtin = rt.builtins[name];
|
|
38020
|
-
if (builtin) return builtin(nargout, args);
|
|
38021
|
-
throw new RuntimeError(`'${name}' is not a builtin function`);
|
|
38022
|
-
}
|
|
38023
38216
|
function callClassMethod(rt, className, methodName, nargout, args) {
|
|
38024
38217
|
return dispatch(rt, methodName, nargout, args, className);
|
|
38025
38218
|
}
|
|
@@ -38306,6 +38499,22 @@ function structfunImpl(rt, _nargout, args) {
|
|
|
38306
38499
|
return RTV.struct(fields);
|
|
38307
38500
|
}
|
|
38308
38501
|
}
|
|
38502
|
+
var bsxfunOpMap = {
|
|
38503
|
+
plus: mAdd,
|
|
38504
|
+
minus: mSub,
|
|
38505
|
+
times: mElemMul,
|
|
38506
|
+
rdivide: mElemDiv
|
|
38507
|
+
};
|
|
38508
|
+
function resolveKnownBsxfunOp(fnArg) {
|
|
38509
|
+
if (typeof fnArg === "function") {
|
|
38510
|
+
return void 0;
|
|
38511
|
+
}
|
|
38512
|
+
const mv = ensureRuntimeValue(fnArg);
|
|
38513
|
+
if (isRuntimeFunction(mv) && mv.impl === "builtin") {
|
|
38514
|
+
return bsxfunOpMap[mv.name];
|
|
38515
|
+
}
|
|
38516
|
+
return void 0;
|
|
38517
|
+
}
|
|
38309
38518
|
function bsxfunImpl(rt, _nargout, args) {
|
|
38310
38519
|
if (args.length !== 3)
|
|
38311
38520
|
throw new RuntimeError("bsxfun requires exactly 3 arguments");
|
|
@@ -38323,6 +38532,10 @@ function bsxfunImpl(rt, _nargout, args) {
|
|
|
38323
38532
|
);
|
|
38324
38533
|
}
|
|
38325
38534
|
}
|
|
38535
|
+
const knownOp = resolveKnownBsxfunOp(fnArg);
|
|
38536
|
+
if (knownOp) {
|
|
38537
|
+
return knownOp(ensureRuntimeValue(args[1]), ensureRuntimeValue(args[2]));
|
|
38538
|
+
}
|
|
38326
38539
|
const rawA = ensureRuntimeValue(args[1]);
|
|
38327
38540
|
const rawB = ensureRuntimeValue(args[2]);
|
|
38328
38541
|
const a = coerceToTensor2(rawA, "bsxfun", "first");
|
|
@@ -39089,7 +39302,7 @@ function registerSpecialBuiltins(rt) {
|
|
|
39089
39302
|
if (fn) {
|
|
39090
39303
|
return fn(nargout, args.slice(1));
|
|
39091
39304
|
}
|
|
39092
|
-
return rt.
|
|
39305
|
+
return rt.callBuiltin(fnName, nargout, args.slice(1));
|
|
39093
39306
|
};
|
|
39094
39307
|
const requireFileIO = () => {
|
|
39095
39308
|
if (!rt.fileIO)
|
|
@@ -41083,9 +41296,6 @@ var Runtime = class _Runtime {
|
|
|
41083
41296
|
}
|
|
41084
41297
|
return binop(op, a, b);
|
|
41085
41298
|
}
|
|
41086
|
-
binopSync(op, a, b) {
|
|
41087
|
-
return binop(op, a, b);
|
|
41088
|
-
}
|
|
41089
41299
|
range(start, step, end) {
|
|
41090
41300
|
return range(start, step, end);
|
|
41091
41301
|
}
|
|
@@ -41205,9 +41415,6 @@ var Runtime = class _Runtime {
|
|
|
41205
41415
|
callBuiltin(name, nargout, args) {
|
|
41206
41416
|
return callBuiltin(this, name, nargout, args);
|
|
41207
41417
|
}
|
|
41208
|
-
callBuiltinSync(name, nargout, args) {
|
|
41209
|
-
return callBuiltinSync(this, name, nargout, args);
|
|
41210
|
-
}
|
|
41211
41418
|
callClassMethod(className, methodName, nargout, args) {
|
|
41212
41419
|
return callClassMethod(this, className, methodName, nargout, args);
|
|
41213
41420
|
}
|
|
@@ -44347,9 +44554,6 @@ function genBinary(cg, kind) {
|
|
|
44347
44554
|
break;
|
|
44348
44555
|
}
|
|
44349
44556
|
}
|
|
44350
|
-
if (leftType.kind !== "Unknown" && rightType.kind !== "Unknown" && leftType.kind !== "ClassInstance" && rightType.kind !== "ClassInstance") {
|
|
44351
|
-
return `$rt.binopSync(${JSON.stringify(kind.op)}, ${left}, ${right})`;
|
|
44352
|
-
}
|
|
44353
44557
|
return `$rt.binop(${JSON.stringify(kind.op)}, ${left}, ${right})`;
|
|
44354
44558
|
}
|
|
44355
44559
|
function genTensor(cg, kind) {
|
|
@@ -47572,7 +47776,7 @@ Call stack (most recent call first):`;
|
|
|
47572
47776
|
}
|
|
47573
47777
|
|
|
47574
47778
|
// src/numbl-core/version.ts
|
|
47575
|
-
var NUMBL_VERSION = "0.0.
|
|
47779
|
+
var NUMBL_VERSION = "0.0.23";
|
|
47576
47780
|
|
|
47577
47781
|
// src/cli-repl.ts
|
|
47578
47782
|
import { createInterface } from "readline";
|
|
@@ -48203,16 +48407,26 @@ var NodeFileIOAdapter = class {
|
|
|
48203
48407
|
var __filename = fileURLToPath2(import.meta.url);
|
|
48204
48408
|
var __dirname = dirname3(__filename);
|
|
48205
48409
|
var packageDir2 = join6(__dirname, "..");
|
|
48206
|
-
var addonPath = join6(packageDir2, "build", "Release", "
|
|
48410
|
+
var addonPath = join6(packageDir2, "build", "Release", "numbl_addon.node");
|
|
48207
48411
|
var nativeAddonLoaded = false;
|
|
48208
48412
|
if (!process.env.NUMBL_NO_NATIVE) {
|
|
48209
48413
|
try {
|
|
48210
48414
|
const req = createRequire(import.meta.url);
|
|
48211
48415
|
const addon = req(addonPath);
|
|
48212
|
-
|
|
48213
|
-
|
|
48214
|
-
|
|
48416
|
+
const addonVer = typeof addon.addonVersion === "function" ? addon.addonVersion() : 0;
|
|
48417
|
+
if (addonVer !== NATIVE_ADDON_EXPECTED_VERSION) {
|
|
48418
|
+
console.error(
|
|
48419
|
+
`Warning: native addon version mismatch (got ${addonVer}, expected ${NATIVE_ADDON_EXPECTED_VERSION}). Run "npx numbl build-addon" to rebuild. Using JS fallbacks.`
|
|
48420
|
+
);
|
|
48421
|
+
} else {
|
|
48422
|
+
setLapackBridge(addon);
|
|
48423
|
+
setLapackBridge(addon);
|
|
48424
|
+
nativeAddonLoaded = true;
|
|
48425
|
+
}
|
|
48215
48426
|
} catch {
|
|
48427
|
+
console.error(
|
|
48428
|
+
`Warning: native addon not found. Run "npx numbl build-addon" to build it. Using JS fallbacks.`
|
|
48429
|
+
);
|
|
48216
48430
|
}
|
|
48217
48431
|
}
|
|
48218
48432
|
var nativeBridge;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Element-wise binary operations on Float64Arrays.
|
|
3
|
+
*
|
|
4
|
+
* Real:
|
|
5
|
+
* elemwise(a: Float64Array, b: Float64Array, op: number): Float64Array
|
|
6
|
+
* op: 0=add, 1=sub, 2=mul, 3=div
|
|
7
|
+
*
|
|
8
|
+
* Complex:
|
|
9
|
+
* elemwiseComplex(aRe: Float64Array, aIm: Float64Array,
|
|
10
|
+
* bRe: Float64Array, bIm: Float64Array,
|
|
11
|
+
* op: number): { re: Float64Array, im: Float64Array }
|
|
12
|
+
* op: 0=add, 1=sub, 2=mul, 3=div
|
|
13
|
+
* Pass null for aIm or bIm to treat as zero (mixed real/complex).
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
#include "numbl_addon_common.h"
|
|
17
|
+
|
|
18
|
+
// ── elemwise() — real element-wise binary op ────────────────────────────────
|
|
19
|
+
|
|
20
|
+
Napi::Value Elemwise(const Napi::CallbackInfo& info) {
|
|
21
|
+
Napi::Env env = info.Env();
|
|
22
|
+
|
|
23
|
+
if (info.Length() < 3
|
|
24
|
+
|| !info[0].IsTypedArray()
|
|
25
|
+
|| !info[1].IsTypedArray()
|
|
26
|
+
|| !info[2].IsNumber()) {
|
|
27
|
+
Napi::TypeError::New(env,
|
|
28
|
+
"elemwise: expected (Float64Array a, Float64Array b, number op)")
|
|
29
|
+
.ThrowAsJavaScriptException();
|
|
30
|
+
return env.Null();
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
auto arrA = info[0].As<Napi::Float64Array>();
|
|
34
|
+
auto arrB = info[1].As<Napi::Float64Array>();
|
|
35
|
+
int op = info[2].As<Napi::Number>().Int32Value();
|
|
36
|
+
|
|
37
|
+
size_t n = arrA.ElementLength();
|
|
38
|
+
if (arrB.ElementLength() != n) {
|
|
39
|
+
Napi::RangeError::New(env, "elemwise: arrays must have same length")
|
|
40
|
+
.ThrowAsJavaScriptException();
|
|
41
|
+
return env.Null();
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
auto result = Napi::Float64Array::New(env, n);
|
|
45
|
+
const double* a = arrA.Data();
|
|
46
|
+
const double* b = arrB.Data();
|
|
47
|
+
double* out = result.Data();
|
|
48
|
+
|
|
49
|
+
switch (op) {
|
|
50
|
+
case 0: // add
|
|
51
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] + b[i];
|
|
52
|
+
break;
|
|
53
|
+
case 1: // sub
|
|
54
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] - b[i];
|
|
55
|
+
break;
|
|
56
|
+
case 2: // mul
|
|
57
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] * b[i];
|
|
58
|
+
break;
|
|
59
|
+
case 3: // div
|
|
60
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] / b[i];
|
|
61
|
+
break;
|
|
62
|
+
default:
|
|
63
|
+
Napi::RangeError::New(env, "elemwise: op must be 0-3")
|
|
64
|
+
.ThrowAsJavaScriptException();
|
|
65
|
+
return env.Null();
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return result;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// ── elemwiseComplex() — complex element-wise binary op ──────────────────────
|
|
72
|
+
|
|
73
|
+
Napi::Value ElemwiseComplex(const Napi::CallbackInfo& info) {
|
|
74
|
+
Napi::Env env = info.Env();
|
|
75
|
+
|
|
76
|
+
// (aRe, aIm_or_null, bRe, bIm_or_null, op)
|
|
77
|
+
if (info.Length() < 5 || !info[0].IsTypedArray() || !info[2].IsTypedArray()
|
|
78
|
+
|| !info[4].IsNumber()) {
|
|
79
|
+
Napi::TypeError::New(env,
|
|
80
|
+
"elemwiseComplex: expected (Float64Array aRe, Float64Array|null aIm, "
|
|
81
|
+
"Float64Array bRe, Float64Array|null bIm, number op)")
|
|
82
|
+
.ThrowAsJavaScriptException();
|
|
83
|
+
return env.Null();
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
auto arrARe = info[0].As<Napi::Float64Array>();
|
|
87
|
+
auto arrBRe = info[2].As<Napi::Float64Array>();
|
|
88
|
+
int op = info[4].As<Napi::Number>().Int32Value();
|
|
89
|
+
|
|
90
|
+
size_t n = arrARe.ElementLength();
|
|
91
|
+
if (arrBRe.ElementLength() != n) {
|
|
92
|
+
Napi::RangeError::New(env, "elemwiseComplex: arrays must have same length")
|
|
93
|
+
.ThrowAsJavaScriptException();
|
|
94
|
+
return env.Null();
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const double* aRe = arrARe.Data();
|
|
98
|
+
const double* bRe = arrBRe.Data();
|
|
99
|
+
|
|
100
|
+
// aIm and bIm may be null (treat as zero)
|
|
101
|
+
bool hasAIm = info[1].IsTypedArray();
|
|
102
|
+
bool hasBIm = info[3].IsTypedArray();
|
|
103
|
+
const double* aIm = hasAIm ? info[1].As<Napi::Float64Array>().Data() : nullptr;
|
|
104
|
+
const double* bIm = hasBIm ? info[3].As<Napi::Float64Array>().Data() : nullptr;
|
|
105
|
+
|
|
106
|
+
auto outRe = Napi::Float64Array::New(env, n);
|
|
107
|
+
auto outIm = Napi::Float64Array::New(env, n);
|
|
108
|
+
double* oRe = outRe.Data();
|
|
109
|
+
double* oIm = outIm.Data();
|
|
110
|
+
|
|
111
|
+
switch (op) {
|
|
112
|
+
case 0: // add
|
|
113
|
+
for (size_t i = 0; i < n; i++) {
|
|
114
|
+
oRe[i] = aRe[i] + bRe[i];
|
|
115
|
+
oIm[i] = (aIm ? aIm[i] : 0.0) + (bIm ? bIm[i] : 0.0);
|
|
116
|
+
}
|
|
117
|
+
break;
|
|
118
|
+
case 1: // sub
|
|
119
|
+
for (size_t i = 0; i < n; i++) {
|
|
120
|
+
oRe[i] = aRe[i] - bRe[i];
|
|
121
|
+
oIm[i] = (aIm ? aIm[i] : 0.0) - (bIm ? bIm[i] : 0.0);
|
|
122
|
+
}
|
|
123
|
+
break;
|
|
124
|
+
case 2: { // mul: (a+bi)(c+di) = (ac-bd) + (ad+bc)i
|
|
125
|
+
for (size_t i = 0; i < n; i++) {
|
|
126
|
+
double ar = aRe[i], ai = aIm ? aIm[i] : 0.0;
|
|
127
|
+
double br = bRe[i], bi = bIm ? bIm[i] : 0.0;
|
|
128
|
+
oRe[i] = ar * br - ai * bi;
|
|
129
|
+
oIm[i] = ar * bi + ai * br;
|
|
130
|
+
}
|
|
131
|
+
break;
|
|
132
|
+
}
|
|
133
|
+
case 3: { // div: (a+bi)/(c+di) = ((ac+bd) + (bc-ad)i) / (c²+d²)
|
|
134
|
+
for (size_t i = 0; i < n; i++) {
|
|
135
|
+
double ar = aRe[i], ai = aIm ? aIm[i] : 0.0;
|
|
136
|
+
double br = bRe[i], bi = bIm ? bIm[i] : 0.0;
|
|
137
|
+
double denom = br * br + bi * bi;
|
|
138
|
+
if (denom == 0.0) {
|
|
139
|
+
oRe[i] = (ar == 0.0 && ai == 0.0) ? 0.0 / 0.0 /* NaN */
|
|
140
|
+
: (ar > 0 ? 1.0 : ar < 0 ? -1.0 : 0.0) / 0.0 /* ±Inf */;
|
|
141
|
+
oIm[i] = (ar == 0.0 && ai == 0.0) ? 0.0
|
|
142
|
+
: (ai > 0 ? 1.0 : ai < 0 ? -1.0 : 0.0) / 0.0;
|
|
143
|
+
} else {
|
|
144
|
+
oRe[i] = (ar * br + ai * bi) / denom;
|
|
145
|
+
oIm[i] = (ai * br - ar * bi) / denom;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
break;
|
|
149
|
+
}
|
|
150
|
+
default:
|
|
151
|
+
Napi::RangeError::New(env, "elemwiseComplex: op must be 0-3")
|
|
152
|
+
.ThrowAsJavaScriptException();
|
|
153
|
+
return env.Null();
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Check if result is purely real
|
|
157
|
+
bool isReal = true;
|
|
158
|
+
for (size_t i = 0; i < n; i++) {
|
|
159
|
+
if (oIm[i] != 0.0) { isReal = false; break; }
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
auto result = Napi::Object::New(env);
|
|
163
|
+
result.Set("re", outRe);
|
|
164
|
+
if (!isReal) {
|
|
165
|
+
result.Set("im", outIm);
|
|
166
|
+
}
|
|
167
|
+
return result;
|
|
168
|
+
}
|
package/native/lapack_chol.cpp
CHANGED
package/native/lapack_eig.cpp
CHANGED
package/native/lapack_fft.cpp
CHANGED
package/native/lapack_inv.cpp
CHANGED
package/native/lapack_lu.cpp
CHANGED
package/native/lapack_matmul.cpp
CHANGED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* matmulComplex() — Complex matrix-matrix multiplication via BLAS zgemm.
|
|
3
|
+
*
|
|
4
|
+
* matmulComplex(ARe: Float64Array, AIm: Float64Array,
|
|
5
|
+
* m: number, k: number,
|
|
6
|
+
* BRe: Float64Array, BIm: Float64Array,
|
|
7
|
+
* n: number): { re: Float64Array, im: Float64Array }
|
|
8
|
+
*
|
|
9
|
+
* Computes C = A * B where:
|
|
10
|
+
* A is an m×k complex matrix (split re/im) stored in column-major order
|
|
11
|
+
* B is a k×n complex matrix (split re/im) stored in column-major order
|
|
12
|
+
* C is an m×n complex matrix returned as {re, im} in column-major order; for a non-empty result, `im` is omitted when every imaginary part is zero
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
#include "numbl_addon_common.h"
|
|
16
|
+
|
|
17
|
+
/**
 * N-API entry point for matmulComplex(ARe, AIm, m, k, BRe, BIm, n).
 *
 * Multiplies an m×k complex matrix A by a k×n complex matrix B with BLAS
 * zgemm. Inputs are split real/imaginary Float64Arrays; the result is an
 * object with `re` and, when any imaginary part is non-zero, `im`.
 *
 * Throws a JavaScript TypeError when the argument types are wrong and a
 * RangeError when a dimension is negative or an input array holds fewer
 * elements than its dimensions require. Returns null after throwing.
 */
Napi::Value MatmulComplex(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();

  // Only a genuine Float64Array may be viewed as doubles below; any other
  // typed array (e.g. Uint8Array) would be misread through As<Float64Array>().
  auto isFloat64Array = [](const Napi::Value& v) {
    return v.IsTypedArray()
        && v.As<Napi::TypedArray>().TypedArrayType() == napi_float64_array;
  };

  // matmulComplex(ARe, AIm, m, k, BRe, BIm, n)
  if (info.Length() < 7
      || !isFloat64Array(info[0])   // ARe
      || !isFloat64Array(info[1])   // AIm
      || !info[2].IsNumber()        // m
      || !info[3].IsNumber()        // k
      || !isFloat64Array(info[4])   // BRe
      || !isFloat64Array(info[5])   // BIm
      || !info[6].IsNumber()) {     // n
    Napi::TypeError::New(env,
        "matmulComplex: expected (Float64Array ARe, Float64Array AIm, "
        "number m, number k, Float64Array BRe, Float64Array BIm, number n)")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  auto arrARe = info[0].As<Napi::Float64Array>();
  auto arrAIm = info[1].As<Napi::Float64Array>();
  int m = info[2].As<Napi::Number>().Int32Value();
  int k = info[3].As<Napi::Number>().Int32Value();
  auto arrBRe = info[4].As<Napi::Float64Array>();
  auto arrBIm = info[5].As<Napi::Float64Array>();
  int n = info[6].As<Napi::Number>().Int32Value();

  if (m < 0 || k < 0 || n < 0) {
    Napi::RangeError::New(env, "matmulComplex: m, k, n must be non-negative")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  // Element counts are computed in size_t rather than int so that large
  // dimensions cannot trigger signed integer overflow (undefined behaviour).
  const size_t mk = static_cast<size_t>(m) * static_cast<size_t>(k);
  const size_t kn = static_cast<size_t>(k) * static_cast<size_t>(n);
  const size_t mn = static_cast<size_t>(m) * static_cast<size_t>(n);

  // Handle empty-dimension multiply: the product is an m×n block of zeros
  // (possibly with zero elements) and zgemm is not called.
  if (m == 0 || k == 0 || n == 0) {
    auto result = Napi::Object::New(env);
    result.Set("re", Napi::Float64Array::New(env, mn));
    result.Set("im", Napi::Float64Array::New(env, mn));
    return result;
  }

  // Refuse inputs that are shorter than the declared shapes; otherwise the
  // interleave loops below would read past the end of the caller's buffers.
  if (arrARe.ElementLength() < mk || arrAIm.ElementLength() < mk
      || arrBRe.ElementLength() < kn || arrBIm.ElementLength() < kn) {
    Napi::RangeError::New(env,
        "matmulComplex: input arrays are shorter than the given dimensions")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  // Interleave into complex arrays for zgemm
  const double* aRe = arrARe.Data();
  const double* aIm = arrAIm.Data();
  std::vector<lapack_complex_double> a(mk);
  for (size_t i = 0; i < mk; ++i) {
    a[i].real = aRe[i];
    a[i].imag = aIm[i];
  }

  const double* bRe = arrBRe.Data();
  const double* bIm = arrBIm.Data();
  std::vector<lapack_complex_double> b(kn);
  for (size_t i = 0; i < kn; ++i) {
    b[i].real = bRe[i];
    b[i].imag = bIm[i];
  }

  std::vector<lapack_complex_double> c(mn, {0.0, 0.0});

  // C = 1 * A * B + 0 * C, no transposition; leading dimensions equal the
  // row counts of the respective matrices.
  char transa = 'N';
  char transb = 'N';
  lapack_complex_double alpha = {1.0, 0.0};
  lapack_complex_double beta  = {0.0, 0.0};
  int lda = m;
  int ldb = k;
  int ldc = m;

  zgemm_(&transa, &transb,
         &m, &n, &k,
         &alpha, a.data(), &lda,
         b.data(), &ldb,
         &beta, c.data(), &ldc);

  // Deinterleave the result and, in the same pass, detect whether any
  // imaginary part is non-zero.
  auto outRe = Napi::Float64Array::New(env, mn);
  auto outIm = Napi::Float64Array::New(env, mn);
  double* oRe = outRe.Data();
  double* oIm = outIm.Data();
  bool isReal = true;
  for (size_t i = 0; i < mn; ++i) {
    oRe[i] = c[i].real;
    oIm[i] = c[i].imag;
    if (oIm[i] != 0.0) {
      isReal = false;
    }
  }

  // `im` is only attached when the product is not purely real.
  auto result = Napi::Object::New(env);
  result.Set("re", outRe);
  if (!isReal) {
    result.Set("im", outIm);
  }
  return result;
}
|
package/native/lapack_qr.cpp
CHANGED
package/native/lapack_qz.cpp
CHANGED
package/native/lapack_svd.cpp
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* numbl native addon — LAPACK/BLAS, FFT, element-wise arithmetic, and more.
|
|
3
3
|
*
|
|
4
4
|
* Exported functions (see individual .cpp files for full documentation):
|
|
5
5
|
*
|
|
@@ -19,11 +19,32 @@
|
|
|
19
19
|
* cholComplex(dataRe, dataIm, n, upper) — complex Cholesky (lapack_chol.cpp)
|
|
20
20
|
*/
|
|
21
21
|
|
|
22
|
-
#include "
|
|
22
|
+
#include "numbl_addon_common.h"
|
|
23
|
+
#include <cstdlib>
|
|
24
|
+
|
|
25
|
+
extern "C" {
|
|
26
|
+
void openblas_set_num_threads(int num_threads);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// ── Addon version ────────────────────────────────────────────────────────────
|
|
30
|
+
// Bump this integer whenever the addon's API changes (new functions, signature
|
|
31
|
+
// changes, etc.) so that the JS side can detect stale builds.
|
|
32
|
+
static const int ADDON_VERSION = 1;
|
|
33
|
+
|
|
34
|
+
// Exposes ADDON_VERSION to JavaScript as a number so the JS side can detect
// a stale native build.
static Napi::Value AddonVersion(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  Napi::Number version = Napi::Number::New(env, ADDON_VERSION);
  return version;
}
|
|
23
37
|
|
|
24
38
|
// ── Module initialisation ─────────────────────────────────────────────────────
|
|
25
39
|
|
|
26
40
|
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
41
|
+
// Use single-threaded BLAS unless the user explicitly set the env var.
|
|
42
|
+
// Multi-threaded BLAS adds overhead for the many small matmuls in numbl.
|
|
43
|
+
if (!std::getenv("OPENBLAS_NUM_THREADS")) {
|
|
44
|
+
openblas_set_num_threads(1);
|
|
45
|
+
}
|
|
46
|
+
exports.Set(Napi::String::New(env, "addonVersion"),
|
|
47
|
+
Napi::Function::New(env, AddonVersion));
|
|
27
48
|
exports.Set(Napi::String::New(env, "inv"),
|
|
28
49
|
Napi::Function::New(env, Inv));
|
|
29
50
|
exports.Set(Napi::String::New(env, "invComplex"),
|
|
@@ -42,6 +63,8 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
|
42
63
|
Napi::Function::New(env, SvdComplex));
|
|
43
64
|
exports.Set(Napi::String::New(env, "matmul"),
|
|
44
65
|
Napi::Function::New(env, Matmul));
|
|
66
|
+
exports.Set(Napi::String::New(env, "matmulComplex"),
|
|
67
|
+
Napi::Function::New(env, MatmulComplex));
|
|
45
68
|
exports.Set(Napi::String::New(env, "linsolve"),
|
|
46
69
|
Napi::Function::New(env, Linsolve));
|
|
47
70
|
exports.Set(Napi::String::New(env, "linsolveComplex"),
|
|
@@ -64,7 +87,11 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
|
64
87
|
Napi::Function::New(env, Fft1dComplex));
|
|
65
88
|
exports.Set(Napi::String::New(env, "fftAlongDim"),
|
|
66
89
|
Napi::Function::New(env, FftAlongDim));
|
|
90
|
+
exports.Set(Napi::String::New(env, "elemwise"),
|
|
91
|
+
Napi::Function::New(env, Elemwise));
|
|
92
|
+
exports.Set(Napi::String::New(env, "elemwiseComplex"),
|
|
93
|
+
Napi::Function::New(env, ElemwiseComplex));
|
|
67
94
|
return exports;
|
|
68
95
|
}
|
|
69
96
|
|
|
70
|
-
NODE_API_MODULE(
|
|
97
|
+
NODE_API_MODULE(numbl_addon, Init)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Common includes, type definitions, LAPACK/BLAS declarations, and function
|
|
3
|
-
* prototypes shared across the
|
|
3
|
+
* prototypes shared across the numbl_addon source files.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
#pragma once
|
|
@@ -83,6 +83,13 @@ extern "C" {
|
|
|
83
83
|
double* b, int* ldb,
|
|
84
84
|
double* beta, double* c, int* ldc);
|
|
85
85
|
|
|
86
|
+
// Complex matrix-matrix multiplication: C = alpha * op(A) * op(B) + beta * C
|
|
87
|
+
void zgemm_(char* transa, char* transb,
|
|
88
|
+
int* m, int* n, int* k,
|
|
89
|
+
lapack_complex_double* alpha, lapack_complex_double* a, int* lda,
|
|
90
|
+
lapack_complex_double* b, int* ldb,
|
|
91
|
+
lapack_complex_double* beta, lapack_complex_double* c, int* ldc);
|
|
92
|
+
|
|
86
93
|
// ── Linear solve (square) ─────────────────────────────────────────────────
|
|
87
94
|
// LU factorisation + solve: A * X = B (A is n×n, B is n×nrhs)
|
|
88
95
|
// On exit A contains the LU factors; B contains X.
|
|
@@ -253,6 +260,7 @@ Napi::Value LuComplex(const Napi::CallbackInfo& info);
|
|
|
253
260
|
Napi::Value Svd(const Napi::CallbackInfo& info);
|
|
254
261
|
Napi::Value SvdComplex(const Napi::CallbackInfo& info);
|
|
255
262
|
Napi::Value Matmul(const Napi::CallbackInfo& info);
|
|
263
|
+
Napi::Value MatmulComplex(const Napi::CallbackInfo& info);
|
|
256
264
|
Napi::Value Linsolve(const Napi::CallbackInfo& info);
|
|
257
265
|
Napi::Value LinsolveComplex(const Napi::CallbackInfo& info);
|
|
258
266
|
Napi::Value Eig(const Napi::CallbackInfo& info);
|
|
@@ -264,3 +272,5 @@ Napi::Value QzComplex(const Napi::CallbackInfo& info);
|
|
|
264
272
|
Napi::Value Fft1d(const Napi::CallbackInfo& info);
|
|
265
273
|
Napi::Value Fft1dComplex(const Napi::CallbackInfo& info);
|
|
266
274
|
Napi::Value FftAlongDim(const Napi::CallbackInfo& info);
|
|
275
|
+
Napi::Value Elemwise(const Napi::CallbackInfo& info);
|
|
276
|
+
Napi::Value ElemwiseComplex(const Napi::CallbackInfo& info);
|